104 files changed, 2260 insertions, 1151 deletions
diff --git a/configure b/configure
index 6f1b7cd83d..6d8c996c62 100755
--- a/configure
+++ b/configure
@@ -5641,6 +5641,12 @@ if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
 fi
 
+if test "$gtkabi" = "2.0"; then
+    echo
+    echo "WARNING: Use of GTK 2.0 is deprecated and will be removed in"
+    echo "WARNING: future releases. Please switch to using GTK 3.0"
+fi
+
 if test "$supported_cpu" = "no"; then
     echo
     echo "WARNING: SUPPORT FOR THIS HOST CPU WILL GO AWAY IN FUTURE RELEASES!"
diff --git a/docs/devel/migration.txt b/docs/devel/migration.rst
index 4030703726..015a9ebdf7 100644
--- a/docs/devel/migration.txt
+++ b/docs/devel/migration.rst
@@ -1,4 +1,6 @@
-= Migration =
+=========
+Migration
+=========
 
 QEMU has code to load/save the state of the guest that it is running.
 These are two complementary operations.  Saving the state just does
@@ -26,7 +28,8 @@ the guest to be stopped.  Typically the time that the guest is
 unresponsive during live migration is the low hundred of milliseconds
 (notice that this depends on a lot of things).
 
-=== Types of migration ===
+Types of migration
+==================
 
 Now that we have talked about live migration, there are several ways
 to do migration:
@@ -41,49 +44,21 @@ All these four migration protocols use the same infrastructure to
 save/restore state devices.  This infrastructure is shared with the
 savevm/loadvm functionality.
 
-=== State Live Migration ===
+State Live Migration
+====================
 
 This is used for RAM and block devices.  It is not yet ported to vmstate.
 <Fill more information here>
 
-=== What is the common infrastructure ===
+Common infrastructure
+=====================
 
-QEMU uses a QEMUFile abstraction to be able to do migration.  Any type
-of migration that wants to use QEMU infrastructure has to create a
-QEMUFile with:
+The files, sockets or fd's that carry the migration stream are abstracted by
+the  ``QEMUFile`` type (see `migration/qemu-file.h`).  In most cases this
+is connected to a subtype of ``QIOChannel`` (see `io/`).
 
-QEMUFile *qemu_fopen_ops(void *opaque,
-                         QEMUFilePutBufferFunc *put_buffer,
-                         QEMUFileGetBufferFunc *get_buffer,
-                         QEMUFileCloseFunc *close);
-
-The functions have the following functionality:
-
-This function writes a chunk of data to a file at the given position.
-The pos argument can be ignored if the file is only used for
-streaming.  The handler should try to write all of the data it can.
-
-typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
-                                    int64_t pos, int size);
-
-Read a chunk of data from a file at the given position.  The pos argument
-can be ignored if the file is only be used for streaming.  The number of
-bytes actually read should be returned.
-
-typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
-                                    int64_t pos, int size);
-
-Close a file and return an error code.
-
-typedef int (QEMUFileCloseFunc)(void *opaque);
-
-You can use any internal state that you need using the opaque void *
-pointer that is passed to all functions.
-
-The important functions for us are put_buffer()/get_buffer() that
-allow to write/read a buffer into the QEMUFile.
-
-=== How to save the state of one device ===
+Saving the state of one device
+==============================
 
 The state of a device is saved using intermediate buffers.  There are
 some helper functions to assist this saving.
@@ -93,34 +68,38 @@ version.  When we migrate a device, we save/load the state as a series
 of fields.  Some times, due to bugs or new functionality, we need to
 change the state to store more/different information.  We use the
 version to identify each time that we do a change.  Each version is
-associated with a series of fields saved.  The save_state always saves
-the state as the newer version.  But load_state sometimes is able to
+associated with a series of fields saved.  The `save_state` always saves
+the state as the newer version.  But `load_state` sometimes is able to
 load state from an older version.
 
-=== Legacy way ===
+Legacy way
+----------
 
 This way is going to disappear as soon as all current users are ported to VMSTATE.
 
 Each device has to register two functions, one to save the state and
 another to load the state back.
 
-int register_savevm(DeviceState *dev,
-                    const char *idstr,
-                    int instance_id,
-                    int version_id,
-                    SaveStateHandler *save_state,
-                    LoadStateHandler *load_state,
-                    void *opaque);
+.. code:: c
+
+  int register_savevm(DeviceState *dev,
+                      const char *idstr,
+                      int instance_id,
+                      int version_id,
+                      SaveStateHandler *save_state,
+                      LoadStateHandler *load_state,
+                      void *opaque);
 
-typedef void SaveStateHandler(QEMUFile *f, void *opaque);
-typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+  typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+  typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
 
-The important functions for the device state format are the save_state
-and load_state.  Notice that load_state receives a version_id
-parameter to know what state format is receiving.  save_state doesn't
+The important functions for the device state format are the `save_state`
+and `load_state`.  Notice that `load_state` receives a version_id
+parameter to know what state format is receiving.  `save_state` doesn't
 have a version_id parameter because it always uses the latest version.
 
-=== VMState ===
+VMState
+-------
 
 The legacy way of saving/loading state of the device had the problem
 that we have to maintain two functions in sync.  If we did one change
@@ -135,31 +114,36 @@ save/load functions.
 
 An example (from hw/input/pckbd.c)
 
-static const VMStateDescription vmstate_kbd = {
-    .name = "pckbd",
-    .version_id = 3,
-    .minimum_version_id = 3,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT8(write_cmd, KBDState),
-        VMSTATE_UINT8(status, KBDState),
-        VMSTATE_UINT8(mode, KBDState),
-        VMSTATE_UINT8(pending, KBDState),
-        VMSTATE_END_OF_LIST()
-    }
-};
+.. code:: c
+
+  static const VMStateDescription vmstate_kbd = {
+      .name = "pckbd",
+      .version_id = 3,
+      .minimum_version_id = 3,
+      .fields = (VMStateField[]) {
+          VMSTATE_UINT8(write_cmd, KBDState),
+          VMSTATE_UINT8(status, KBDState),
+          VMSTATE_UINT8(mode, KBDState),
+          VMSTATE_UINT8(pending, KBDState),
+          VMSTATE_END_OF_LIST()
+      }
+  };
 
 We are declaring the state with name "pckbd".
-The version_id is 3, and the fields are 4 uint8_t in a KBDState structure.
+The `version_id` is 3, and the fields are 4 uint8_t in a KBDState structure.
 We registered this with:
 
+.. code:: c
+
     vmstate_register(NULL, 0, &vmstate_kbd, s);
 
 Note: talk about how vmstate <-> qdev interact, and what the instance ids mean.
 
-You can search for VMSTATE_* macros for lots of types used in QEMU in
+You can search for ``VMSTATE_*`` macros for lots of types used in QEMU in
 include/hw/hw.h.
 
-=== More about versions ===
+More about versions
+-------------------
 
 Version numbers are intended for major incompatible changes to the
 migration of a device, and using them breaks backwards-migration
@@ -168,22 +152,23 @@ compatibility; in general most changes can be made by adding Subsections
 
 You can see that there are several version fields:
 
-- version_id: the maximum version_id supported by VMState for that device.
-- minimum_version_id: the minimum version_id that VMState is able to understand
+- `version_id`: the maximum version_id supported by VMState for that device.
+- `minimum_version_id`: the minimum version_id that VMState is able to understand
   for that device.
-- minimum_version_id_old: For devices that were not able to port to vmstate, we can
+- `minimum_version_id_old`: For devices that were not able to port to vmstate, we can
   assign a function that knows how to read this old state. This field is
-  ignored if there is no load_state_old handler.
+  ignored if there is no `load_state_old` handler.
 
 So, VMState is able to read versions from minimum_version_id to
-version_id.  And the function load_state_old() (if present) is able to
+version_id.  And the function ``load_state_old()`` (if present) is able to
 load state from minimum_version_id_old to minimum_version_id.  This
 function is deprecated and will be removed when no more users are left.
 
 Saving state will always create a section with the 'version_id' value
 and thus can't be loaded by any older QEMU.
 
-===  Massaging functions ===
+Massaging functions
+-------------------
 
 Sometimes, it is not enough to be able to save the state directly
 from one structure, we need to fill the correct values there.  One
@@ -194,24 +179,24 @@ load the state for the cpu that we have just loaded from the QEMUFile.
 
 The functions to do that are inside a vmstate definition, and are called:
 
-- int (*pre_load)(void *opaque);
+- ``int (*pre_load)(void *opaque);``
 
   This function is called before we load the state of one device.
 
-- int (*post_load)(void *opaque, int version_id);
+- ``int (*post_load)(void *opaque, int version_id);``
 
   This function is called after we load the state of one device.
 
-- int (*pre_save)(void *opaque);
+- ``int (*pre_save)(void *opaque);``
 
   This function is called before we save the state of one device.
 
 Example: You can look at hpet.c, that uses the three function to
-         massage the state that is transferred.
+massage the state that is transferred.
 
 If you use memory API functions that update memory layout outside
 initialization (i.e., in response to a guest action), this is a strong
-indication that you need to call these functions in a post_load callback.
+indication that you need to call these functions in a `post_load` callback.
 Examples of such memory API functions are:
 
   - memory_region_add_subregion()
@@ -221,7 +206,8 @@ Examples of such memory API functions are:
   - memory_region_set_address()
   - memory_region_set_alias_offset()
 
-=== Subsections ===
+Subsections
+-----------
 
 The use of version_id allows to be able to migrate from older versions
 to newer versions of a device.  But not the other way around.  This
@@ -251,52 +237,54 @@ value that it uses.
 
 Example:
 
-static bool ide_drive_pio_state_needed(void *opaque)
-{
-    IDEState *s = opaque;
-
-    return ((s->status & DRQ_STAT) != 0)
-        || (s->bus->error_status & BM_STATUS_PIO_RETRY);
-}
-
-const VMStateDescription vmstate_ide_drive_pio_state = {
-    .name = "ide_drive/pio_state",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .pre_save = ide_drive_pio_pre_save,
-    .post_load = ide_drive_pio_post_load,
-    .needed = ide_drive_pio_state_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT32(req_nb_sectors, IDEState),
-        VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
-                             vmstate_info_uint8, uint8_t),
-        VMSTATE_INT32(cur_io_buffer_offset, IDEState),
-        VMSTATE_INT32(cur_io_buffer_len, IDEState),
-        VMSTATE_UINT8(end_transfer_fn_idx, IDEState),
-        VMSTATE_INT32(elementary_transfer_size, IDEState),
-        VMSTATE_INT32(packet_transfer_size, IDEState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-const VMStateDescription vmstate_ide_drive = {
-    .name = "ide_drive",
-    .version_id = 3,
-    .minimum_version_id = 0,
-    .post_load = ide_drive_post_load,
-    .fields = (VMStateField[]) {
-        .... several fields ....
-        VMSTATE_END_OF_LIST()
-    },
-    .subsections = (const VMStateDescription*[]) {
-        &vmstate_ide_drive_pio_state,
-        NULL
-    }
-};
+.. code:: c
+
+  static bool ide_drive_pio_state_needed(void *opaque)
+  {
+      IDEState *s = opaque;
+
+      return ((s->status & DRQ_STAT) != 0)
+          || (s->bus->error_status & BM_STATUS_PIO_RETRY);
+  }
+
+  const VMStateDescription vmstate_ide_drive_pio_state = {
+      .name = "ide_drive/pio_state",
+      .version_id = 1,
+      .minimum_version_id = 1,
+      .pre_save = ide_drive_pio_pre_save,
+      .post_load = ide_drive_pio_post_load,
+      .needed = ide_drive_pio_state_needed,
+      .fields = (VMStateField[]) {
+          VMSTATE_INT32(req_nb_sectors, IDEState),
+          VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
+                               vmstate_info_uint8, uint8_t),
+          VMSTATE_INT32(cur_io_buffer_offset, IDEState),
+          VMSTATE_INT32(cur_io_buffer_len, IDEState),
+          VMSTATE_UINT8(end_transfer_fn_idx, IDEState),
+          VMSTATE_INT32(elementary_transfer_size, IDEState),
+          VMSTATE_INT32(packet_transfer_size, IDEState),
+          VMSTATE_END_OF_LIST()
+      }
+  };
+
+  const VMStateDescription vmstate_ide_drive = {
+      .name = "ide_drive",
+      .version_id = 3,
+      .minimum_version_id = 0,
+      .post_load = ide_drive_post_load,
+      .fields = (VMStateField[]) {
+          .... several fields ....
+          VMSTATE_END_OF_LIST()
+      },
+      .subsections = (const VMStateDescription*[]) {
+          &vmstate_ide_drive_pio_state,
+          NULL
+      }
+  };
 
 Here we have a subsection for the pio state.  We only need to
 save/send this state when we are in the middle of a pio operation
-(that is what ide_drive_pio_state_needed() checks).  If DRQ_STAT is
+(that is what ``ide_drive_pio_state_needed()`` checks).  If DRQ_STAT is
 not enabled, the values on that fields are garbage and don't need to
 be sent.
 
@@ -304,11 +292,12 @@ Using a condition function that checks a 'property' to determine whether
 to send a subsection allows backwards migration compatibility when
 new subsections are added.
 
-For example;
-   a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
+For example:
+
+   a) Add a new property using ``DEFINE_PROP_BOOL`` - e.g. support-foo and
       default it to true.
-   b) Add an entry to the HW_COMPAT_ for the previous version
-      that sets the property to false.
+   b) Add an entry to the ``HW_COMPAT_`` for the previous version that sets
+      the property to false.
    c) Add a static bool  support_foo function that tests the property.
    d) Add a subsection with a .needed set to the support_foo function
    e) (potentially) Add a pre_load that sets up a default value for 'foo'
@@ -332,25 +321,30 @@ in most cases.  In general the preference is to tie the subsection to
 the machine type, and allow reliable migrations, unless the behaviour
 from omission of the subsection is really bad.
 
-= Not sending existing elements =
+Not sending existing elements
+-----------------------------
+
+Sometimes members of the VMState are no longer needed:
+
+  - removing them will break migration compatibility
 
-Sometimes members of the VMState are no longer needed;
-  removing them will break migration compatibility
-  making them version dependent and bumping the version will break backwards
-   migration compatibility.
+  - making them version dependent and bumping the version will break backwards migration compatibility.
 
 The best way is to:
-  a) Add a new property/compatibility/function in the same way for subsections
-    above.
+
+  a) Add a new property/compatibility/function in the same way for subsections above.
   b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
-     VMSTATE_UINT32(foo, barstruct)
+
+   ``VMSTATE_UINT32(foo, barstruct)``
+
    becomes
-     VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
 
-  Sometime in the future when we no longer care about the ancient
-versions these can be killed off.
+   ``VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)``
 
-= Return path =
+   Sometime in the future when we no longer care about the ancient versions these can be killed off.
+
+Return path
+-----------
 
 In most migration scenarios there is only a single data path that runs
 from the source VM to the destination, typically along a single fd (although
@@ -360,19 +354,23 @@ However, some uses need two way communication; in particular the Postcopy
 destination needs to be able to request pages on demand from the source.
 
 For these scenarios there is a 'return path' from the destination to the source;
-qemu_file_get_return_path(QEMUFile* fwdpath) gives the QEMUFile* for the return
+``qemu_file_get_return_path(QEMUFile* fwdpath)`` gives the QEMUFile* for the return
 path.
 
   Source side
+
      Forward path - written by migration thread
      Return path  - opened by main thread, read by return-path thread
 
   Destination side
+
      Forward path - read by main thread
      Return path  - opened by main thread, written by main thread AND postcopy
-                    thread (protected by rp_mutex)
+     thread (protected by rp_mutex)
+
+Postcopy
+========
 
-= Postcopy =
 'Postcopy' migration is a way to deal with migrations that refuse to converge
 (or take too long to converge) its plus side is that there is an upper bound on
 the amount of migration traffic and time it takes, the down side is that during
@@ -386,27 +384,44 @@ a fault that's translated by QEMU into a request to the source QEMU.
 Postcopy can be combined with precopy (i.e. normal migration) so that if precopy
 doesn't finish in a given time the switch is made to postcopy.
 
-=== Enabling postcopy ===
+Enabling postcopy
+-----------------
 
 To enable postcopy, issue this command on the monitor prior to the
 start of migration:
 
-migrate_set_capability postcopy-ram on
+``migrate_set_capability postcopy-ram on``
 
 The normal commands are then used to start a migration, which is still
 started in precopy mode.  Issuing:
 
-migrate_start_postcopy
+``migrate_start_postcopy``
 
 will now cause the transition from precopy to postcopy.
 It can be issued immediately after migration is started or any
 time later on.  Issuing it after the end of a migration is harmless.
 
-Note: During the postcopy phase, the bandwidth limits set using
-migrate_set_speed is ignored (to avoid delaying requested pages that
-the destination is waiting for).
+Blocktime is a postcopy live migration metric, intended to show how
+long the vCPU was in state of interruptable sleep due to pagefault.
+That metric is calculated both for all vCPUs as overlapped value, and
+separately for each vCPU. These values are calculated on destination
+side.  To enable postcopy blocktime calculation, enter following
+command on destination monitor:
+
+``migrate_set_capability postcopy-blocktime on``
+
+Postcopy blocktime can be retrieved by query-migrate qmp command.
+postcopy-blocktime value of qmp command will show overlapped blocking
+time for all vCPU, postcopy-vcpu-blocktime will show list of blocking
+time per vCPU.
 
-=== Postcopy device transfer ===
+.. note::
+  During the postcopy phase, the bandwidth limits set using
+  ``migrate_set_speed`` is ignored (to avoid delaying requested pages that
+  the destination is waiting for).
+
+Postcopy device transfer
+------------------------
 
 Loading of device data may cause the device emulation to access guest RAM
 that may trigger faults that have to be resolved by the source, as such
@@ -416,6 +431,7 @@ before the device load begins to free the stream up.  This is achieved by
 'packaging' the device data into a blob that's read in one go.
 
 Source behaviour
+----------------
 
 Until postcopy is entered the migration stream is identical to normal
 precopy, except for the addition of a 'postcopy advise' command at
@@ -423,13 +439,14 @@ the beginning, to tell the destination that postcopy might happen.
 When postcopy starts the source sends the page discard data and then
 forms the 'package' containing:
 
-   Command: 'postcopy listen'
-   The device state
-      A series of sections, identical to the precopy streams device state stream
-      containing everything except postcopiable devices (i.e. RAM)
-   Command: 'postcopy run'
+   - Command: 'postcopy listen'
+   - The device state
+
+     A series of sections, identical to the precopy streams device state stream
+     containing everything except postcopiable devices (i.e. RAM)
+   - Command: 'postcopy run'
 
-The 'package' is sent as the data part of a Command: 'CMD_PACKAGED', and the
+The 'package' is sent as the data part of a Command: ``CMD_PACKAGED``, and the
 contents are formatted in the same way as the main migration stream.
 
 During postcopy the source scans the list of dirty pages and sends them
@@ -441,82 +458,100 @@ to be sent quickly in the hope that those pages are likely to be used
 by the destination soon.
 
 Destination behaviour
+---------------------
 
 Initially the destination looks the same as precopy, with a single thread
 reading the migration stream; the 'postcopy advise' and 'discard' commands
 are processed to change the way RAM is managed, but don't affect the stream
 processing.
 
-------------------------------------------------------------------------------
-                        1      2   3     4 5                      6   7
-main -----DISCARD-CMD_PACKAGED ( LISTEN  DEVICE     DEVICE DEVICE RUN )
-thread                             |       |
-                                   |     (page request)
-                                   |        \___
-                                   v            \
-listen thread:                     --- page -- page -- page -- page -- page --
-
-                                   a   b        c
-------------------------------------------------------------------------------
-
-On receipt of CMD_PACKAGED (1)
-   All the data associated with the package - the ( ... ) section in the
-diagram - is read into memory, and the main thread recurses into
-qemu_loadvm_state_main to process the contents of the package (2)
-which contains commands (3,6) and devices (4...)
-
-On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package)
-a new thread (a) is started that takes over servicing the migration stream,
-while the main thread carries on loading the package.   It loads normal
-background page data (b) but if during a device load a fault happens (5) the
-returned page (c) is loaded by the listen thread allowing the main threads
-device load to carry on.
-
-The last thing in the CMD_PACKAGED is a 'RUN' command (6) letting the destination
-CPUs start running.
-At the end of the CMD_PACKAGED (7) the main thread returns to normal running behaviour
-and is no longer used by migration, while the listen thread carries
-on servicing page data until the end of migration.
-
-=== Postcopy states ===
+::
+
+  ------------------------------------------------------------------------------
+                          1      2   3     4 5                      6   7
+  main -----DISCARD-CMD_PACKAGED ( LISTEN  DEVICE     DEVICE DEVICE RUN )
+  thread                             |       |
+                                     |     (page request)
+                                     |        \___
+                                     v            \
+  listen thread:                     --- page -- page -- page -- page -- page --
+
+                                     a   b        c
+  ------------------------------------------------------------------------------
+
+- On receipt of ``CMD_PACKAGED`` (1)
+
+   All the data associated with the package - the ( ... ) section in the diagram -
+   is read into memory, and the main thread recurses into qemu_loadvm_state_main
+   to process the contents of the package (2) which contains commands (3,6) and
+   devices (4...)
+
+- On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package)
+
+   a new thread (a) is started that takes over servicing the migration stream,
+   while the main thread carries on loading the package.   It loads normal
+   background page data (b) but if during a device load a fault happens (5)
+   the returned page (c) is loaded by the listen thread allowing the main
+   threads device load to carry on.
+
+- The last thing in the ``CMD_PACKAGED`` is a 'RUN' command (6)
+
+   letting the destination CPUs start running.  At the end of the
+   ``CMD_PACKAGED`` (7) the main thread returns to normal running behaviour and
+   is no longer used by migration, while the listen thread carries on servicing
+   page data until the end of migration.
+
+Postcopy states
+---------------
 
 Postcopy moves through a series of states (see postcopy_state) from
 ADVISE->DISCARD->LISTEN->RUNNING->END
 
-  Advise:  Set at the start of migration if postcopy is enabled, even
-           if it hasn't had the start command; here the destination
-           checks that its OS has the support needed for postcopy, and performs
-           setup to ensure the RAM mappings are suitable for later postcopy.
-           The destination will fail early in migration at this point if the
-           required OS support is not present.
-           (Triggered by reception of POSTCOPY_ADVISE command)
-
-  Discard: Entered on receipt of the first 'discard' command; prior to
-           the first Discard being performed, hugepages are switched off
-           (using madvise) to ensure that no new huge pages are created
-           during the postcopy phase, and to cause any huge pages that
-           have discards on them to be broken.
-
-  Listen:  The first command in the package, POSTCOPY_LISTEN, switches
-           the destination state to Listen, and starts a new thread
-           (the 'listen thread') which takes over the job of receiving
-           pages off the migration stream, while the main thread carries
-           on processing the blob.  With this thread able to process page
-           reception, the destination now 'sensitises' the RAM to detect
-           any access to missing pages (on Linux using the 'userfault'
-           system).
-
-  Running: POSTCOPY_RUN causes the destination to synchronise all
-           state and start the CPUs and IO devices running.  The main
-           thread now finishes processing the migration package and
-           now carries on as it would for normal precopy migration
-           (although it can't do the cleanup it would do as it
-           finishes a normal migration).
-
-  End:     The listen thread can now quit, and perform the cleanup of migration
-           state, the migration is now complete.
-
-=== Source side page maps ===
+ - Advise
+
+    Set at the start of migration if postcopy is enabled, even
+    if it hasn't had the start command; here the destination
+    checks that its OS has the support needed for postcopy, and performs
+    setup to ensure the RAM mappings are suitable for later postcopy.
+    The destination will fail early in migration at this point if the
+    required OS support is not present.
+    (Triggered by reception of POSTCOPY_ADVISE command)
+
+ - Discard
+
+    Entered on receipt of the first 'discard' command; prior to
+    the first Discard being performed, hugepages are switched off
+    (using madvise) to ensure that no new huge pages are created
+    during the postcopy phase, and to cause any huge pages that
+    have discards on them to be broken.
+
+ - Listen
+
+    The first command in the package, POSTCOPY_LISTEN, switches
+    the destination state to Listen, and starts a new thread
+    (the 'listen thread') which takes over the job of receiving
+    pages off the migration stream, while the main thread carries
+    on processing the blob.  With this thread able to process page
+    reception, the destination now 'sensitises' the RAM to detect
+    any access to missing pages (on Linux using the 'userfault'
+    system).
+
+ - Running
+
+    POSTCOPY_RUN causes the destination to synchronise all
+    state and start the CPUs and IO devices running.  The main
+    thread now finishes processing the migration package and
+    now carries on as it would for normal precopy migration
+    (although it can't do the cleanup it would do as it
+    finishes a normal migration).
+
+ - End
+
+    The listen thread can now quit, and perform the cleanup of migration
+    state, the migration is now complete.
+
+Source side page maps
+---------------------
 
 The source side keeps two bitmaps during postcopy; 'the migration bitmap'
 and 'unsent map'.  The 'migration bitmap' is basically the same as in
@@ -529,6 +564,7 @@ The 'unsent map' is used for the transition to postcopy. It is a bitmap that
 has a bit cleared whenever a page is sent to the destination, however during
 the transition to postcopy mode it is combined with the migration bitmap
 to form a set of pages that:
+
    a) Have been sent but then redirtied (which must be discarded)
    b) Have not yet been sent - which also must be discarded to cause any
       transparent huge pages built during precopy to be broken.
@@ -540,15 +576,17 @@ request for a page that has already been sent is ignored.  Duplicate requests
 such as this can happen as a page is sent at about the same time the
 destination accesses it.
 
-=== Postcopy with hugepages ===
+Postcopy with hugepages
+-----------------------
 
 Postcopy now works with hugetlbfs backed memory:
+
   a) The linux kernel on the destination must support userfault on hugepages.
   b) The huge-page configuration on the source and destination VMs must be
      identical; i.e. RAMBlocks on both sides must use the same page size.
-  c) Note that -mem-path /dev/hugepages  will fall back to allocating normal
+  c) Note that ``-mem-path /dev/hugepages``  will fall back to allocating normal
      RAM if it doesn't have enough hugepages, triggering (b) to fail.
-     Using -mem-prealloc enforces the allocation using hugepages.
+     Using ``-mem-prealloc`` enforces the allocation using hugepages.
   d) Care should be taken with the size of hugepage used; postcopy with 2MB
      hugepages works well, however 1GB hugepages are likely to be problematic
      since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index 954771d0d8..d49444e037 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -53,8 +53,8 @@ Depending on the request type, payload can be:
 
  * A vring state description
    ---------------
-  | index | num |
-  ---------------
+   | index | num |
+   ---------------
 
    Index: a 32-bit index
    Num: a 32-bit number
@@ -66,11 +66,14 @@ Depending on the request type, payload can be:
 
    Index: a 32-bit vring index
    Flags: a 32-bit vring flags
-   Descriptor: a 64-bit user address of the vring descriptor table
-   Used: a 64-bit user address of the vring used ring
-   Available: a 64-bit user address of the vring available ring
+   Descriptor: a 64-bit ring address of the vring descriptor table
+   Used: a 64-bit ring address of the vring used ring
+   Available: a 64-bit ring address of the vring available ring
    Log: a 64-bit guest address for logging
 
+   Note that a ring address is an IOVA if VIRTIO_F_IOMMU_PLATFORM has been
+   negotiated.  Otherwise it is a user address.
+
  * Memory regions description
    ---------------------------------------------------
    | num regions | padding | region0 | ... | region7 |
@@ -273,6 +276,30 @@ Once the source has finished migration, rings will be stopped by
 the source. No further update must be done before rings are
 restarted.
 
+Memory access
+-------------
+
+The master sends a list of vhost memory regions to the slave using the
+VHOST_USER_SET_MEM_TABLE message.  Each region has two base addresses: a guest
+address and a user address.
+
+Messages contain guest addresses and/or user addresses to reference locations
+within the shared memory.  The mapping of these addresses works as follows.
+
+User addresses map to the vhost memory region containing that user address.
+
+When the VIRTIO_F_IOMMU_PLATFORM feature has not been negotiated:
+
+ * Guest addresses map to the vhost memory region containing that guest
+   address.
+
+When the VIRTIO_F_IOMMU_PLATFORM feature has been negotiated:
+
+ * Guest addresses are also called I/O virtual addresses (IOVAs).  They are
+   translated to user addresses via the IOTLB.
+
+ * The vhost memory region guest address is not used.
+
 IOMMU support
 -------------
 
diff --git a/hmp.c b/hmp.c
index 2d72f94193..c6bab5373b 100644
--- a/hmp.c
+++ b/hmp.c
@@ -264,6 +264,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                        info->cpu_throttle_percentage);
     }
 
+    if (info->has_postcopy_blocktime) {
+        monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+                       info->postcopy_blocktime);
+    }
+
+    if (info->has_postcopy_vcpu_blocktime) {
+        Visitor *v;
+        char *str;
+        v = string_output_visitor_new(false, &str);
+        visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+        visit_complete(v, &str);
+        monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
+        g_free(str);
+        visit_free(v);
+    }
     qapi_free_MigrationInfo(info);
     qapi_free_MigrationCapabilityStatusList(caps);
 }
@@ -293,23 +308,23 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 
     if (params) {
         assert(params->has_compress_level);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_LEVEL),
             params->compress_level);
         assert(params->has_compress_threads);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_THREADS),
             params->compress_threads);
         assert(params->has_decompress_threads);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS),
             params->decompress_threads);
         assert(params->has_cpu_throttle_initial);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL),
             params->cpu_throttle_initial);
         assert(params->has_cpu_throttle_increment);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT),
             params->cpu_throttle_increment);
         assert(params->has_tls_creds);
@@ -321,28 +336,28 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
             MigrationParameter_str(MIGRATION_PARAMETER_TLS_HOSTNAME),
             params->tls_hostname);
         assert(params->has_max_bandwidth);
-        monitor_printf(mon, "%s: %" PRId64 " bytes/second\n",
+        monitor_printf(mon, "%s: %" PRIu64 " bytes/second\n",
             MigrationParameter_str(MIGRATION_PARAMETER_MAX_BANDWIDTH),
             params->max_bandwidth);
         assert(params->has_downtime_limit);
-        monitor_printf(mon, "%s: %" PRId64 " milliseconds\n",
+        monitor_printf(mon, "%s: %" PRIu64 " milliseconds\n",
             MigrationParameter_str(MIGRATION_PARAMETER_DOWNTIME_LIMIT),
             params->downtime_limit);
         assert(params->has_x_checkpoint_delay);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_X_CHECKPOINT_DELAY),
             params->x_checkpoint_delay);
         assert(params->has_block_incremental);
         monitor_printf(mon, "%s: %s\n",
             MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL),
             params->block_incremental ? "on" : "off");
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_CHANNELS),
             params->x_multifd_channels);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT),
             params->x_multifd_page_count);
-        monitor_printf(mon, "%s: %" PRId64 "\n",
+        monitor_printf(mon, "%s: %" PRIu64 "\n",
             MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
             params->xbzrle_cache_size);
     }
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 7da51c0569..91c82fdc7a 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -223,7 +223,7 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
 {
     PCIDevice *pdev = PCI_DEVICE(dev);
     int slot = PCI_SLOT(pdev->devfn);
-    int bsel = acpi_pcihp_get_bsel(pdev->bus);
+    int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
     if (bsel < 0) {
         error_setg(errp, "Unsupported bus. Bus doesn't have property '"
                    ACPI_PCIHP_PROP_BSEL "' set");
@@ -246,7 +246,7 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
 {
     PCIDevice *pdev = PCI_DEVICE(dev);
     int slot = PCI_SLOT(pdev->devfn);
-    int bsel = acpi_pcihp_get_bsel(pdev->bus);
+    int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
     if (bsel < 0) {
         error_setg(errp, "Unsupported bus. Bus doesn't have property '"
                    ACPI_PCIHP_PROP_BSEL "' set");
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index a0fb1ce037..8b703455b7 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -460,9 +460,9 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque)
         (memory_region_present(io_as, 0x2f8) ? 0x90 : 0);
 
     if (s->use_acpi_pci_hotplug) {
-        pci_for_each_bus(d->bus, piix4_update_bus_hotplug, s);
+        pci_for_each_bus(pci_get_bus(d), piix4_update_bus_hotplug, s);
     } else {
-        piix4_update_bus_hotplug(d->bus, s);
+        piix4_update_bus_hotplug(pci_get_bus(d), s);
     }
 }
 
@@ -535,7 +535,8 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
     qemu_add_machine_init_done_notifier(&s->machine_ready);
     qemu_register_reset(piix4_reset, s);
 
-    piix4_acpi_system_hot_add_init(pci_address_space_io(dev), dev->bus, s);
+    piix4_acpi_system_hot_add_init(pci_address_space_io(dev),
+                                   pci_get_bus(dev), s);
 
     piix4_pm_add_propeties(s);
 }
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index 105044f666..ba6f47b67b 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -162,21 +162,6 @@ static void vmgenid_update_guest(VmGenIdState *vms)
     }
 }
 
-static void vmgenid_set_guid(Object *obj, const char *value, Error **errp)
-{
-    VmGenIdState *vms = VMGENID(obj);
-
-    if (!strcmp(value, "auto")) {
-        qemu_uuid_generate(&vms->guid);
-    } else if (qemu_uuid_parse(value, &vms->guid) < 0) {
-        error_setg(errp, "'%s. %s': Failed to parse GUID string: %s",
-                   object_get_typename(OBJECT(vms)), VMGENID_GUID, value);
-        return;
-    }
-
-    vmgenid_update_guest(vms);
-}
-
 /* After restoring an image, we need to update the guest memory and notify
  * it of a potential change to VM Generation ID
  */
@@ -224,23 +209,24 @@ static void vmgenid_realize(DeviceState *dev, Error **errp)
     }
 
     qemu_register_reset(vmgenid_handle_reset, vms);
+
+    vmgenid_update_guest(vms);
 }
 
+static Property vmgenid_device_properties[] = {
+    DEFINE_PROP_UUID(VMGENID_GUID, VmGenIdState, guid),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void vmgenid_device_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->vmsd = &vmstate_vmgenid;
     dc->realize = vmgenid_realize;
+    dc->props = vmgenid_device_properties;
     dc->hotpluggable = false;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-
-    object_class_property_add_str(klass, VMGENID_GUID, NULL,
-                                  vmgenid_set_guid, NULL);
-    object_class_property_set_description(klass, VMGENID_GUID,
-                                    "Set Global Unique Identifier "
-                                    "(big-endian) or auto for random value",
-                                    NULL);
 }
 
 static const TypeInfo vmgenid_device_info = {
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index ae11e012c7..6a40869488 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -881,10 +881,10 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
     memory_region_add_subregion(addr_space, 0x801fc000000ULL,
                                 &s->pchip.reg_io);
 
-    b = pci_register_bus(dev, "pci",
-                         typhoon_set_irq, sys_map_irq, s,
-                         &s->pchip.reg_mem, &s->pchip.reg_io,
-                         0, 64, TYPE_PCI_BUS);
+    b = pci_register_root_bus(dev, "pci",
+                              typhoon_set_irq, sys_map_irq, s,
+                              &s->pchip.reg_mem, &s->pchip.reg_io,
+                              0, 64, TYPE_PCI_BUS);
     phb->bus = b;
     qdev_init_nofail(dev);
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 151592b1e5..543f9bd6cc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1618,7 +1618,7 @@ static void machvirt_machine_init(void)
 }
 type_init(machvirt_machine_init);
 
-static void virt_2_11_instance_init(Object *obj)
+static void virt_2_12_instance_init(Object *obj)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
     VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
@@ -1678,10 +1678,25 @@ static void virt_2_11_instance_init(Object *obj)
     vms->irqmap = a15irqmap;
 }
 
+static void virt_machine_2_12_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(2, 12)
+
+#define VIRT_COMPAT_2_11 \
+    HW_COMPAT_2_11
+
+static void virt_2_11_instance_init(Object *obj)
+{
+    virt_2_12_instance_init(obj);
+}
+
 static void virt_machine_2_11_options(MachineClass *mc)
 {
+    virt_machine_2_12_options(mc);
+    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_11);
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(2, 11)
+DEFINE_VIRT_MACHINE(2, 11)
 
 #define VIRT_COMPAT_2_10 \
     HW_COMPAT_2_10
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 1dc80fcea2..24c17800e3 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -10,6 +10,7 @@
 #include "net/hub.h"
 #include "qapi/visitor.h"
 #include "chardev/char.h"
+#include "qemu/uuid.h"
 
 void qdev_prop_set_after_realize(DeviceState *dev, const char *name,
                                   Error **errp)
@@ -883,6 +884,66 @@ const PropertyInfo qdev_prop_pci_host_devaddr = {
     .set = set_pci_host_devaddr,
 };
 
+/* --- UUID --- */
+
+static void get_uuid(Object *obj, Visitor *v, const char *name, void *opaque,
+                     Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    QemuUUID *uuid = qdev_get_prop_ptr(dev, prop);
+    char buffer[UUID_FMT_LEN + 1];
+    char *p = buffer;
+
+    qemu_uuid_unparse(uuid, buffer);
+
+    visit_type_str(v, name, &p, errp);
+}
+
+#define UUID_VALUE_AUTO        "auto"
+
+static void set_uuid(Object *obj, Visitor *v, const char *name, void *opaque,
+                    Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    QemuUUID *uuid = qdev_get_prop_ptr(dev, prop);
+    Error *local_err = NULL;
+    char *str;
+
+    if (dev->realized) {
+        qdev_prop_set_after_realize(dev, name, errp);
+        return;
+    }
+
+    visit_type_str(v, name, &str, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (!strcmp(str, UUID_VALUE_AUTO)) {
+        qemu_uuid_generate(uuid);
+    } else if (qemu_uuid_parse(str, uuid) < 0) {
+        error_set_from_qdev_prop_error(errp, EINVAL, dev, prop, str);
+    }
+    g_free(str);
+}
+
+static void set_default_uuid_auto(Object *obj, const Property *prop)
+{
+    object_property_set_str(obj, UUID_VALUE_AUTO, prop->name, &error_abort);
+}
+
+const PropertyInfo qdev_prop_uuid = {
+    .name  = "str",
+    .description = "UUID (aka GUID) or \"" UUID_VALUE_AUTO
+        "\" for random value (default)",
+    .get   = get_uuid,
+    .set   = set_uuid,
+    .set_default_value = set_default_uuid_auto,
+};
+
 /* --- support for array properties --- */
 
 /* Used as an opaque for the object properties we add for each
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 99365c3e8f..b9fa067f6e 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -518,7 +518,6 @@ static void interface_attach_worker(QXLInstance *sin, QXLWorker *qxl_worker)
     PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl);
 
     trace_qxl_interface_attach_worker(qxl->id);
-    qxl->ssd.worker = qxl_worker;
 }
 
 static void interface_set_compression_level(QXLInstance *sin, int level)
diff --git a/hw/i2c/pm_smbus.c b/hw/i2c/pm_smbus.c
index a044dd1b27..0d26e0f6b5 100644
--- a/hw/i2c/pm_smbus.c
+++ b/hw/i2c/pm_smbus.c
@@ -62,6 +62,9 @@ static void smb_transaction(PMSMBus *s)
     I2CBus *bus = s->smbus;
     int ret;
 
+    assert(s->smb_stat & STS_HOST_BUSY);
+    s->smb_stat &= ~STS_HOST_BUSY;
+
     SMBUS_DPRINTF("SMBus trans addr=0x%02x prot=0x%02x\n", addr, prot);
     /* Transaction isn't exec if STS_DEV_ERR bit set */
     if ((s->smb_stat & STS_DEV_ERR) != 0)  {
@@ -134,6 +137,13 @@ error:
 
 }
 
+static void smb_transaction_start(PMSMBus *s)
+{
+    /* Do not execute immediately the command ; it will be
+     * executed when guest will read SMB_STAT register */
+    s->smb_stat |= STS_HOST_BUSY;
+}
+
 static void smb_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
                               unsigned width)
 {
@@ -149,7 +159,7 @@ static void smb_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
     case SMBHSTCNT:
         s->smb_ctl = val;
         if (val & 0x40)
-            smb_transaction(s);
+            smb_transaction_start(s);
         break;
     case SMBHSTCMD:
         s->smb_cmd = val;
@@ -181,6 +191,10 @@ static uint64_t smb_ioport_readb(void *opaque, hwaddr addr, unsigned width)
     switch(addr) {
     case SMBHSTSTS:
         val = s->smb_stat;
+        if (s->smb_stat & STS_HOST_BUSY) {
+            /* execute command now */
+            smb_transaction(s);
+        }
         break;
     case SMBHSTCNT:
         s->smb_index = 0;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3a5bb0bc2e..fe15d3ba84 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -186,7 +186,7 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
     g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
 
     while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
-        for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
+        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
             vtd_as = vtd_bus->dev_as[devfn_it];
             if (!vtd_as) {
                 continue;
@@ -1002,7 +1002,7 @@ static void vtd_switch_address_space_all(IntelIOMMUState *s)
 
     g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
-        for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
+        for (i = 0; i < PCI_DEVFN_MAX; i++) {
             if (!vtd_bus->dev_as[i]) {
                 continue;
             }
@@ -1294,7 +1294,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
     vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
     if (vtd_bus) {
         devfn = VTD_SID_TO_DEVFN(source_id);
-        for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
+        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
             vtd_as = vtd_bus->dev_as[devfn_it];
             if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
@@ -2327,7 +2327,7 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
     IntelIOMMUNotifierNode *next_node = NULL;
 
     if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
-        error_report("We need to set cache_mode=1 for intel-iommu to enable "
+        error_report("We need to set caching-mode=1 for intel-iommu to enable "
                      "device assignment with IOMMU protection.");
         exit(1);
     }
@@ -2699,7 +2699,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
         *new_key = (uintptr_t)bus;
         /* No corresponding free() */
         vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
-                            X86_IOMMU_PCI_DEVFN_MAX);
+                            PCI_DEVFN_MAX);
         vtd_bus->bus = bus;
         g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
     }
@@ -2982,7 +2982,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     IntelIOMMUState *s = opaque;
     VTDAddressSpace *vtd_as;
 
-    assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX);
+    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
 
     vtd_as = vtd_find_add_as(s, bus, devfn);
     return &vtd_as->as;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 5e47528993..2febd0e136 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -394,7 +394,7 @@ static void pc_xen_hvm_init_pci(MachineState *machine)
 
 static void pc_xen_hvm_init(MachineState *machine)
 {
-    PCIBus *bus;
+    PCMachineState *pcms = PC_MACHINE(machine);
 
     if (!xen_enabled()) {
         error_report("xenfv machine requires the xen accelerator");
@@ -402,11 +402,7 @@ static void pc_xen_hvm_init(MachineState *machine)
     }
 
     pc_xen_hvm_init_pci(machine);
-
-    bus = pci_find_primary_bus();
-    if (bus != NULL) {
-        pci_create_simple(bus, -1, "xen-platform");
-    }
+    pci_create_simple(pcms->bus, -1, "xen-platform");
 }
 #endif
 
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index fc8623c90b..deb7a0c374 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -185,11 +185,11 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v
         if (val & (UNPLUG_IDE_SCSI_DISKS | UNPLUG_AUX_IDE_DISKS |
                    UNPLUG_NVME_DISKS)) {
             DPRINTF("unplug disks\n");
-            pci_unplug_disks(pci_dev->bus, val);
+            pci_unplug_disks(pci_get_bus(pci_dev), val);
         }
         if (val & UNPLUG_ALL_NICS) {
             DPRINTF("unplug nics\n");
-            pci_unplug_nics(pci_dev->bus);
+            pci_unplug_nics(pci_get_bus(pci_dev));
         }
         break;
     }
@@ -371,17 +371,17 @@ static void xen_platform_ioport_writeb(void *opaque, hwaddr addr,
              * If VMDP was to control both disk and LAN it would use 4.
              * If it controlled just disk or just LAN, it would use 8 below.
              */
-            pci_unplug_disks(pci_dev->bus, UNPLUG_IDE_SCSI_DISKS);
-            pci_unplug_nics(pci_dev->bus);
+            pci_unplug_disks(pci_get_bus(pci_dev), UNPLUG_IDE_SCSI_DISKS);
+            pci_unplug_nics(pci_get_bus(pci_dev));
         }
         break;
     case 8:
         switch (val) {
         case 1:
-            pci_unplug_disks(pci_dev->bus, UNPLUG_IDE_SCSI_DISKS);
+            pci_unplug_disks(pci_get_bus(pci_dev), UNPLUG_IDE_SCSI_DISKS);
             break;
         case 2:
-            pci_unplug_nics(pci_dev->bus);
+            pci_unplug_nics(pci_get_bus(pci_dev));
             break;
         default:
             log_writeb(s, (uint32_t)val);
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index dd49b6c335..8ca6ceeb9b 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -896,13 +896,6 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
             val |= (1 << 8);
         }
         return val;
-    case 0xd28: /* Configurable Fault Status.  */
-        /* The BFSR bits [15:8] are shared between security states
-         * and we store them in the NS copy
-         */
-        val = cpu->env.v7m.cfsr[attrs.secure];
-        val |= cpu->env.v7m.cfsr[M_REG_NS] & R_V7M_CFSR_BFSR_MASK;
-        return val;
     case 0xd2c: /* Hard Fault Status.  */
         return cpu->env.v7m.hfsr;
     case 0xd30: /* Debug Fault Status.  */
@@ -1280,15 +1273,6 @@ static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value,
         s->vectors[ARMV7M_EXCP_DEBUG].active = (value & (1 << 8)) != 0;
         nvic_irq_update(s);
         break;
-    case 0xd28: /* Configurable Fault Status.  */
-        cpu->env.v7m.cfsr[attrs.secure] &= ~value; /* W1C */
-        if (attrs.secure) {
-            /* The BFSR bits [15:8] are shared between security states
-             * and we store them in the NS copy.
-             */
-            cpu->env.v7m.cfsr[M_REG_NS] &= ~(value & R_V7M_CFSR_BFSR_MASK);
-        }
-        break;
     case 0xd2c: /* Hard Fault Status.  */
         cpu->env.v7m.hfsr &= ~value; /* W1C */
         break;
@@ -1667,6 +1651,14 @@ static MemTxResult nvic_sysreg_read(void *opaque, hwaddr addr,
             val = deposit32(val, i * 8, 8, get_prio(s, hdlidx, sbank));
         }
         break;
+    case 0xd28 ... 0xd2b: /* Configurable Fault Status (CFSR) */
+        /* The BFSR bits [15:8] are shared between security states
+         * and we store them in the NS copy
+         */
+        val = s->cpu->env.v7m.cfsr[attrs.secure];
+        val |= s->cpu->env.v7m.cfsr[M_REG_NS] & R_V7M_CFSR_BFSR_MASK;
+        val = extract32(val, (offset - 0xd28) * 8, size * 8);
+        break;
     case 0xfe0 ... 0xfff: /* ID.  */
         if (offset & 3) {
             val = 0;
@@ -1765,6 +1757,20 @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
         }
         nvic_irq_update(s);
         return MEMTX_OK;
+    case 0xd28 ... 0xd2b: /* Configurable Fault Status (CFSR) */
+        /* All bits are W1C, so construct 32 bit value with 0s in
+         * the parts not written by the access size
+         */
+        value <<= ((offset - 0xd28) * 8);
+
+        s->cpu->env.v7m.cfsr[attrs.secure] &= ~value;
+        if (attrs.secure) {
+            /* The BFSR bits [15:8] are shared between security states
+             * and we store them in the NS copy.
+             */
+            s->cpu->env.v7m.cfsr[M_REG_NS] &= ~(value & R_V7M_CFSR_BFSR_MASK);
+        }
+        return MEMTX_OK;
     }
     if (size == 4) {
         nvic_writel(s, offset, value, attrs);
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index ec3c9f7d0b..adcf077fa5 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -162,7 +162,7 @@ static void ich9_cc_write(void *opaque, hwaddr addr,
 
     ich9_cc_addr_len(&addr, &len);
     memcpy(lpc->chip_config + addr, &val, len);
-    pci_bus_fire_intx_routing_notifier(lpc->d.bus);
+    pci_bus_fire_intx_routing_notifier(pci_get_bus(&lpc->d));
     ich9_cc_update(lpc);
 }
 
@@ -218,7 +218,7 @@ static void ich9_lpc_update_pic(ICH9LPCState *lpc, int gsi)
         int tmp_dis;
         ich9_lpc_pic_irq(lpc, i, &tmp_irq, &tmp_dis);
         if (!tmp_dis && tmp_irq == gsi) {
-            pic_level |= pci_bus_get_irq_level(lpc->d.bus, i);
+            pic_level |= pci_bus_get_irq_level(pci_get_bus(&lpc->d), i);
         }
     }
     if (gsi == lpc->sci_gsi) {
@@ -246,7 +246,7 @@ static void ich9_lpc_update_apic(ICH9LPCState *lpc, int gsi)
 
     assert(gsi >= ICH9_LPC_PIC_NUM_PINS);
 
-    level |= pci_bus_get_irq_level(lpc->d.bus, ich9_gsi_to_pirq(gsi));
+    level |= pci_bus_get_irq_level(pci_get_bus(&lpc->d), ich9_gsi_to_pirq(gsi));
     if (gsi == lpc->sci_gsi) {
         level |= lpc->sci_level;
     }
@@ -524,10 +524,10 @@ static void ich9_lpc_config_write(PCIDevice *d,
         ich9_lpc_rcba_update(lpc, rcba_old);
     }
     if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) {
-        pci_bus_fire_intx_routing_notifier(lpc->d.bus);
+        pci_bus_fire_intx_routing_notifier(pci_get_bus(&lpc->d));
     }
     if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) {
-        pci_bus_fire_intx_routing_notifier(lpc->d.bus);
+        pci_bus_fire_intx_routing_notifier(pci_get_bus(&lpc->d));
     }
     if (ranges_overlap(addr, len, ICH9_LPC_GEN_PMCON_1, 8)) {
         ich9_lpc_pmcon_update(lpc);
diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 5a9dad9aae..a9c222a967 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -1171,12 +1171,12 @@ PCIBus *gt64120_register(qemu_irq *pic)
     phb = PCI_HOST_BRIDGE(dev);
     memory_region_init(&d->pci0_mem, OBJECT(dev), "pci0-mem", UINT32_MAX);
     address_space_init(&d->pci0_mem_as, &d->pci0_mem, "pci0-mem");
-    phb->bus = pci_register_bus(dev, "pci",
-                                gt64120_pci_set_irq, gt64120_pci_map_irq,
-                                pic,
-                                &d->pci0_mem,
-                                get_system_io(),
-                                PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS);
+    phb->bus = pci_register_root_bus(dev, "pci",
+                                     gt64120_pci_set_irq, gt64120_pci_map_irq,
+                                     pic,
+                                     &d->pci0_mem,
+                                     get_system_io(),
+                                     PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS);
     qdev_init_nofail(dev);
     memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d, "isd-mem", 0x1000);
 
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index b8404cb2e2..0654d594c1 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2356,7 +2356,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
     vmxnet3_net_init(s);
 
     if (pci_is_express(pci_dev)) {
-        if (pci_bus_is_express(pci_dev->bus)) {
+        if (pci_bus_is_express(pci_get_bus(pci_dev))) {
             pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET);
         }
 
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 9e799dc10f..e62de4218f 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -51,7 +51,8 @@ typedef struct PXBDev {
 
 static PXBDev *convert_to_pxb(PCIDevice *dev)
 {
-    return pci_bus_is_express(dev->bus) ? PXB_PCIE_DEV(dev) : PXB_DEV(dev);
+    return pci_bus_is_express(pci_get_bus(dev))
+        ? PXB_PCIE_DEV(dev) : PXB_DEV(dev);
 }
 
 static GList *pxb_dev_list;
@@ -165,7 +166,7 @@ static const TypeInfo pxb_host_info = {
  */
 static void pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus, Error **errp)
 {
-    PCIBus *bus = dev->bus;
+    PCIBus *bus = pci_get_bus(dev);
     int pxb_bus_num = pci_bus_num(pxb_bus);
 
     if (bus->parent_dev) {
@@ -179,12 +180,12 @@ static void pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus, Error **errp)
             return;
         }
     }
-    QLIST_INSERT_HEAD(&dev->bus->child, pxb_bus, sibling);
+    QLIST_INSERT_HEAD(&pci_get_bus(dev)->child, pxb_bus, sibling);
 }
 
 static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
 {
-    PCIDevice *pxb = pci_dev->bus->parent_dev;
+    PCIDevice *pxb = pci_get_bus(pci_dev)->parent_dev;
 
     /*
      * The bios does not index the pxb slot number when
@@ -229,9 +230,9 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
 
     ds = qdev_create(NULL, TYPE_PXB_HOST);
     if (pcie) {
-        bus = pci_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
+        bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
     } else {
-        bus = pci_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
+        bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
         bds = qdev_create(BUS(bus), "pci-bridge");
         bds->id = dev_name;
         qdev_prop_set_uint8(bds, PCI_BRIDGE_DEV_PROP_CHASSIS_NR, pxb->bus_nr);
@@ -239,8 +240,8 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
     }
 
     bus->parent_dev = dev;
-    bus->address_space_mem = dev->bus->address_space_mem;
-    bus->address_space_io = dev->bus->address_space_io;
+    bus->address_space_mem = pci_get_bus(dev)->address_space_mem;
+    bus->address_space_io = pci_get_bus(dev)->address_space_io;
     bus->map_irq = pxb_map_irq_fn;
 
     PCI_HOST_BRIDGE(ds)->bus = bus;
@@ -271,7 +272,7 @@ err_register_bus:
 
 static void pxb_dev_realize(PCIDevice *dev, Error **errp)
 {
-    if (pci_bus_is_express(dev->bus)) {
+    if (pci_bus_is_express(pci_get_bus(dev))) {
         error_setg(errp, "pxb devices cannot reside on a PCIe bus");
         return;
     }
@@ -323,7 +324,7 @@ static const TypeInfo pxb_dev_info = {
 
 static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp)
 {
-    if (!pci_bus_is_express(dev->bus)) {
+    if (!pci_bus_is_express(pci_get_bus(dev))) {
         error_setg(errp, "pxb-pcie devices cannot reside on a PCI bus");
         return;
     }
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index 3e796fb6fc..ec676f94b6 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -433,11 +433,11 @@ static void pci_pbm_realize(DeviceState *dev, Error **errp)
     memory_region_add_subregion(get_system_memory(), s->mem_base,
                                 &s->pci_mmio);
 
-    phb->bus = pci_register_bus(dev, "pci",
-                                pci_apb_set_irq, pci_apb_map_irq, s,
-                                &s->pci_mmio,
-                                &s->pci_ioport,
-                                0, 32, TYPE_PCI_BUS);
+    phb->bus = pci_register_root_bus(dev, "pci",
+                                     pci_apb_set_irq, pci_apb_map_irq, s,
+                                     &s->pci_mmio,
+                                     &s->pci_ioport,
+                                     0, 32, TYPE_PCI_BUS);
 
     pci_create_simple(phb->bus, 0, "pbm-pci");
 
diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 9f61e27edc..f08593feab 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -714,10 +714,10 @@ static int bonito_pcihost_initfn(SysBusDevice *dev)
 {
     PCIHostState *phb = PCI_HOST_BRIDGE(dev);
 
-    phb->bus = pci_register_bus(DEVICE(dev), "pci",
-                                pci_bonito_set_irq, pci_bonito_map_irq, dev,
-                                get_system_memory(), get_system_io(),
-                                0x28, 32, TYPE_PCI_BUS);
+    phb->bus = pci_register_root_bus(DEVICE(dev), "pci",
+                                     pci_bonito_set_irq, pci_bonito_map_irq,
+                                     dev, get_system_memory(), get_system_io(),
+                                     0x28, 32, TYPE_PCI_BUS);
 
     return 0;
 }
diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c
index edf305b1fd..2583b151a4 100644
--- a/hw/pci-host/gpex.c
+++ b/hw/pci-host/gpex.c
@@ -89,9 +89,9 @@ static void gpex_host_realize(DeviceState *dev, Error **errp)
         s->irq_num[i] = -1;
     }
 
-    pci->bus = pci_register_bus(dev, "pcie.0", gpex_set_irq,
-                                pci_swizzle_map_irq_fn, s, &s->io_mmio,
-                                &s->io_ioport, 0, 4, TYPE_PCIE_BUS);
+    pci->bus = pci_register_root_bus(dev, "pcie.0", gpex_set_irq,
+                                     pci_swizzle_map_irq_fn, s, &s->io_mmio,
+                                     &s->io_ioport, 0, 4, TYPE_PCIE_BUS);
 
     qdev_set_parent_bus(DEVICE(&s->gpex_root), BUS(pci->bus));
     pci_bus_set_route_irq_fn(pci->bus, gpex_route_intx_pin_to_irq);
diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index 38cd279b6b..3caf1ccb37 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c
@@ -82,13 +82,13 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
     memory_region_add_subregion(address_space_mem, 0x80000000ULL,
                                 &d->pci_hole);
 
-    phb->bus = pci_register_bus(dev, NULL,
-                                pci_grackle_set_irq,
-                                pci_grackle_map_irq,
-                                pic,
-                                &d->pci_mmio,
-                                address_space_io,
-                                0, 4, TYPE_PCI_BUS);
+    phb->bus = pci_register_root_bus(dev, NULL,
+                                     pci_grackle_set_irq,
+                                     pci_grackle_map_irq,
+                                     pic,
+                                     &d->pci_mmio,
+                                     address_space_io,
+                                     0, 4, TYPE_PCI_BUS);
 
     pci_create_simple(phb->bus, 0, "grackle");
     qdev_init_nofail(dev);
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index a684a7cca9..0e608347c1 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -361,8 +361,8 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type,
 
     dev = qdev_create(NULL, host_type);
     s = PCI_HOST_BRIDGE(dev);
-    b = pci_bus_new(dev, NULL, pci_address_space,
-                    address_space_io, 0, TYPE_PCI_BUS);
+    b = pci_root_bus_new(dev, NULL, pci_address_space,
+                         address_space_io, 0, TYPE_PCI_BUS);
     s->bus = b;
     object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev), NULL);
     qdev_init_nofail(dev);
@@ -512,12 +512,12 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
 /* irq routing is changed. so rebuild bitmap */
 static void piix3_update_irq_levels(PIIX3State *piix3)
 {
+    PCIBus *bus = pci_get_bus(&piix3->dev);
     int pirq;
 
     piix3->pic_levels = 0;
     for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) {
-        piix3_set_irq_level(piix3, pirq,
-                            pci_bus_get_irq_level(piix3->dev.bus, pirq));
+        piix3_set_irq_level(piix3, pirq, pci_bus_get_irq_level(bus, pirq));
     }
 }
 
@@ -529,7 +529,7 @@ static void piix3_write_config(PCIDevice *dev,
         PIIX3State *piix3 = PIIX3_PCI_DEVICE(dev);
         int pic_irq;
 
-        pci_bus_fire_intx_routing_notifier(piix3->dev.bus);
+        pci_bus_fire_intx_routing_notifier(pci_get_bus(&piix3->dev));
         piix3_update_irq_levels(piix3);
         for (pic_irq = 0; pic_irq < PIIX_NUM_PIC_IRQS; pic_irq++) {
             piix3_set_irq_pic(piix3, pic_irq);
@@ -601,7 +601,7 @@ static int piix3_post_load(void *opaque, int version_id)
     piix3->pic_levels = 0;
     for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) {
         piix3_set_irq_level_internal(piix3, pirq,
-                            pci_bus_get_irq_level(piix3->dev.bus, pirq));
+            pci_bus_get_irq_level(pci_get_bus(&piix3->dev), pirq));
     }
     return 0;
 }
@@ -613,7 +613,7 @@ static int piix3_pre_save(void *opaque)
 
     for (i = 0; i < ARRAY_SIZE(piix3->pci_irq_levels_vmstate); i++) {
         piix3->pci_irq_levels_vmstate[i] =
-            pci_bus_get_irq_level(piix3->dev.bus, i);
+            pci_bus_get_irq_level(pci_get_bus(&piix3->dev), i);
     }
 
     return 0;
@@ -804,60 +804,55 @@ static const IGDHostInfo igd_host_bridge_infos[] = {
     {0xa8, 4},  /* SNB: base of GTT stolen memory */
 };
 
-static int host_pci_config_read(int pos, int len, uint32_t *val)
+static void host_pci_config_read(int pos, int len, uint32_t *val, Error **errp)
 {
-    char path[PATH_MAX];
-    int config_fd;
-    ssize_t size = sizeof(path);
+    int rc, config_fd;
     /* Access real host bridge. */
-    int rc = snprintf(path, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
-                      0, 0, 0, 0, "config");
-    int ret = 0;
-
-    if (rc >= size || rc < 0) {
-        return -ENODEV;
-    }
+    char *path = g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
+                                 0, 0, 0, 0, "config");
 
     config_fd = open(path, O_RDWR);
     if (config_fd < 0) {
-        return -ENODEV;
+        error_setg_errno(errp, errno, "Failed to open: %s", path);
+        goto out;
     }
 
     if (lseek(config_fd, pos, SEEK_SET) != pos) {
-        ret = -errno;
-        goto out;
+        error_setg_errno(errp, errno, "Failed to seek: %s", path);
+        goto out_close_fd;
     }
 
     do {
         rc = read(config_fd, (uint8_t *)val, len);
     } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
     if (rc != len) {
-        ret = -errno;
+        error_setg_errno(errp, errno, "Failed to read: %s", path);
     }
 
-out:
+out_close_fd:
     close(config_fd);
-    return ret;
+out:
+    g_free(path);
 }
 
-static int igd_pt_i440fx_initfn(struct PCIDevice *pci_dev)
+static void igd_pt_i440fx_realize(PCIDevice *pci_dev, Error **errp)
 {
     uint32_t val = 0;
-    int rc, i, num;
+    int i, num;
     int pos, len;
+    Error *local_err = NULL;
 
     num = ARRAY_SIZE(igd_host_bridge_infos);
     for (i = 0; i < num; i++) {
         pos = igd_host_bridge_infos[i].offset;
         len = igd_host_bridge_infos[i].len;
-        rc = host_pci_config_read(pos, len, &val);
-        if (rc) {
-            return -ENODEV;
+        host_pci_config_read(pos, len, &val, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
         }
         pci_default_write_config(pci_dev, pos, val, len);
     }
-
-    return 0;
 }
 
 static void igd_passthrough_i440fx_class_init(ObjectClass *klass, void *data)
@@ -865,7 +860,7 @@ static void igd_passthrough_i440fx_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
-    k->init = igd_pt_i440fx_initfn;
+    k->realize = igd_pt_i440fx_realize;
     dc->desc = "IGD Passthrough Host bridge";
 }
 
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index 279badc894..eb75e080fc 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -460,9 +460,9 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
     /* PIO lives at the bottom of our bus space */
     memory_region_add_subregion_overlap(&s->busmem, 0, &s->pio, -2);
 
-    b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
-                         mpc85xx_pci_map_irq, s, &s->busmem, &s->pio,
-                         PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS);
+    b = pci_register_root_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
+                              mpc85xx_pci_map_irq, s, &s->busmem, &s->pio,
+                              PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS);
     h->bus = b;
 
     /* Set up PCI view of memory */
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 92eed0f3e1..01f67f9db1 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -269,8 +269,8 @@ static void raven_pcihost_initfn(Object *obj)
     memory_region_add_subregion_overlap(address_space_mem, 0x80000000,
                                         &s->pci_io_non_contiguous, 1);
     memory_region_add_subregion(address_space_mem, 0xc0000000, &s->pci_memory);
-    pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), NULL,
-                        &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS);
+    pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), NULL,
+                             &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS);
 
     /* Bus master address space */
     memory_region_init(&s->bm, obj, "bm-raven", UINT32_MAX);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 6cb9a8d121..a36a1195e4 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -51,9 +51,10 @@ static void q35_host_realize(DeviceState *dev, Error **errp)
     sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem);
     sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4);
 
-    pci->bus = pci_bus_new(DEVICE(s), "pcie.0",
-                           s->mch.pci_address_space, s->mch.address_space_io,
-                           0, TYPE_PCIE_BUS);
+    pci->bus = pci_root_bus_new(DEVICE(s), "pcie.0",
+                                s->mch.pci_address_space,
+                                s->mch.address_space_io,
+                                0, TYPE_PCIE_BUS);
     PC_MACHINE(qdev_get_machine())->bus = pci->bus;
     qdev_set_parent_bus(DEVICE(&s->mch), BUS(pci->bus));
     qdev_init_nofail(DEVICE(&s->mch));
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index ea5c265718..5d8ccaa711 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -233,12 +233,12 @@ PCIBus *pci_pmac_init(qemu_irq *pic,
     memory_region_add_subregion(address_space_mem, 0x80000000ULL,
                                 &d->pci_hole);
 
-    h->bus = pci_register_bus(dev, NULL,
-                              pci_unin_set_irq, pci_unin_map_irq,
-                              pic,
-                              &d->pci_mmio,
-                              address_space_io,
-                              PCI_DEVFN(11, 0), 4, TYPE_PCI_BUS);
+    h->bus = pci_register_root_bus(dev, NULL,
+                                   pci_unin_set_irq, pci_unin_map_irq,
+                                   pic,
+                                   &d->pci_mmio,
+                                   address_space_io,
+                                   PCI_DEVFN(11, 0), 4, TYPE_PCI_BUS);
 
 #if 0
     pci_create_simple(h->bus, PCI_DEVFN(11, 0), "uni-north");
@@ -299,12 +299,12 @@ PCIBus *pci_pmac_u3_init(qemu_irq *pic,
     memory_region_add_subregion(address_space_mem, 0x80000000ULL,
                                 &d->pci_hole);
 
-    h->bus = pci_register_bus(dev, NULL,
-                              pci_unin_set_irq, pci_unin_map_irq,
-                              pic,
-                              &d->pci_mmio,
-                              address_space_io,
-                              PCI_DEVFN(11, 0), 4, TYPE_PCI_BUS);
+    h->bus = pci_register_root_bus(dev, NULL,
+                                   pci_unin_set_irq, pci_unin_map_irq,
+                                   pic,
+                                   &d->pci_mmio,
+                                   address_space_io,
+                                   PCI_DEVFN(11, 0), 4, TYPE_PCI_BUS);
 
     sysbus_mmio_map(s, 0, 0xf0800000);
     sysbus_mmio_map(s, 1, 0xf0c00000);
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 6394a520fc..d0b02bdc47 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -311,7 +311,7 @@ static const MemoryRegionOps pci_vpb_config_ops = {
 
 static int pci_vpb_map_irq(PCIDevice *d, int irq_num)
 {
-    PCIVPBState *s = container_of(d->bus, PCIVPBState, pci_bus);
+    PCIVPBState *s = container_of(pci_get_bus(d), PCIVPBState, pci_bus);
 
     if (s->irq_mapping == PCI_VPB_IRQMAP_BROKEN) {
         /* Legacy broken IRQ mapping for compatibility with old and
@@ -399,9 +399,9 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp)
     memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
     memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);
 
-    pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci",
-                        &s->pci_mem_space, &s->pci_io_space,
-                        PCI_DEVFN(11, 0), TYPE_PCI_BUS);
+    pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci",
+                             &s->pci_mem_space, &s->pci_io_space,
+                             PCI_DEVFN(11, 0), TYPE_PCI_BUS);
     h->bus = &s->pci_bus;
 
     object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST);
diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c
index 7659253090..53b561f81f 100644
--- a/hw/pci-host/xilinx-pcie.c
+++ b/hw/pci-host/xilinx-pcie.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci-host/xilinx-pcie.h"
 
@@ -129,9 +130,9 @@ static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp)
     sysbus_init_mmio(sbd, &pex->mmio);
     sysbus_init_mmio(sbd, &s->mmio);
 
-    pci->bus = pci_register_bus(dev, s->name, xilinx_pcie_set_irq,
-                                pci_swizzle_map_irq_fn, s, &s->mmio,
-                                &s->io, 0, 4, TYPE_PCIE_BUS);
+    pci->bus = pci_register_root_bus(dev, s->name, xilinx_pcie_set_irq,
+                                     pci_swizzle_map_irq_fn, s, &s->mmio,
+                                     &s->io, 0, 4, TYPE_PCIE_BUS);
 
     qdev_set_parent_bus(DEVICE(&s->root), BUS(pci->bus));
     qdev_init_nofail(DEVICE(&s->root));
@@ -267,24 +268,22 @@ static void xilinx_pcie_root_config_write(PCIDevice *d, uint32_t address,
     }
 }
 
-static int xilinx_pcie_root_init(PCIDevice *dev)
+static void xilinx_pcie_root_realize(PCIDevice *pci_dev, Error **errp)
 {
-    BusState *bus = qdev_get_parent_bus(DEVICE(dev));
+    BusState *bus = qdev_get_parent_bus(DEVICE(pci_dev));
     XilinxPCIEHost *s = XILINX_PCIE_HOST(bus->parent);
 
-    pci_set_word(dev->config + PCI_COMMAND,
+    pci_set_word(pci_dev->config + PCI_COMMAND,
                  PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
-    pci_set_word(dev->config + PCI_MEMORY_BASE, s->mmio_base >> 16);
-    pci_set_word(dev->config + PCI_MEMORY_LIMIT,
+    pci_set_word(pci_dev->config + PCI_MEMORY_BASE, s->mmio_base >> 16);
+    pci_set_word(pci_dev->config + PCI_MEMORY_LIMIT,
                  ((s->mmio_base + s->mmio_size - 1) >> 16) & 0xfff0);
 
-    pci_bridge_initfn(dev, TYPE_PCI_BUS);
+    pci_bridge_initfn(pci_dev, TYPE_PCI_BUS);
 
-    if (pcie_endpoint_cap_v1_init(dev, 0x80) < 0) {
-        hw_error("Failed to initialize PCIe capability");
+    if (pcie_endpoint_cap_v1_init(pci_dev, 0x80) < 0) {
+        error_setg(errp, "Failed to initialize PCIe capability");
     }
-
-    return 0;
 }
 
 static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data)
@@ -300,7 +299,7 @@ static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data)
     k->class_id = PCI_CLASS_BRIDGE_HOST;
     k->is_express = true;
     k->is_bridge = true;
-    k->init = xilinx_pcie_root_init;
+    k->realize = xilinx_pcie_root_realize;
     k->exit = pci_bridge_exitfn;
     dc->reset = pci_bridge_reset;
     k->config_read = xilinx_pcie_root_config_read;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b2d139bd9a..e8f9fc1c27 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -222,7 +222,7 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change)
 {
     PCIBus *bus;
     for (;;) {
-        bus = pci_dev->bus;
+        bus = pci_get_bus(pci_dev);
         irq_num = bus->map_irq(pci_dev, irq_num);
         if (bus->set_irq)
             break;
@@ -331,31 +331,15 @@ static void pci_host_bus_register(DeviceState *host)
     QLIST_INSERT_HEAD(&pci_host_bridges, host_bridge, next);
 }
 
-PCIBus *pci_find_primary_bus(void)
-{
-    PCIBus *primary_bus = NULL;
-    PCIHostState *host;
-
-    QLIST_FOREACH(host, &pci_host_bridges, next) {
-        if (primary_bus) {
-            /* We have multiple root buses, refuse to select a primary */
-            return NULL;
-        }
-        primary_bus = host->bus;
-    }
-
-    return primary_bus;
-}
-
 PCIBus *pci_device_root_bus(const PCIDevice *d)
 {
-    PCIBus *bus = d->bus;
+    PCIBus *bus = pci_get_bus(d);
 
     while (!pci_bus_is_root(bus)) {
         d = bus->parent_dev;
         assert(d != NULL);
 
-        bus = d->bus;
+        bus = pci_get_bus(d);
     }
 
     return bus;
@@ -376,10 +360,10 @@ const char *pci_root_bus_path(PCIDevice *dev)
     return rootbus->qbus.name;
 }
 
-static void pci_bus_init(PCIBus *bus, DeviceState *parent,
-                         MemoryRegion *address_space_mem,
-                         MemoryRegion *address_space_io,
-                         uint8_t devfn_min)
+static void pci_root_bus_init(PCIBus *bus, DeviceState *parent,
+                              MemoryRegion *address_space_mem,
+                              MemoryRegion *address_space_io,
+                              uint8_t devfn_min)
 {
     assert(PCI_FUNC(devfn_min) == 0);
     bus->devfn_min = devfn_min;
@@ -403,25 +387,27 @@ bool pci_bus_is_root(PCIBus *bus)
     return PCI_BUS_GET_CLASS(bus)->is_root(bus);
 }
 
-void pci_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
-                         const char *name,
-                         MemoryRegion *address_space_mem,
-                         MemoryRegion *address_space_io,
-                         uint8_t devfn_min, const char *typename)
+void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
+                              const char *name,
+                              MemoryRegion *address_space_mem,
+                              MemoryRegion *address_space_io,
+                              uint8_t devfn_min, const char *typename)
 {
     qbus_create_inplace(bus, bus_size, typename, parent, name);
-    pci_bus_init(bus, parent, address_space_mem, address_space_io, devfn_min);
+    pci_root_bus_init(bus, parent, address_space_mem, address_space_io,
+                      devfn_min);
 }
 
-PCIBus *pci_bus_new(DeviceState *parent, const char *name,
-                    MemoryRegion *address_space_mem,
-                    MemoryRegion *address_space_io,
-                    uint8_t devfn_min, const char *typename)
+PCIBus *pci_root_bus_new(DeviceState *parent, const char *name,
+                         MemoryRegion *address_space_mem,
+                         MemoryRegion *address_space_io,
+                         uint8_t devfn_min, const char *typename)
 {
     PCIBus *bus;
 
     bus = PCI_BUS(qbus_create(typename, parent, name));
-    pci_bus_init(bus, parent, address_space_mem, address_space_io, devfn_min);
+    pci_root_bus_init(bus, parent, address_space_mem, address_space_io,
+                      devfn_min);
     return bus;
 }
 
@@ -435,17 +421,18 @@ void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
     bus->irq_count = g_malloc0(nirq * sizeof(bus->irq_count[0]));
 }
 
-PCIBus *pci_register_bus(DeviceState *parent, const char *name,
-                         pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
-                         void *irq_opaque,
-                         MemoryRegion *address_space_mem,
-                         MemoryRegion *address_space_io,
-                         uint8_t devfn_min, int nirq, const char *typename)
+PCIBus *pci_register_root_bus(DeviceState *parent, const char *name,
+                              pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
+                              void *irq_opaque,
+                              MemoryRegion *address_space_mem,
+                              MemoryRegion *address_space_io,
+                              uint8_t devfn_min, int nirq,
+                              const char *typename)
 {
     PCIBus *bus;
 
-    bus = pci_bus_new(parent, name, address_space_mem,
-                      address_space_io, devfn_min, typename);
+    bus = pci_root_bus_new(parent, name, address_space_mem,
+                           address_space_io, devfn_min, typename);
     pci_bus_irqs(bus, set_irq, map_irq, irq_opaque, nirq);
     return bus;
 }
@@ -879,7 +866,7 @@ static void pci_config_free(PCIDevice *pci_dev)
 
 static void do_pci_unregister_device(PCIDevice *pci_dev)
 {
-    pci_dev->bus->devices[pci_dev->devfn] = NULL;
+    pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
     if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) {
@@ -900,7 +887,7 @@ static uint16_t pci_req_id_cache_extract(PCIReqIDCache *cache)
         result = pci_get_bdf(cache->dev);
         break;
     case PCI_REQ_ID_SECONDARY_BUS:
-        bus_n = pci_bus_num(cache->dev->bus);
+        bus_n = pci_dev_bus_num(cache->dev);
         result = PCI_BUILD_BDF(bus_n, 0);
         break;
     default:
@@ -930,9 +917,9 @@ static PCIReqIDCache pci_req_id_cache_get(PCIDevice *dev)
         .type = PCI_REQ_ID_BDF,
     };
 
-    while (!pci_bus_is_root(dev->bus)) {
+    while (!pci_bus_is_root(pci_get_bus(dev))) {
         /* We are under PCI/PCIe bridges */
-        parent = dev->bus->parent_dev;
+        parent = pci_get_bus(dev)->parent_dev;
         if (pci_is_express(parent)) {
             if (pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
                 /* When we pass through PCIe-to-PCI/PCIX bridges, we
@@ -975,7 +962,7 @@ static bool pci_bus_devfn_reserved(PCIBus *bus, int devfn)
 }
 
 /* -1 for devfn means auto assign */
-static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
+static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
                                          const char *name, int devfn,
                                          Error **errp)
 {
@@ -984,8 +971,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     PCIConfigWriteFunc *config_write = pc->config_write;
     Error *local_err = NULL;
     DeviceState *dev = DEVICE(pci_dev);
+    PCIBus *bus = pci_get_bus(pci_dev);
 
-    pci_dev->bus = bus;
     /* Only pci bridges can be attached to extra PCI root buses */
     if (pci_bus_is_root(bus) && bus->parent_dev && !pc->is_bridge) {
         error_setg(errp,
@@ -1139,8 +1126,8 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     r->type = type;
     r->memory = memory;
     r->address_space = type & PCI_BASE_ADDRESS_SPACE_IO
-                        ? pci_dev->bus->address_space_io
-                        : pci_dev->bus->address_space_mem;
+                        ? pci_get_bus(pci_dev)->address_space_io
+                        : pci_get_bus(pci_dev)->address_space_mem;
 
     wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
@@ -1182,21 +1169,23 @@ static void pci_update_vga(PCIDevice *pci_dev)
 void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem,
                       MemoryRegion *io_lo, MemoryRegion *io_hi)
 {
+    PCIBus *bus = pci_get_bus(pci_dev);
+
     assert(!pci_dev->has_vga);
 
     assert(memory_region_size(mem) == QEMU_PCI_VGA_MEM_SIZE);
     pci_dev->vga_regions[QEMU_PCI_VGA_MEM] = mem;
-    memory_region_add_subregion_overlap(pci_dev->bus->address_space_mem,
+    memory_region_add_subregion_overlap(bus->address_space_mem,
                                         QEMU_PCI_VGA_MEM_BASE, mem, 1);
 
     assert(memory_region_size(io_lo) == QEMU_PCI_VGA_IO_LO_SIZE);
     pci_dev->vga_regions[QEMU_PCI_VGA_IO_LO] = io_lo;
-    memory_region_add_subregion_overlap(pci_dev->bus->address_space_io,
+    memory_region_add_subregion_overlap(bus->address_space_io,
                                         QEMU_PCI_VGA_IO_LO_BASE, io_lo, 1);
 
     assert(memory_region_size(io_hi) == QEMU_PCI_VGA_IO_HI_SIZE);
     pci_dev->vga_regions[QEMU_PCI_VGA_IO_HI] = io_hi;
-    memory_region_add_subregion_overlap(pci_dev->bus->address_space_io,
+    memory_region_add_subregion_overlap(bus->address_space_io,
                                         QEMU_PCI_VGA_IO_HI_BASE, io_hi, 1);
     pci_dev->has_vga = true;
 
@@ -1205,15 +1194,17 @@ void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem,
 
 void pci_unregister_vga(PCIDevice *pci_dev)
 {
+    PCIBus *bus = pci_get_bus(pci_dev);
+
     if (!pci_dev->has_vga) {
         return;
     }
 
-    memory_region_del_subregion(pci_dev->bus->address_space_mem,
+    memory_region_del_subregion(bus->address_space_mem,
                                 pci_dev->vga_regions[QEMU_PCI_VGA_MEM]);
-    memory_region_del_subregion(pci_dev->bus->address_space_io,
+    memory_region_del_subregion(bus->address_space_io,
                                 pci_dev->vga_regions[QEMU_PCI_VGA_IO_LO]);
-    memory_region_del_subregion(pci_dev->bus->address_space_io,
+    memory_region_del_subregion(bus->address_space_io,
                                 pci_dev->vga_regions[QEMU_PCI_VGA_IO_HI]);
     pci_dev->has_vga = false;
 }
@@ -1316,7 +1307,7 @@ static void pci_update_mappings(PCIDevice *d)
 
         /* now do the real mapping */
         if (r->addr != PCI_BAR_UNMAPPED) {
-            trace_pci_update_mappings_del(d, pci_bus_num(d->bus),
+            trace_pci_update_mappings_del(d, pci_dev_bus_num(d),
                                           PCI_SLOT(d->devfn),
                                           PCI_FUNC(d->devfn),
                                           i, r->addr, r->size);
@@ -1324,7 +1315,7 @@ static void pci_update_mappings(PCIDevice *d)
         }
         r->addr = new_addr;
         if (r->addr != PCI_BAR_UNMAPPED) {
-            trace_pci_update_mappings_add(d, pci_bus_num(d->bus),
+            trace_pci_update_mappings_add(d, pci_dev_bus_num(d),
                                           PCI_SLOT(d->devfn),
                                           PCI_FUNC(d->devfn),
                                           i, r->addr, r->size);
@@ -1443,9 +1434,9 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
     PCIBus *bus;
 
     do {
-         bus = dev->bus;
-         pin = bus->map_irq(dev, pin);
-         dev = bus->parent_dev;
+        bus = pci_get_bus(dev);
+        pin = bus->map_irq(dev, pin);
+        dev = bus->parent_dev;
     } while (dev);
 
     if (!bus->route_intx_to_irq) {
@@ -2015,7 +2006,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
     PCIDevice *pci_dev = (PCIDevice *)qdev;
     PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
     Error *local_err = NULL;
-    PCIBus *bus;
     bool is_default_rom;
 
     /* initialize cap_present for pci_is_express() and pci_config_size() */
@@ -2023,8 +2013,7 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
     }
 
-    bus = PCI_BUS(qdev_get_parent_bus(qdev));
-    pci_dev = do_pci_register_device(pci_dev, bus,
+    pci_dev = do_pci_register_device(pci_dev,
                                      object_get_typename(OBJECT(qdev)),
                                      pci_dev->devfn, errp);
     if (pci_dev == NULL)
@@ -2317,7 +2306,7 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
                 error_setg(errp, "%s:%02x:%02x.%x "
                            "Attempt to add PCI capability %x at offset "
                            "%x overlaps existing capability %x at offset %x",
-                           pci_root_bus_path(pdev), pci_bus_num(pdev->bus),
+                           pci_root_bus_path(pdev), pci_dev_bus_num(pdev),
                            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                            cap_id, offset, overlapping_cap, i);
                 return -EINVAL;
@@ -2381,7 +2370,7 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent)
 
     monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, "
                    "pci id %04x:%04x (sub %04x:%04x)\n",
-                   indent, "", ctxt, pci_bus_num(d->bus),
+                   indent, "", ctxt, pci_dev_bus_num(d),
                    PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
                    pci_get_word(d->config + PCI_VENDOR_ID),
                    pci_get_word(d->config + PCI_DEVICE_ID),
@@ -2464,7 +2453,7 @@ static char *pcibus_get_dev_path(DeviceState *dev)
 
     /* Calculate # of slots on path between device and root. */;
     slot_depth = 0;
-    for (t = d; t; t = t->bus->parent_dev) {
+    for (t = d; t; t = pci_get_bus(t)->parent_dev) {
         ++slot_depth;
     }
 
@@ -2479,7 +2468,7 @@ static char *pcibus_get_dev_path(DeviceState *dev)
     /* Fill in slot numbers. We walk up from device to root, so need to print
      * them in the reverse order, last to first. */
     p = path + path_len;
-    for (t = d; t; t = t->bus->parent_dev) {
+    for (t = d; t; t = pci_get_bus(t)->parent_dev) {
         p -= slot_len;
         s = snprintf(slot, sizeof slot, ":%02x.%x",
                      PCI_SLOT(t->devfn), PCI_FUNC(t->devfn));
@@ -2527,12 +2516,12 @@ int pci_qdev_find_device(const char *id, PCIDevice **pdev)
 
 MemoryRegion *pci_address_space(PCIDevice *dev)
 {
-    return dev->bus->address_space_mem;
+    return pci_get_bus(dev)->address_space_mem;
 }
 
 MemoryRegion *pci_address_space_io(PCIDevice *dev)
 {
-    return dev->bus->address_space_io;
+    return pci_get_bus(dev)->address_space_io;
 }
 
 static void pci_device_class_init(ObjectClass *klass, void *data)
@@ -2560,11 +2549,11 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
 {
-    PCIBus *bus = PCI_BUS(dev->bus);
+    PCIBus *bus = pci_get_bus(dev);
     PCIBus *iommu_bus = bus;
 
     while(iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
-        iommu_bus = PCI_BUS(iommu_bus->parent_dev->bus);
+        iommu_bus = pci_get_bus(iommu_bus->parent_dev);
     }
     if (iommu_bus && iommu_bus->iommu_fn) {
         return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, dev->devfn);
@@ -2635,7 +2624,7 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range)
 
 static bool pcie_has_upstream_port(PCIDevice *dev)
 {
-    PCIDevice *parent_dev = pci_bridge_get_device(dev->bus);
+    PCIDevice *parent_dev = pci_bridge_get_device(pci_get_bus(dev));
 
     /* Device associated with an upstream port.
      * As there are several types of these, it's easier to check the
@@ -2651,12 +2640,14 @@ static bool pcie_has_upstream_port(PCIDevice *dev)
 
 PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
 {
+    PCIBus *bus = pci_get_bus(pci_dev);
+
     if(pcie_has_upstream_port(pci_dev)) {
         /* With an upstream PCIe port, we only support 1 device at slot 0 */
-        return pci_dev->bus->devices[0];
+        return bus->devices[0];
     } else {
         /* Other bus types might support multiple devices at slots 0-31 */
-        return pci_dev->bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)];
+        return bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)];
     }
 }
 
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index a47d257149..b2e50c36a0 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -183,7 +183,7 @@ static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent,
 static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
 {
     PCIDevice *pd = PCI_DEVICE(br);
-    PCIBus *parent = pd->bus;
+    PCIBus *parent = pci_get_bus(pd);
     PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
     uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND);
 
@@ -214,7 +214,7 @@ static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
 static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
 {
     PCIDevice *pd = PCI_DEVICE(br);
-    PCIBus *parent = pd->bus;
+    PCIBus *parent = pci_get_bus(pd);
 
     memory_region_del_subregion(parent->address_space_io, &w->alias_io);
     memory_region_del_subregion(parent->address_space_mem, &w->alias_mem);
@@ -339,7 +339,7 @@ void pci_bridge_reset(DeviceState *qdev)
 /* default qdev initialization function for PCI-to-PCI bridge */
 void pci_bridge_initfn(PCIDevice *dev, const char *typename)
 {
-    PCIBus *parent = dev->bus;
+    PCIBus *parent = pci_get_bus(dev);
     PCIBridge *br = PCI_BRIDGE(dev);
     PCIBus *sec_bus = &br->sec_bus;
 
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 32191f2a55..6c91bd44a0 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -155,7 +155,8 @@ pcie_endpoint_cap_common_init(PCIDevice *dev, uint8_t offset, uint8_t cap_size)
      * a regular Endpoint type is exposed on a root complex.  These
      * should instead be Root Complex Integrated Endpoints.
      */
-    if (pci_bus_is_express(dev->bus) && pci_bus_is_root(dev->bus)) {
+    if (pci_bus_is_express(pci_get_bus(dev))
+        && pci_bus_is_root(pci_get_bus(dev))) {
         type = PCI_EXP_TYPE_RC_END;
     }
 
@@ -369,7 +370,7 @@ void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
 {
     uint8_t *exp_cap;
     PCIDevice *pci_dev = PCI_DEVICE(dev);
-    PCIBus *bus = pci_dev->bus;
+    PCIBus *bus = pci_get_bus(pci_dev);
 
     pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
 
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 97200742b4..b009be7f17 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -409,7 +409,7 @@ static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
              */
             return;
         }
-        dev = pci_bridge_get_device(dev->bus);
+        dev = pci_bridge_get_device(pci_get_bus(dev));
     }
 }
 
@@ -1025,7 +1025,7 @@ static int do_pcie_aer_inject_error(Monitor *mon,
     }
     details->id = id;
     details->root_bus = pci_root_bus_path(dev);
-    details->bus = pci_bus_num(dev->bus);
+    details->bus = pci_dev_bus_num(dev);
     details->devfn = dev->devfn;
 
     return 0;
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index 4765dcecca..b7642bac01 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c
@@ -314,9 +314,9 @@ static int ppc4xx_pcihost_initfn(SysBusDevice *dev)
         sysbus_init_irq(dev, &s->irq[i]);
     }
 
-    b = pci_register_bus(DEVICE(dev), NULL, ppc4xx_pci_set_irq,
-                         ppc4xx_pci_map_irq, s->irq, get_system_memory(),
-                         get_system_io(), 0, 4, TYPE_PCI_BUS);
+    b = pci_register_root_bus(DEVICE(dev), NULL, ppc4xx_pci_set_irq,
+                              ppc4xx_pci_map_irq, s->irq, get_system_memory(),
+                              get_system_io(), 0, 4, TYPE_PCI_BUS);
     h->bus = b;
 
     pci_create_simple(b, 0, "ppc4xx-host-bridge");
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 695c820911..37f18b3d32 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -505,7 +505,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
             goto param_error_exit;
         }
 
-        rtas_st(rets, 1, (pci_bus_num(pdev->bus) << 16) + 1);
+        rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1);
         break;
     case RTAS_GET_PE_MODE:
         rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
@@ -1621,10 +1621,10 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
                                 &sphb->iowindow);
 
-    bus = pci_register_bus(dev, NULL,
-                           pci_spapr_set_irq, pci_spapr_map_irq, sphb,
-                           &sphb->memspace, &sphb->iospace,
-                           PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
+    bus = pci_register_root_bus(dev, NULL,
+                                pci_spapr_set_irq, pci_spapr_map_irq, sphb,
+                                &sphb->memspace, &sphb->iospace,
+                                PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
     phb->bus = bus;
     qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL);
 
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 2b1e1409bf..7d9c65e719 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -554,10 +554,10 @@ static int s390_pcihost_init(SysBusDevice *dev)
 
     DPRINTF("host_init\n");
 
-    b = pci_register_bus(DEVICE(dev), NULL,
-                         s390_pci_set_irq, s390_pci_map_irq, NULL,
-                         get_system_memory(), get_system_io(), 0, 64,
-                         TYPE_PCI_BUS);
+    b = pci_register_root_bus(DEVICE(dev), NULL,
+                              s390_pci_set_irq, s390_pci_map_irq, NULL,
+                              get_system_memory(), get_system_io(), 0, 64,
+                              TYPE_PCI_BUS);
     pci_setup_iommu(b, s390_pci_dma_iommu, s);
 
     bus = BUS(b);
@@ -680,10 +680,10 @@ static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
             s->bus_no += 1;
             pci_default_write_config(pdev, PCI_SECONDARY_BUS, s->bus_no, 1);
             do {
-                pdev = pdev->bus->parent_dev;
+                pdev = pci_get_bus(pdev)->parent_dev;
                 pci_default_write_config(pdev, PCI_SUBORDINATE_BUS,
                                          s->bus_no, 1);
-            } while (pdev->bus && pci_bus_num(pdev->bus));
+            } while (pci_get_bus(pdev) && pci_dev_bus_num(pdev));
         }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
         pdev = PCI_DEVICE(dev);
@@ -692,7 +692,7 @@ static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
             /* In the case the PCI device does not define an id */
             /* we generate one based on the PCI address         */
             dev->id = g_strdup_printf("auto_%02x:%02x.%01x",
-                                      pci_bus_num(pdev->bus),
+                                      pci_dev_bus_num(pdev),
                                       PCI_SLOT(pdev->devfn),
                                       PCI_FUNC(pdev->devfn));
         }
@@ -713,7 +713,7 @@ static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
         }
 
         pbdev->pdev = pdev;
-        pbdev->iommu = s390_pci_get_iommu(s, pdev->bus, pdev->devfn);
+        pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
         pbdev->iommu->pbdev = pbdev;
         pbdev->state = ZPCI_FS_DISABLED;
 
@@ -807,7 +807,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
 
     s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
                                  pbdev->fh, pbdev->fid);
-    bus = pci_dev->bus;
+    bus = pci_get_bus(pci_dev);
     devfn = pci_dev->devfn;
     object_unparent(OBJECT(pci_dev));
     s390_pci_msix_free(pbdev);
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index d5eae6239a..3e38e9e8aa 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2372,7 +2372,7 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
     if (!s->sas_addr) {
         s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) |
                        IEEE_COMPANY_LOCALLY_ASSIGNED) << 36;
-        s->sas_addr |= (pci_bus_num(dev->bus) << 16);
+        s->sas_addr |= (pci_dev_bus_num(dev) << 16);
         s->sas_addr |= (PCI_SLOT(dev->devfn) << 8);
         s->sas_addr |= PCI_FUNC(dev->devfn);
     }
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index f6db1b0103..3f061f3f68 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -1312,7 +1312,7 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
     if (!s->sas_addr) {
         s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) |
                        IEEE_COMPANY_LOCALLY_ASSIGNED) << 36;
-        s->sas_addr |= (pci_bus_num(dev->bus) << 16);
+        s->sas_addr |= (pci_dev_bus_num(dev) << 16);
         s->sas_addr |= (PCI_SLOT(dev->devfn) << 8);
         s->sas_addr |= PCI_FUNC(dev->devfn);
     }
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index d564e5caff..27749c0e42 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -1133,7 +1133,7 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
 
     pvscsi_init_msi(s);
 
-    if (pci_is_express(pci_dev) && pci_bus_is_express(pci_dev->bus)) {
+    if (pci_is_express(pci_dev) && pci_bus_is_express(pci_get_bus(pci_dev))) {
         pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
     }
 
diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 4008c81002..341da88552 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c
@@ -248,6 +248,10 @@ static void milkymist_memcard_reset(DeviceState *d)
     for (i = 0; i < R_MAX; i++) {
         s->regs[i] = 0;
     }
+    /* Since we're still using the legacy SD API the card is not plugged
+     * into any bus, and we must reset it manually.
+     */
+    device_reset(DEVICE(s->card));
 }
 
 static int milkymist_memcard_init(SysBusDevice *dev)
diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index e934cd3656..5b47cadf11 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c
@@ -305,6 +305,12 @@ void omap_mmc_reset(struct omap_mmc_s *host)
     host->cdet_enable = 0;
     qemu_set_irq(host->coverswitch, host->cdet_state);
     host->clkdiv = 0;
+
+    /* Since we're still using the legacy SD API the card is not plugged
+     * into any bus, and we must reset it manually. When omap_mmc is
+     * QOMified this must move into the QOM reset function.
+     */
+    device_reset(DEVICE(host->card));
 }
 
 static uint64_t omap_mmc_read(void *opaque, hwaddr offset,
@@ -587,8 +593,6 @@ struct omap_mmc_s *omap_mmc_init(hwaddr base,
     s->lines = 1;	/* TODO: needs to be settable per-board */
     s->rev = 1;
 
-    omap_mmc_reset(s);
-
     memory_region_init_io(&s->iomem, NULL, &omap_mmc_ops, s, "omap.mmc", 0x800);
     memory_region_add_subregion(sysmem, base, &s->iomem);
 
@@ -598,6 +602,8 @@ struct omap_mmc_s *omap_mmc_init(hwaddr base,
         exit(1);
     }
 
+    omap_mmc_reset(s);
+
     return s;
 }
 
@@ -613,8 +619,6 @@ struct omap_mmc_s *omap2_mmc_init(struct omap_target_agent_s *ta,
     s->lines = 4;
     s->rev = 2;
 
-    omap_mmc_reset(s);
-
     memory_region_init_io(&s->iomem, NULL, &omap_mmc_ops, s, "omap.mmc",
                           omap_l4_region_size(ta, 0));
     omap_l4_attach(ta, 0, &s->iomem);
@@ -628,6 +632,8 @@ struct omap_mmc_s *omap2_mmc_init(struct omap_target_agent_s *ta,
     s->cdet = qemu_allocate_irq(omap_mmc_cover_cb, s, 0);
     sd_set_cb(s->card, NULL, s->cdet);
 
+    omap_mmc_reset(s);
+
     return s;
 }
 
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 55c8098ecd..3ba1f7dd23 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -480,6 +480,10 @@ static void pl181_reset(DeviceState *d)
 
     /* We can assume our GPIO outputs have been wired up now */
     sd_set_cb(s->card, s->cardstatus[0], s->cardstatus[1]);
+    /* Since we're still using the legacy SD API the card is not plugged
+     * into any bus, and we must reset it manually.
+     */
+    device_reset(DEVICE(s->card));
 }
 
 static void pl181_init(Object *obj)
diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h
index 161177cf39..fc807f08f3 100644
--- a/hw/sd/sdhci-internal.h
+++ b/hw/sd/sdhci-internal.h
@@ -24,8 +24,6 @@
 #ifndef SDHCI_INTERNAL_H
 #define SDHCI_INTERNAL_H
 
-#include "hw/sd/sdhci.h"
-
 /* R/W SDMA System Address register 0x0 */
 #define SDHC_SYSAD                     0x00
 
@@ -45,6 +43,7 @@
 #define SDHC_TRNS_ACMD12               0x0004
 #define SDHC_TRNS_READ                 0x0010
 #define SDHC_TRNS_MULTI                0x0020
+#define SDHC_TRNMOD_MASK               0x0037
 
 /* R/W Command Register 0x0 */
 #define SDHC_CMDREG                    0x0E
@@ -175,7 +174,7 @@
 #define SDHC_ACMD12ERRSTS              0x3C
 
 /* HWInit Capabilities Register 0x05E80080 */
-#define SDHC_CAPAREG                   0x40
+#define SDHC_CAPAB                     0x40
 #define SDHC_CAN_DO_DMA                0x00400000
 #define SDHC_CAN_DO_ADMA2              0x00080000
 #define SDHC_CAN_DO_ADMA1              0x00100000
@@ -227,6 +226,4 @@ enum {
     sdhc_gap_write  = 2   /* SDHC stopped at block gap during write operation */
 };
 
-extern const VMStateDescription sdhci_vmstate;
-
 #endif
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index b064a087c9..f9264d3be5 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -23,38 +23,18 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/hw.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/dma.h"
 #include "qemu/timer.h"
 #include "qemu/bitops.h"
+#include "hw/sd/sdhci.h"
 #include "sdhci-internal.h"
+#include "qapi/error.h"
 #include "qemu/log.h"
-
-/* host controller debug messages */
-#ifndef SDHC_DEBUG
-#define SDHC_DEBUG                        0
-#endif
-
-#define DPRINT_L1(fmt, args...) \
-    do { \
-        if (SDHC_DEBUG) { \
-            fprintf(stderr, "QEMU SDHC: " fmt, ## args); \
-        } \
-    } while (0)
-#define DPRINT_L2(fmt, args...) \
-    do { \
-        if (SDHC_DEBUG > 1) { \
-            fprintf(stderr, "QEMU SDHC: " fmt, ## args); \
-        } \
-    } while (0)
-#define ERRPRINT(fmt, args...) \
-    do { \
-        if (SDHC_DEBUG) { \
-            fprintf(stderr, "QEMU SDHC ERROR: " fmt, ## args); \
-        } \
-    } while (0)
+#include "trace.h"
 
 #define TYPE_SDHCI_BUS "sdhci-bus"
 #define SDHCI_BUS(obj) OBJECT_CHECK(SDBus, (obj), TYPE_SDHCI_BUS)
@@ -119,7 +99,6 @@
     (SDHC_CAPAB_BASECLKFREQ << 8) | (SDHC_CAPAB_TOUNIT << 7) | \
     (SDHC_CAPAB_TOCLKFREQ))
 
-#define MASK_TRNMOD     0x0037
 #define MASKED_WRITE(reg, mask, val)  (reg = (reg & (mask)) | (val))
 
 static uint8_t sdhci_slotint(SDHCIState *s)
@@ -153,8 +132,8 @@ static void sdhci_raise_insertion_irq(void *opaque)
 static void sdhci_set_inserted(DeviceState *dev, bool level)
 {
     SDHCIState *s = (SDHCIState *)dev;
-    DPRINT_L1("Card state changed: %s!\n", level ? "insert" : "eject");
 
+    trace_sdhci_set_inserted(level ? "insert" : "eject");
     if ((s->norintsts & SDHC_NIS_REMOVE) && level) {
         /* Give target some time to notice card ejection */
         timer_mod(s->insert_timer,
@@ -236,7 +215,8 @@ static void sdhci_send_command(SDHCIState *s)
     s->acmd12errsts = 0;
     request.cmd = s->cmdreg >> 8;
     request.arg = s->argument;
-    DPRINT_L1("sending CMD%u ARG[0x%08x]\n", request.cmd, request.arg);
+
+    trace_sdhci_send_command(request.cmd, request.arg);
     rlen = sdbus_do_command(&s->sdbus, &request, response);
 
     if (s->cmdreg & SDHC_CMD_RESPONSE) {
@@ -244,7 +224,7 @@ static void sdhci_send_command(SDHCIState *s)
             s->rspreg[0] = (response[0] << 24) | (response[1] << 16) |
                            (response[2] << 8)  |  response[3];
             s->rspreg[1] = s->rspreg[2] = s->rspreg[3] = 0;
-            DPRINT_L1("Response: RSPREG[31..0]=0x%08x\n", s->rspreg[0]);
+            trace_sdhci_response4(s->rspreg[0]);
         } else if (rlen == 16) {
             s->rspreg[0] = (response[11] << 24) | (response[12] << 16) |
                            (response[13] << 8) |  response[14];
@@ -254,11 +234,10 @@ static void sdhci_send_command(SDHCIState *s)
                            (response[5] << 8)  |  response[6];
             s->rspreg[3] = (response[0] << 16) | (response[1] << 8) |
                             response[2];
-            DPRINT_L1("Response received:\n RSPREG[127..96]=0x%08x, RSPREG[95.."
-                  "64]=0x%08x,\n RSPREG[63..32]=0x%08x, RSPREG[31..0]=0x%08x\n",
-                  s->rspreg[3], s->rspreg[2], s->rspreg[1], s->rspreg[0]);
+            trace_sdhci_response16(s->rspreg[3], s->rspreg[2],
+                                   s->rspreg[1], s->rspreg[0]);
         } else {
-            ERRPRINT("Timeout waiting for command response\n");
+            trace_sdhci_error("timeout waiting for command response");
             if (s->errintstsen & SDHC_EISEN_CMDTIMEOUT) {
                 s->errintsts |= SDHC_EIS_CMDTIMEOUT;
                 s->norintsts |= SDHC_NIS_ERR;
@@ -292,7 +271,7 @@ static void sdhci_end_transfer(SDHCIState *s)
 
         request.cmd = 0x0C;
         request.arg = 0;
-        DPRINT_L1("Automatically issue CMD%d %08x\n", request.cmd, request.arg);
+        trace_sdhci_end_transfer(request.cmd, request.arg);
         sdbus_do_command(&s->sdbus, &request, response);
         /* Auto CMD12 response goes to the upper Response register */
         s->rspreg[3] = (response[0] << 24) | (response[1] << 16) |
@@ -361,7 +340,7 @@ static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned size)
 
     /* first check that a valid data exists in host controller input buffer */
     if ((s->prnsts & SDHC_DATA_AVAILABLE) == 0) {
-        ERRPRINT("Trying to read from empty buffer\n");
+        trace_sdhci_error("read from empty buffer");
         return 0;
     }
 
@@ -370,8 +349,7 @@ static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned size)
         s->data_count++;
         /* check if we've read all valid data (blksize bytes) from buffer */
         if ((s->data_count) >= (s->blksize & 0x0fff)) {
-            DPRINT_L2("All %u bytes of data have been read from input buffer\n",
-                    s->data_count);
+            trace_sdhci_read_dataport(s->data_count);
             s->prnsts &= ~SDHC_DATA_AVAILABLE; /* no more data in a buffer */
             s->data_count = 0;  /* next buff read must start at position [0] */
 
@@ -454,7 +432,7 @@ static void sdhci_write_dataport(SDHCIState *s, uint32_t value, unsigned size)
 
     /* Check that there is free space left in a buffer */
     if (!(s->prnsts & SDHC_SPACE_AVAILABLE)) {
-        ERRPRINT("Can't write to data buffer: buffer full\n");
+        trace_sdhci_error("Can't write to data buffer: buffer full");
         return;
     }
 
@@ -463,8 +441,7 @@ static void sdhci_write_dataport(SDHCIState *s, uint32_t value, unsigned size)
         s->data_count++;
         value >>= 8;
         if (s->data_count >= (s->blksize & 0x0fff)) {
-            DPRINT_L2("write buffer filled with %u bytes of data\n",
-                    s->data_count);
+            trace_sdhci_write_dataport(s->data_count);
             s->data_count = 0;
             s->prnsts &= ~SDHC_SPACE_AVAILABLE;
             if (s->prnsts & SDHC_DOING_WRITE) {
@@ -519,7 +496,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
                     s->blkcnt--;
                 }
             }
-            dma_memory_write(&address_space_memory, s->sdmasysad,
+            dma_memory_write(s->dma_as, s->sdmasysad,
                              &s->fifo_buffer[begin], s->data_count - begin);
             s->sdmasysad += s->data_count - begin;
             if (s->data_count == block_size) {
@@ -541,7 +518,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
                 s->data_count = block_size;
                 boundary_count -= block_size - begin;
             }
-            dma_memory_read(&address_space_memory, s->sdmasysad,
+            dma_memory_read(s->dma_as, s->sdmasysad,
                             &s->fifo_buffer[begin], s->data_count - begin);
             s->sdmasysad += s->data_count - begin;
             if (s->data_count == block_size) {
@@ -579,11 +556,9 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s)
         for (n = 0; n < datacnt; n++) {
             s->fifo_buffer[n] = sdbus_read_data(&s->sdbus);
         }
-        dma_memory_write(&address_space_memory, s->sdmasysad, s->fifo_buffer,
-                         datacnt);
+        dma_memory_write(s->dma_as, s->sdmasysad, s->fifo_buffer, datacnt);
     } else {
-        dma_memory_read(&address_space_memory, s->sdmasysad, s->fifo_buffer,
-                        datacnt);
+        dma_memory_read(s->dma_as, s->sdmasysad, s->fifo_buffer, datacnt);
         for (n = 0; n < datacnt; n++) {
             sdbus_write_data(&s->sdbus, s->fifo_buffer[n]);
         }
@@ -607,7 +582,7 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
     hwaddr entry_addr = (hwaddr)s->admasysaddr;
     switch (SDHC_DMA_TYPE(s->hostctl)) {
     case SDHC_CTRL_ADMA2_32:
-        dma_memory_read(&address_space_memory, entry_addr, (uint8_t *)&adma2,
+        dma_memory_read(s->dma_as, entry_addr, (uint8_t *)&adma2,
                         sizeof(adma2));
         adma2 = le64_to_cpu(adma2);
         /* The spec does not specify endianness of descriptor table.
@@ -619,7 +594,7 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         dscr->incr = 8;
         break;
     case SDHC_CTRL_ADMA1_32:
-        dma_memory_read(&address_space_memory, entry_addr, (uint8_t *)&adma1,
+        dma_memory_read(s->dma_as, entry_addr, (uint8_t *)&adma1,
                         sizeof(adma1));
         adma1 = le32_to_cpu(adma1);
         dscr->addr = (hwaddr)(adma1 & 0xFFFFF000);
@@ -632,12 +607,12 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         }
         break;
     case SDHC_CTRL_ADMA2_64:
-        dma_memory_read(&address_space_memory, entry_addr,
+        dma_memory_read(s->dma_as, entry_addr,
                         (uint8_t *)(&dscr->attr), 1);
-        dma_memory_read(&address_space_memory, entry_addr + 2,
+        dma_memory_read(s->dma_as, entry_addr + 2,
                         (uint8_t *)(&dscr->length), 2);
         dscr->length = le16_to_cpu(dscr->length);
-        dma_memory_read(&address_space_memory, entry_addr + 4,
+        dma_memory_read(s->dma_as, entry_addr + 4,
                         (uint8_t *)(&dscr->addr), 8);
         dscr->attr = le64_to_cpu(dscr->attr);
         dscr->attr &= 0xfffffff8;
@@ -652,15 +627,14 @@ static void sdhci_do_adma(SDHCIState *s)
 {
     unsigned int n, begin, length;
     const uint16_t block_size = s->blksize & 0x0fff;
-    ADMADescr dscr;
+    ADMADescr dscr = {};
     int i;
 
     for (i = 0; i < SDHC_ADMA_DESCS_PER_DELAY; ++i) {
         s->admaerr &= ~SDHC_ADMAERR_LENGTH_MISMATCH;
 
         get_adma_description(s, &dscr);
-        DPRINT_L2("ADMA loop: addr=" TARGET_FMT_plx ", len=%d, attr=%x\n",
-                dscr.addr, dscr.length, dscr.attr);
+        trace_sdhci_adma_loop(dscr.addr, dscr.length, dscr.attr);
 
         if ((dscr.attr & SDHC_ADMA_ATTR_VALID) == 0) {
             /* Indicate that error occurred in ST_FDS state */
@@ -697,7 +671,7 @@ static void sdhci_do_adma(SDHCIState *s)
                         s->data_count = block_size;
                         length -= block_size - begin;
                     }
-                    dma_memory_write(&address_space_memory, dscr.addr,
+                    dma_memory_write(s->dma_as, dscr.addr,
                                      &s->fifo_buffer[begin],
                                      s->data_count - begin);
                     dscr.addr += s->data_count - begin;
@@ -721,7 +695,7 @@ static void sdhci_do_adma(SDHCIState *s)
                         s->data_count = block_size;
                         length -= block_size - begin;
                     }
-                    dma_memory_read(&address_space_memory, dscr.addr,
+                    dma_memory_read(s->dma_as, dscr.addr,
                                     &s->fifo_buffer[begin],
                                     s->data_count - begin);
                     dscr.addr += s->data_count - begin;
@@ -743,8 +717,7 @@ static void sdhci_do_adma(SDHCIState *s)
             break;
         case SDHC_ADMA_ATTR_ACT_LINK:   /* link to next descriptor table */
             s->admasysaddr = dscr.addr;
-            DPRINT_L1("ADMA link: admasysaddr=0x%" PRIx64 "\n",
-                      s->admasysaddr);
+            trace_sdhci_adma("link", s->admasysaddr);
             break;
         default:
             s->admasysaddr += dscr.incr;
@@ -752,8 +725,7 @@ static void sdhci_do_adma(SDHCIState *s)
         }
 
         if (dscr.attr & SDHC_ADMA_ATTR_INT) {
-            DPRINT_L1("ADMA interrupt: admasysaddr=0x%" PRIx64 "\n",
-                      s->admasysaddr);
+            trace_sdhci_adma("interrupt", s->admasysaddr);
             if (s->norintstsen & SDHC_NISEN_DMA) {
                 s->norintsts |= SDHC_NIS_DMA;
             }
@@ -764,15 +736,15 @@ static void sdhci_do_adma(SDHCIState *s)
         /* ADMA transfer terminates if blkcnt == 0 or by END attribute */
         if (((s->trnmod & SDHC_TRNS_BLK_CNT_EN) &&
                     (s->blkcnt == 0)) || (dscr.attr & SDHC_ADMA_ATTR_END)) {
-            DPRINT_L2("ADMA transfer completed\n");
+            trace_sdhci_adma_transfer_completed();
             if (length || ((dscr.attr & SDHC_ADMA_ATTR_END) &&
                 (s->trnmod & SDHC_TRNS_BLK_CNT_EN) &&
                 s->blkcnt != 0)) {
-                ERRPRINT("SD/MMC host ADMA length mismatch\n");
+                trace_sdhci_error("SD/MMC host ADMA length mismatch");
                 s->admaerr |= SDHC_ADMAERR_LENGTH_MISMATCH |
                         SDHC_ADMAERR_STATE_ST_TFR;
                 if (s->errintstsen & SDHC_EISEN_ADMAERR) {
-                    ERRPRINT("Set ADMA error flag\n");
+                    trace_sdhci_error("Set ADMA error flag");
                     s->errintsts |= SDHC_EIS_ADMAERR;
                     s->norintsts |= SDHC_NIS_ERR;
                 }
@@ -808,7 +780,7 @@ static void sdhci_data_transfer(void *opaque)
             break;
         case SDHC_CTRL_ADMA1_32:
             if (!(s->capareg & SDHC_CAN_DO_ADMA1)) {
-                ERRPRINT("ADMA1 not supported\n");
+                trace_sdhci_error("ADMA1 not supported");
                 break;
             }
 
@@ -816,7 +788,7 @@ static void sdhci_data_transfer(void *opaque)
             break;
         case SDHC_CTRL_ADMA2_32:
             if (!(s->capareg & SDHC_CAN_DO_ADMA2)) {
-                ERRPRINT("ADMA2 not supported\n");
+                trace_sdhci_error("ADMA2 not supported");
                 break;
             }
 
@@ -825,14 +797,14 @@ static void sdhci_data_transfer(void *opaque)
         case SDHC_CTRL_ADMA2_64:
             if (!(s->capareg & SDHC_CAN_DO_ADMA2) ||
                     !(s->capareg & SDHC_64_BIT_BUS_SUPPORT)) {
-                ERRPRINT("64 bit ADMA not supported\n");
+                trace_sdhci_error("64 bit ADMA not supported");
                 break;
             }
 
             sdhci_do_adma(s);
             break;
         default:
-            ERRPRINT("Unsupported DMA type\n");
+            trace_sdhci_error("Unsupported DMA type");
             break;
         }
     } else {
@@ -867,8 +839,8 @@ static inline bool
 sdhci_buff_access_is_sequential(SDHCIState *s, unsigned byte_num)
 {
     if ((s->data_count & 0x3) != byte_num) {
-        ERRPRINT("Non-sequential access to Buffer Data Port register"
-                "is prohibited\n");
+        trace_sdhci_error("Non-sequential access to Buffer Data Port register"
+                          "is prohibited\n");
         return false;
     }
     return true;
@@ -898,8 +870,7 @@ static uint64_t sdhci_read(void *opaque, hwaddr offset, unsigned size)
     case  SDHC_BDATA:
         if (sdhci_buff_access_is_sequential(s, offset - SDHC_BDATA)) {
             ret = sdhci_read_dataport(s, size);
-            DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, (int)offset,
-                      ret, ret);
+            trace_sdhci_access("rd", size << 3, offset, "->", ret, ret);
             return ret;
         }
         break;
@@ -925,11 +896,17 @@ static uint64_t sdhci_read(void *opaque, hwaddr offset, unsigned size)
     case SDHC_ACMD12ERRSTS:
         ret = s->acmd12errsts;
         break;
-    case SDHC_CAPAREG:
-        ret = s->capareg;
+    case SDHC_CAPAB:
+        ret = (uint32_t)s->capareg;
+        break;
+    case SDHC_CAPAB + 4:
+        ret = (uint32_t)(s->capareg >> 32);
         break;
     case SDHC_MAXCURR:
-        ret = s->maxcurr;
+        ret = (uint32_t)s->maxcurr;
+        break;
+    case SDHC_MAXCURR + 4:
+        ret = (uint32_t)(s->maxcurr >> 32);
         break;
     case SDHC_ADMAERR:
         ret =  s->admaerr;
@@ -944,13 +921,14 @@ static uint64_t sdhci_read(void *opaque, hwaddr offset, unsigned size)
         ret = (SD_HOST_SPECv2_VERS << 16) | sdhci_slotint(s);
         break;
     default:
-        ERRPRINT("bad %ub read: addr[0x%04x]\n", size, (int)offset);
+        qemu_log_mask(LOG_UNIMP, "SDHC rd_%ub @0x%02" HWADDR_PRIx " "
+                      "not implemented\n", size, offset);
         break;
     }
 
     ret >>= (offset & 0x3) * 8;
     ret &= (1ULL << (size * 8)) - 1;
-    DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, (int)offset, ret, ret);
+    trace_sdhci_access("rd", size << 3, offset, "->", ret, ret);
     return ret;
 }
 
@@ -1051,7 +1029,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
         if (!(s->capareg & SDHC_CAN_DO_DMA)) {
             value &= ~SDHC_TRNS_DMA;
         }
-        MASKED_WRITE(s->trnmod, mask, value & MASK_TRNMOD);
+        MASKED_WRITE(s->trnmod, mask, value & SDHC_TRNMOD_MASK);
         MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16);
 
         /* Writing to the upper byte of CMDREG triggers SD command generation */
@@ -1149,13 +1127,25 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
         }
         sdhci_update_irq(s);
         break;
+    case SDHC_ACMD12ERRSTS:
+        MASKED_WRITE(s->acmd12errsts, mask, value);
+        break;
+
+    case SDHC_CAPAB:
+    case SDHC_CAPAB + 4:
+    case SDHC_MAXCURR:
+    case SDHC_MAXCURR + 4:
+        qemu_log_mask(LOG_GUEST_ERROR, "SDHC wr_%ub @0x%02" HWADDR_PRIx
+                      " <- 0x%08x read-only\n", size, offset, value >> shift);
+        break;
+
     default:
-        ERRPRINT("bad %ub write offset: addr[0x%04x] <- %u(0x%x)\n",
-                 size, (int)offset, value >> shift, value >> shift);
+        qemu_log_mask(LOG_UNIMP, "SDHC wr_%ub @0x%02" HWADDR_PRIx " <- 0x%08x "
+                      "not implemented\n", size, offset, value >> shift);
         break;
     }
-    DPRINT_L2("write %ub: addr[0x%04x] <- %u(0x%x)\n",
-              size, (int)offset, value >> shift, value >> shift);
+    trace_sdhci_access("wr", size << 3, offset, "<-",
+                       value >> shift, value >> shift);
 }
 
 static const MemoryRegionOps sdhci_mmio_ops = {
@@ -1184,6 +1174,14 @@ static inline unsigned int sdhci_get_fifolen(SDHCIState *s)
     }
 }
 
+/* --- qdev common --- */
+
+#define DEFINE_SDHCI_COMMON_PROPERTIES(_state) \
+    /* Capabilities registers provide information on supported features
+     * of this specific host controller implementation */ \
+    DEFINE_PROP_UINT64("capareg", _state, capareg, SDHC_CAPAB_REG_DEFAULT), \
+    DEFINE_PROP_UINT64("maxcurr", _state, maxcurr, 0)
+
 static void sdhci_initfn(SDHCIState *s)
 {
     qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
@@ -1199,13 +1197,31 @@ static void sdhci_uninitfn(SDHCIState *s)
     timer_free(s->insert_timer);
     timer_del(s->transfer_timer);
     timer_free(s->transfer_timer);
-    qemu_free_irq(s->eject_cb);
-    qemu_free_irq(s->ro_cb);
 
     g_free(s->fifo_buffer);
     s->fifo_buffer = NULL;
 }
 
+static void sdhci_common_realize(SDHCIState *s, Error **errp)
+{
+    s->buf_maxsz = sdhci_get_fifolen(s);
+    s->fifo_buffer = g_malloc0(s->buf_maxsz);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &sdhci_mmio_ops, s, "sdhci",
+                          SDHC_REGISTERS_MAP_SIZE);
+}
+
+static void sdhci_common_unrealize(SDHCIState *s, Error **errp)
+{
+    /* This function is expected to be called only once for each class:
+     * - SysBus:    via DeviceClass->unrealize(),
+     * - PCI:       via PCIDeviceClass->exit().
+     * However to avoid double-free and/or use-after-free we still nullify
+     * this variable (better safe than sorry!). */
+    g_free(s->fifo_buffer);
+    s->fifo_buffer = NULL;
+}
+
 static bool sdhci_pending_insert_vmstate_needed(void *opaque)
 {
     SDHCIState *s = opaque;
@@ -1265,32 +1281,44 @@ const VMStateDescription sdhci_vmstate = {
     },
 };
 
-/* Capabilities registers provide information on supported features of this
- * specific host controller implementation */
+static void sdhci_common_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    dc->vmsd = &sdhci_vmstate;
+    dc->reset = sdhci_poweron_reset;
+}
+
+/* --- qdev PCI --- */
+
 static Property sdhci_pci_properties[] = {
-    DEFINE_PROP_UINT32("capareg", SDHCIState, capareg,
-            SDHC_CAPAB_REG_DEFAULT),
-    DEFINE_PROP_UINT32("maxcurr", SDHCIState, maxcurr, 0),
+    DEFINE_SDHCI_COMMON_PROPERTIES(SDHCIState),
     DEFINE_PROP_END_OF_LIST(),
 };
 
 static void sdhci_pci_realize(PCIDevice *dev, Error **errp)
 {
     SDHCIState *s = PCI_SDHCI(dev);
+
+    sdhci_initfn(s);
+    sdhci_common_realize(s, errp);
+    if (errp && *errp) {
+        return;
+    }
+
     dev->config[PCI_CLASS_PROG] = 0x01; /* Standard Host supported DMA */
     dev->config[PCI_INTERRUPT_PIN] = 0x01; /* interrupt pin A */
-    sdhci_initfn(s);
-    s->buf_maxsz = sdhci_get_fifolen(s);
-    s->fifo_buffer = g_malloc0(s->buf_maxsz);
     s->irq = pci_allocate_irq(dev);
-    memory_region_init_io(&s->iomem, OBJECT(s), &sdhci_mmio_ops, s, "sdhci",
-            SDHC_REGISTERS_MAP_SIZE);
-    pci_register_bar(dev, 0, 0, &s->iomem);
+    s->dma_as = pci_get_address_space(dev);
+    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->iomem);
 }
 
 static void sdhci_pci_exit(PCIDevice *dev)
 {
     SDHCIState *s = PCI_SDHCI(dev);
+
+    sdhci_common_unrealize(s, &error_abort);
     sdhci_uninitfn(s);
 }
 
@@ -1304,10 +1332,9 @@ static void sdhci_pci_class_init(ObjectClass *klass, void *data)
     k->vendor_id = PCI_VENDOR_ID_REDHAT;
     k->device_id = PCI_DEVICE_ID_REDHAT_SDHCI;
     k->class_id = PCI_CLASS_SYSTEM_SDHCI;
-    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    dc->vmsd = &sdhci_vmstate;
     dc->props = sdhci_pci_properties;
-    dc->reset = sdhci_poweron_reset;
+
+    sdhci_common_class_init(klass, data);
 }
 
 static const TypeInfo sdhci_pci_info = {
@@ -1321,12 +1348,14 @@ static const TypeInfo sdhci_pci_info = {
     },
 };
 
+/* --- qdev SysBus --- */
+
 static Property sdhci_sysbus_properties[] = {
-    DEFINE_PROP_UINT32("capareg", SDHCIState, capareg,
-            SDHC_CAPAB_REG_DEFAULT),
-    DEFINE_PROP_UINT32("maxcurr", SDHCIState, maxcurr, 0),
+    DEFINE_SDHCI_COMMON_PROPERTIES(SDHCIState),
     DEFINE_PROP_BOOL("pending-insert-quirk", SDHCIState, pending_insert_quirk,
                      false),
+    DEFINE_PROP_LINK("dma", SDHCIState,
+                     dma_mr, TYPE_MEMORY_REGION, MemoryRegion *),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1340,6 +1369,11 @@ static void sdhci_sysbus_init(Object *obj)
 static void sdhci_sysbus_finalize(Object *obj)
 {
     SDHCIState *s = SYSBUS_SDHCI(obj);
+
+    if (s->dma_mr) {
+        object_unparent(OBJECT(s->dma_mr));
+    }
+
     sdhci_uninitfn(s);
 }
 
@@ -1348,22 +1382,42 @@ static void sdhci_sysbus_realize(DeviceState *dev, Error ** errp)
     SDHCIState *s = SYSBUS_SDHCI(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 
-    s->buf_maxsz = sdhci_get_fifolen(s);
-    s->fifo_buffer = g_malloc0(s->buf_maxsz);
+    sdhci_common_realize(s, errp);
+    if (errp && *errp) {
+        return;
+    }
+
+    if (s->dma_mr) {
+        address_space_init(s->dma_as, s->dma_mr, "sdhci-dma");
+    } else {
+        /* use system_memory() if property "dma" not set */
+        s->dma_as = &address_space_memory;
+    }
+
     sysbus_init_irq(sbd, &s->irq);
-    memory_region_init_io(&s->iomem, OBJECT(s), &sdhci_mmio_ops, s, "sdhci",
-            SDHC_REGISTERS_MAP_SIZE);
     sysbus_init_mmio(sbd, &s->iomem);
 }
 
+static void sdhci_sysbus_unrealize(DeviceState *dev, Error **errp)
+{
+    SDHCIState *s = SYSBUS_SDHCI(dev);
+
+    sdhci_common_unrealize(s, &error_abort);
+
+     if (s->dma_mr) {
+        address_space_destroy(s->dma_as);
+    }
+}
+
 static void sdhci_sysbus_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    dc->vmsd = &sdhci_vmstate;
     dc->props = sdhci_sysbus_properties;
     dc->realize = sdhci_sysbus_realize;
-    dc->reset = sdhci_poweron_reset;
+    dc->unrealize = sdhci_sysbus_unrealize;
+
+    sdhci_common_class_init(klass, data);
 }
 
 static const TypeInfo sdhci_sysbus_info = {
@@ -1375,6 +1429,8 @@ static const TypeInfo sdhci_sysbus_info = {
     .class_init = sdhci_sysbus_class_init,
 };
 
+/* --- qdev bus master --- */
+
 static void sdhci_bus_class_init(ObjectClass *klass, void *data)
 {
     SDBusClass *sbc = SD_BUS_CLASS(klass);
diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index 24001dc3e6..f88f509e0a 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c
@@ -50,6 +50,9 @@ typedef struct {
     SDState *sd;
 } ssi_sd_state;
 
+#define TYPE_SSI_SD "ssi-sd"
+#define SSI_SD(obj) OBJECT_CHECK(ssi_sd_state, (obj), TYPE_SSI_SD)
+
 /* State word bits.  */
 #define SSI_SDR_LOCKED          0x0001
 #define SSI_SDR_WP_ERASE        0x0002
@@ -241,7 +244,6 @@ static void ssi_sd_realize(SSISlave *d, Error **errp)
     ssi_sd_state *s = FROM_SSI_SLAVE(ssi_sd_state, d);
     DriveInfo *dinfo;
 
-    s->mode = SSI_SD_CMD;
     /* FIXME use a qdev drive property instead of drive_get_next() */
     dinfo = drive_get_next(IF_SD);
     s->sd = sd_init(dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, true);
@@ -251,6 +253,24 @@ static void ssi_sd_realize(SSISlave *d, Error **errp)
     }
 }
 
+static void ssi_sd_reset(DeviceState *dev)
+{
+    ssi_sd_state *s = SSI_SD(dev);
+
+    s->mode = SSI_SD_CMD;
+    s->cmd = 0;
+    memset(s->cmdarg, 0, sizeof(s->cmdarg));
+    memset(s->response, 0, sizeof(s->response));
+    s->arglen = 0;
+    s->response_pos = 0;
+    s->stopping = 0;
+
+    /* Since we're still using the legacy SD API the card is not plugged
+     * into any bus, and we must reset it manually.
+     */
+    device_reset(DEVICE(s->sd));
+}
+
 static void ssi_sd_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -260,10 +280,11 @@ static void ssi_sd_class_init(ObjectClass *klass, void *data)
     k->transfer = ssi_sd_transfer;
     k->cs_polarity = SSI_CS_LOW;
     dc->vmsd = &vmstate_ssi_sd;
+    dc->reset = ssi_sd_reset;
 }
 
 static const TypeInfo ssi_sd_info = {
-    .name          = "ssi-sd",
+    .name          = TYPE_SSI_SD,
     .parent        = TYPE_SSI_SLAVE,
     .instance_size = sizeof(ssi_sd_state),
     .class_init    = ssi_sd_class_init,
diff --git a/hw/sd/trace-events b/hw/sd/trace-events
index 6eca3470e2..0a121156a3 100644
--- a/hw/sd/trace-events
+++ b/hw/sd/trace-events
@@ -1,5 +1,19 @@
 # See docs/devel/tracing.txt for syntax documentation.
 
+# hw/sd/sdhci.c
+sdhci_set_inserted(const char *level) "card state changed: %s"
+sdhci_send_command(uint8_t cmd, uint32_t arg) "CMD%02u ARG[0x%08x]"
+sdhci_error(const char *msg) "%s"
+sdhci_response4(uint32_t r0) "RSPREG[31..0]=0x%08x"
+sdhci_response16(uint32_t r3, uint32_t r2, uint32_t r1, uint32_t r0) "RSPREG[127..96]=0x%08x, RSPREG[95..64]=0x%08x, RSPREG[63..32]=0x%08x, RSPREG[31..0]=0x%08x"
+sdhci_end_transfer(uint8_t cmd, uint32_t arg) "Automatically issue CMD%02u 0x%08x"
+sdhci_adma(const char *desc, uint32_t sysad) "%s: admasysaddr=0x%" PRIx32
+sdhci_adma_loop(uint64_t addr, uint16_t length, uint8_t attr) "addr=0x%08" PRIx64 ", len=%d, attr=0x%x"
+sdhci_adma_transfer_completed(void) ""
+sdhci_access(const char *access, unsigned int size, uint64_t offset, const char *dir, uint64_t val, uint64_t val2) "%s%u: addr[0x%04" PRIx64 "] %s 0x%08" PRIx64 " (%" PRIu64 ")"
+sdhci_read_dataport(uint16_t data_count) "all %u bytes of data have been read from input buffer"
+sdhci_write_dataport(uint16_t data_count) "write buffer filled with %u bytes of data"
+
 # hw/sd/milkymist-memcard.c
 milkymist_memcard_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
 milkymist_memcard_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x"
diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c
index cbb01af57f..4ec2e35500 100644
--- a/hw/sh4/sh_pci.c
+++ b/hw/sh4/sh_pci.c
@@ -131,12 +131,12 @@ static int sh_pci_device_init(SysBusDevice *dev)
     for (i = 0; i < 4; i++) {
         sysbus_init_irq(dev, &s->irq[i]);
     }
-    phb->bus = pci_register_bus(DEVICE(dev), "pci",
-                                sh_pci_set_irq, sh_pci_map_irq,
-                                s->irq,
-                                get_system_memory(),
-                                get_system_io(),
-                                PCI_DEVFN(0, 0), 4, TYPE_PCI_BUS);
+    phb->bus = pci_register_root_bus(DEVICE(dev), "pci",
+                                     sh_pci_set_irq, sh_pci_map_irq,
+                                     s->irq,
+                                     get_system_memory(),
+                                     get_system_io(),
+                                     PCI_DEVFN(0, 0), 4, TYPE_PCI_BUS);
     memory_region_init_io(&s->memconfig_p4, OBJECT(s), &sh_pci_reg_ops, s,
                           "sh_pci", 0x224);
     memory_region_init_alias(&s->memconfig_a7, OBJECT(s), "sh_pci.2",
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 1a5437a07d..5d11f01874 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -96,6 +96,11 @@ static struct {
 } type4;
 
 static struct {
+    size_t nvalues;
+    const char **values;
+} type11;
+
+static struct {
     const char *loc_pfx, *bank, *manufacturer, *serial, *asset, *part;
     uint16_t speed;
 } type17;
@@ -282,6 +287,14 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = {
     { /* end of list */ }
 };
 
+static const QemuOptDesc qemu_smbios_type11_opts[] = {
+    {
+        .name = "value",
+        .type = QEMU_OPT_STRING,
+        .help = "OEM string data",
+    },
+};
+
 static const QemuOptDesc qemu_smbios_type17_opts[] = {
     {
         .name = "type",
@@ -590,6 +603,27 @@ static void smbios_build_type_4_table(unsigned instance)
     smbios_type4_count++;
 }
 
+static void smbios_build_type_11_table(void)
+{
+    char count_str[128];
+    size_t i;
+
+    if (type11.nvalues == 0) {
+        return;
+    }
+
+    SMBIOS_BUILD_TABLE_PRE(11, 0xe00, true); /* required */
+
+    snprintf(count_str, sizeof(count_str), "%zu", type11.nvalues);
+    t->count = type11.nvalues;
+
+    for (i = 0; i < type11.nvalues; i++) {
+        SMBIOS_TABLE_SET_STR_LIST(11, type11.values[i]);
+    }
+
+    SMBIOS_BUILD_TABLE_POST;
+}
+
 #define ONE_KB ((ram_addr_t)1 << 10)
 #define ONE_MB ((ram_addr_t)1 << 20)
 #define ONE_GB ((ram_addr_t)1 << 30)
@@ -832,6 +866,8 @@ void smbios_get_tables(const struct smbios_phys_mem_area *mem_array,
             smbios_build_type_4_table(i);
         }
 
+        smbios_build_type_11_table();
+
 #define MAX_DIMM_SZ (16ll * ONE_GB)
 #define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \
                                         : ((ram_size - 1) % MAX_DIMM_SZ) + 1)
@@ -882,6 +918,38 @@ static void save_opt(const char **dest, QemuOpts *opts, const char *name)
     }
 }
 
+
+struct opt_list {
+    const char *name;
+    size_t *ndest;
+    const char ***dest;
+};
+
+static int save_opt_one(void *opaque,
+                        const char *name, const char *value,
+                        Error **errp)
+{
+    struct opt_list *opt = opaque;
+
+    if (!g_str_equal(name, opt->name)) {
+        return 0;
+    }
+
+    *opt->dest = g_renew(const char *, *opt->dest, (*opt->ndest) + 1);
+    (*opt->dest)[*opt->ndest] = value;
+    (*opt->ndest)++;
+    return 0;
+}
+
+static void save_opt_list(size_t *ndest, const char ***dest,
+                          QemuOpts *opts, const char *name)
+{
+    struct opt_list opt = {
+        name, ndest, dest,
+    };
+    qemu_opt_foreach(opts, save_opt_one, &opt, NULL);
+}
+
 void smbios_entry_add(QemuOpts *opts, Error **errp)
 {
     const char *val;
@@ -1035,6 +1103,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
             save_opt(&type4.asset, opts, "asset");
             save_opt(&type4.part, opts, "part");
             return;
+        case 11:
+            qemu_opts_validate(opts, qemu_smbios_type11_opts, &error_fatal);
+            save_opt_list(&type11.nvalues, &type11.values, opts, "value");
+            return;
         case 17:
             qemu_opts_validate(opts, qemu_smbios_type17_opts, &error_fatal);
             save_opt(&type17.loc_pfx, opts, "loc_pfx");
diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h
index 68b8b72e09..93b360d520 100644
--- a/hw/smbios/smbios_build.h
+++ b/hw/smbios/smbios_build.h
@@ -63,6 +63,18 @@ extern unsigned smbios_table_cnt;
         }                                                                 \
     } while (0)
 
+#define SMBIOS_TABLE_SET_STR_LIST(tbl_type, value)                        \
+    do {                                                                  \
+        int len = (value != NULL) ? strlen(value) + 1 : 0;                \
+        if (len > 1) {                                                    \
+            smbios_tables = g_realloc(smbios_tables,                      \
+                                      smbios_tables_len + len);           \
+            memcpy(smbios_tables + smbios_tables_len, value, len);        \
+            smbios_tables_len += len;                                     \
+            ++str_index;                                                  \
+        }                                                                 \
+    } while (0)
+
 #define SMBIOS_BUILD_TABLE_POST                                           \
     do {                                                                  \
         size_t term_cnt, t_size;                                          \
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index af3a9d88de..228e82b3fb 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -3416,7 +3416,7 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
                      PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64,
                      &xhci->mem);
 
-    if (pci_bus_is_express(dev->bus) ||
+    if (pci_bus_is_express(pci_get_bus(dev)) ||
         xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) {
         ret = pcie_endpoint_cap_init(dev, 0xa0);
         assert(ret > 0);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c977ee327f..2c71295125 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1654,8 +1654,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
         return -EINVAL;
     }
 
-    if (!pci_bus_is_express(vdev->pdev.bus)) {
-        PCIBus *bus = vdev->pdev.bus;
+    if (!pci_bus_is_express(pci_get_bus(&vdev->pdev))) {
+        PCIBus *bus = pci_get_bus(&vdev->pdev);
         PCIDevice *bridge;
 
         /*
@@ -1680,14 +1680,14 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
          */
         while (!pci_bus_is_root(bus)) {
             bridge = pci_bridge_get_device(bus);
-            bus = bridge->bus;
+            bus = pci_get_bus(bridge);
         }
 
         if (pci_bus_is_express(bus)) {
             return 0;
         }
 
-    } else if (pci_bus_is_root(vdev->pdev.bus)) {
+    } else if (pci_bus_is_root(pci_get_bus(&vdev->pdev))) {
         /*
          * On a Root Complex bus Endpoints become Root Complex Integrated
          * Endpoints, which changes the type and clears the LNK & LNK2 fields.
@@ -1890,7 +1890,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint8_t *config;
 
     /* Only add extended caps if we have them and the guest can see them */
-    if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
+    if (!pci_is_express(pdev) || !pci_bus_is_express(pci_get_bus(pdev)) ||
         !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
         return;
     }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index e92837c42b..6c75cca88a 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1588,9 +1588,11 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
                        "neither legacy nor transitional device.");
             return ;
         }
-        /* legacy and transitional */
-        pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
-                     pci_get_word(config + PCI_VENDOR_ID));
+        /*
+         * Legacy and transitional devices use specific subsystem IDs.
+         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
+         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
+         */
         pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
     } else {
         /* pure virtio-1.0 */
@@ -1708,8 +1710,8 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
 {
     VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
     VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
-    bool pcie_port = pci_bus_is_express(pci_dev->bus) &&
-                     !pci_bus_is_root(pci_dev->bus);
+    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
+                     !pci_bus_is_root(pci_get_bus(pci_dev));
 
     if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
         proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index ad564b0132..d6002ee550 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2469,7 +2469,7 @@ void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
     va_end(ap);
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
-        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
+        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
         virtio_notify_config(vdev);
     }
 
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index d57c6d3485..f662f30370 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -73,7 +73,7 @@ void xen_pt_log(const PCIDevice *d, const char *f, ...)
 
     va_start(ap, f);
     if (d) {
-        fprintf(stderr, "[%02x:%02x.%d] ", pci_bus_num(d->bus),
+        fprintf(stderr, "[%02x:%02x.%d] ", pci_dev_bus_num(d),
                 PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
     }
     vfprintf(stderr, f, ap);
@@ -602,7 +602,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
     }
 
     args.type = d->io_regions[bar].type;
-    pci_for_each_device(d->bus, pci_bus_num(d->bus),
+    pci_for_each_device(pci_get_bus(d), pci_dev_bus_num(d),
                         xen_pt_check_bar_overlap, &args);
     if (args.rc) {
         XEN_PT_WARN(d, "Region: %d (addr: %#"FMT_PCIBUS
@@ -695,7 +695,7 @@ xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s,
     PCIDevice *d = &s->dev;
 
     gpu_dev_id = dev->device_id;
-    igd_passthrough_isa_bridge_create(d->bus, gpu_dev_id);
+    igd_passthrough_isa_bridge_create(pci_get_bus(d), gpu_dev_id);
 }
 
 /* destroy. */
@@ -711,7 +711,7 @@ static void xen_pt_destroy(PCIDevice *d) {
         intx = xen_pt_pci_intx(s);
         rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
                                      PT_IRQ_TYPE_PCI,
-                                     pci_bus_num(d->bus),
+                                     pci_dev_bus_num(d),
                                      PCI_SLOT(s->dev.devfn),
                                      intx,
                                      0 /* isa_irq */);
@@ -867,7 +867,7 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
         uint8_t e_intx = xen_pt_pci_intx(s);
 
         rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq,
-                                       pci_bus_num(d->bus),
+                                       pci_dev_bus_num(d),
                                        PCI_SLOT(d->devfn),
                                        e_intx);
         if (rc < 0) {
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index ef89c0c646..7c71fc7470 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -31,7 +31,6 @@
 #define  X86_IOMMU_GET_CLASS(obj) \
     OBJECT_GET_CLASS(X86IOMMUClass, obj, TYPE_X86_IOMMU_DEVICE)
 
-#define X86_IOMMU_PCI_DEVFN_MAX           256
 #define X86_IOMMU_SID_INVALID             (0xffff)
 
 typedef struct X86IOMMUState X86IOMMUState;
diff --git a/include/hw/pci-host/xilinx-pcie.h b/include/hw/pci-host/xilinx-pcie.h
index bec66b27c5..74c04dc9bb 100644
--- a/include/hw/pci-host/xilinx-pcie.h
+++ b/include/hw/pci-host/xilinx-pcie.h
@@ -23,7 +23,7 @@
 #include "hw/hw.h"
 #include "hw/sysbus.h"
 #include "hw/pci/pci.h"
-#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/pci/pcie_host.h"
 
 #define TYPE_XILINX_PCIE_HOST "xilinx-pcie-host"
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 8d02a0a383..15ced9648c 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -285,7 +285,6 @@ struct PCIDevice {
     uint8_t *used;
 
     /* the following fields are read only */
-    PCIBus *bus;
     int32_t devfn;
     /* Cached device to fetch requester ID from, to avoid the PCI
      * tree walking every time we invoke PCI request (e.g.,
@@ -400,26 +399,27 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
 
 bool pci_bus_is_express(PCIBus *bus);
 bool pci_bus_is_root(PCIBus *bus);
-void pci_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
-                         const char *name,
+void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
+                              const char *name,
+                              MemoryRegion *address_space_mem,
+                              MemoryRegion *address_space_io,
+                              uint8_t devfn_min, const char *typename);
+PCIBus *pci_root_bus_new(DeviceState *parent, const char *name,
                          MemoryRegion *address_space_mem,
                          MemoryRegion *address_space_io,
                          uint8_t devfn_min, const char *typename);
-PCIBus *pci_bus_new(DeviceState *parent, const char *name,
-                    MemoryRegion *address_space_mem,
-                    MemoryRegion *address_space_io,
-                    uint8_t devfn_min, const char *typename);
 void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
                   void *irq_opaque, int nirq);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
 /* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
 int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin);
-PCIBus *pci_register_bus(DeviceState *parent, const char *name,
-                         pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
-                         void *irq_opaque,
-                         MemoryRegion *address_space_mem,
-                         MemoryRegion *address_space_io,
-                         uint8_t devfn_min, int nirq, const char *typename);
+PCIBus *pci_register_root_bus(DeviceState *parent, const char *name,
+                              pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
+                              void *irq_opaque,
+                              MemoryRegion *address_space_mem,
+                              MemoryRegion *address_space_io,
+                              uint8_t devfn_min, int nirq,
+                              const char *typename);
 void pci_bus_set_route_irq_fn(PCIBus *, pci_route_irq_fn);
 PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin);
 bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
@@ -434,7 +434,16 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
 
 PCIDevice *pci_vga_init(PCIBus *bus);
 
+static inline PCIBus *pci_get_bus(const PCIDevice *dev)
+{
+    return PCI_BUS(qdev_get_parent_bus(DEVICE(dev)));
+}
 int pci_bus_num(PCIBus *s);
+static inline int pci_dev_bus_num(const PCIDevice *dev)
+{
+    return pci_bus_num(pci_get_bus(dev));
+}
+
 int pci_bus_numa_node(PCIBus *bus);
 void pci_for_each_device(PCIBus *bus, int bus_num,
                          void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
@@ -458,7 +467,6 @@ void pci_for_each_bus(PCIBus *bus,
     pci_for_each_bus_depth_first(bus, NULL, fn, opaque);
 }
 
-PCIBus *pci_find_primary_bus(void);
 PCIBus *pci_device_root_bus(const PCIDevice *d);
 const char *pci_root_bus_path(PCIDevice *dev);
 PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn);
@@ -739,7 +747,7 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
 
 static inline uint16_t pci_get_bdf(PCIDevice *dev)
 {
-    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
+    return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
 }
 
 uint16_t pci_requester_id(PCIDevice *dev);
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 1acadc2c15..9b44ffd22a 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -27,6 +27,54 @@
 #define QEMU_PCI_BRIDGE_H
 
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
+
+typedef struct PCIBridgeWindows PCIBridgeWindows;
+
+/*
+ * Aliases for each of the address space windows that the bridge
+ * can forward. Mapped into the bridge's parent's address space,
+ * as subregions.
+ */
+struct PCIBridgeWindows {
+    MemoryRegion alias_pref_mem;
+    MemoryRegion alias_mem;
+    MemoryRegion alias_io;
+    /*
+     * When bridge control VGA forwarding is enabled, bridges will
+     * provide positive decode on the PCI VGA defined I/O port and
+     * MMIO ranges.  When enabled forwarding is only qualified on the
+     * I/O and memory enable bits in the bridge command register.
+     */
+    MemoryRegion alias_vga[QEMU_PCI_VGA_NUM_REGIONS];
+};
+
+#define TYPE_PCI_BRIDGE "base-pci-bridge"
+#define PCI_BRIDGE(obj) OBJECT_CHECK(PCIBridge, (obj), TYPE_PCI_BRIDGE)
+
+struct PCIBridge {
+    /*< private >*/
+    PCIDevice parent_obj;
+    /*< public >*/
+
+    /* private member */
+    PCIBus sec_bus;
+    /*
+     * Memory regions for the bridge's address spaces.  These regions are not
+     * directly added to system_memory/system_io or its descendants.
+     * Bridge's secondary bus points to these, so that devices
+     * under the bridge see these regions as its address spaces.
+     * The regions are as large as the entire address space -
+     * they don't take into account any windows.
+     */
+    MemoryRegion address_space_mem;
+    MemoryRegion address_space_io;
+
+    PCIBridgeWindows *windows;
+
+    pci_map_irq_fn map_irq;
+    const char *bus_name;
+};
 
 #define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr"
 #define PCI_BRIDGE_DEV_PROP_MSI        "msi"
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index bc34fd0017..b7da8f555b 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -2,10 +2,10 @@
 #define QEMU_PCI_BUS_H
 
 /*
- * PCI Bus and Bridge datastructures.
+ * PCI Bus datastructures.
  *
  * Do not access the following members directly;
- * use accessor functions in pci.h, pci_bridge.h
+ * use accessor functions in pci.h
  */
 
 typedef struct PCIBusClass {
@@ -44,51 +44,4 @@ struct PCIBus {
     Notifier machine_done;
 };
 
-typedef struct PCIBridgeWindows PCIBridgeWindows;
-
-/*
- * Aliases for each of the address space windows that the bridge
- * can forward. Mapped into the bridge's parent's address space,
- * as subregions.
- */
-struct PCIBridgeWindows {
-    MemoryRegion alias_pref_mem;
-    MemoryRegion alias_mem;
-    MemoryRegion alias_io;
-    /*
-     * When bridge control VGA forwarding is enabled, bridges will
-     * provide positive decode on the PCI VGA defined I/O port and
-     * MMIO ranges.  When enabled forwarding is only qualified on the
-     * I/O and memory enable bits in the bridge command register.
-     */
-    MemoryRegion alias_vga[QEMU_PCI_VGA_NUM_REGIONS];
-};
-
-#define TYPE_PCI_BRIDGE "base-pci-bridge"
-#define PCI_BRIDGE(obj) OBJECT_CHECK(PCIBridge, (obj), TYPE_PCI_BRIDGE)
-
-struct PCIBridge {
-    /*< private >*/
-    PCIDevice parent_obj;
-    /*< public >*/
-
-    /* private member */
-    PCIBus sec_bus;
-    /*
-     * Memory regions for the bridge's address spaces.  These regions are not
-     * directly added to system_memory/system_io or its descendants.
-     * Bridge's secondary bus points to these, so that devices
-     * under the bridge see these regions as its address spaces.
-     * The regions are as large as the entire address space -
-     * they don't take into account any windows.
-     */
-    MemoryRegion address_space_mem;
-    MemoryRegion address_space_io;
-
-    PCIBridgeWindows *windows;
-
-    pci_map_irq_fn map_irq;
-    const char *bus_name;
-};
-
 #endif /* QEMU_PCI_BUS_H */
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 60b42ac561..5bbfec634b 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -31,6 +31,7 @@ extern const PropertyInfo qdev_prop_vlan;
 extern const PropertyInfo qdev_prop_pci_devfn;
 extern const PropertyInfo qdev_prop_blocksize;
 extern const PropertyInfo qdev_prop_pci_host_devaddr;
+extern const PropertyInfo qdev_prop_uuid;
 extern const PropertyInfo qdev_prop_arraylen;
 extern const PropertyInfo qdev_prop_link;
 
@@ -214,6 +215,14 @@ extern const PropertyInfo qdev_prop_link;
 #define DEFINE_PROP_MEMORY_REGION(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, MemoryRegion *)
 
+#define DEFINE_PROP_UUID(_name, _state, _field) {                  \
+        .name      = (_name),                                      \
+        .info      = &qdev_prop_uuid,                              \
+        .offset    = offsetof(_state, _field)                      \
+            + type_check(QemuUUID, typeof_field(_state, _field)),  \
+        .set_default = true,                                       \
+        }
+
 #define DEFINE_PROP_END_OF_LIST()               \
     {}
 
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
index 0f0c3f1e64..cb37182536 100644
--- a/include/hw/sd/sdhci.h
+++ b/include/hw/sd/sdhci.h
@@ -26,26 +26,29 @@
 #define SDHCI_H
 
 #include "qemu-common.h"
-#include "hw/block/block.h"
 #include "hw/pci/pci.h"
 #include "hw/sysbus.h"
 #include "hw/sd/sd.h"
 
 /* SD/MMC host controller state */
 typedef struct SDHCIState {
+    /*< private >*/
     union {
         PCIDevice pcidev;
         SysBusDevice busdev;
     };
+
+    /*< public >*/
     SDBus sdbus;
     MemoryRegion iomem;
+    AddressSpace *dma_as;
+    MemoryRegion *dma_mr;
 
     QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
     QEMUTimer *transfer_timer;
-    qemu_irq eject_cb;
-    qemu_irq ro_cb;
     qemu_irq irq;
 
+    /* Registers cleared on reset */
     uint32_t sdmasysad;    /* SDMA System Address register */
     uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
     uint16_t blkcnt;       /* Blocks count for current transfer */
@@ -70,19 +73,23 @@ typedef struct SDHCIState {
     uint16_t acmd12errsts; /* Auto CMD12 error status register */
     uint64_t admasysaddr;  /* ADMA System Address Register */
 
-    uint32_t capareg;      /* Capabilities Register */
-    uint32_t maxcurr;      /* Maximum Current Capabilities Register */
+    /* Read-only registers */
+    uint64_t capareg;      /* Capabilities Register */
+    uint64_t maxcurr;      /* Maximum Current Capabilities Register */
+
     uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
     uint32_t buf_maxsz;
     uint16_t data_count;   /* current element in FIFO buffer */
     uint8_t  stopped_state;/* Current SDHC state */
-    bool     pending_insert_quirk;/* Quirk for Raspberry Pi card insert int */
     bool     pending_insert_state;
     /* Buffer Data Port Register - virtual access point to R and W buffers */
     /* Software Reset Register - always reads as 0 */
     /* Force Event Auto CMD12 Error Interrupt Reg - write only */
     /* Force Event Error Interrupt Register- write only */
     /* RO Host Controller Version Register always reads as 0x2401 */
+
+    /* Configurable properties */
+    bool pending_insert_quirk; /* Quirk for Raspberry Pi card insert int */
 } SDHCIState;
 
 #define TYPE_PCI_SDHCI "sdhci-pci"
diff --git a/include/hw/smbios/smbios.h b/include/hw/smbios/smbios.h
index 31e8d5f47e..a83adb93d7 100644
--- a/include/hw/smbios/smbios.h
+++ b/include/hw/smbios/smbios.h
@@ -195,6 +195,12 @@ struct smbios_type_4 {
     uint16_t processor_family2;
 } QEMU_PACKED;
 
+/* SMBIOS type 11 - OEM strings */
+struct smbios_type_11 {
+    struct smbios_structure_header header;
+    uint8_t count;
+} QEMU_PACKED;
+
 /* SMBIOS type 16 - Physical Memory Array (v2.7) */
 struct smbios_type_16 {
     struct smbios_structure_header header;
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 86c7f26106..64a978e4e0 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -542,10 +542,10 @@ static inline void xen_map_pcidev(domid_t dom,
         return;
     }
 
-    trace_xen_map_pcidev(ioservid, pci_bus_num(pci_dev->bus),
+    trace_xen_map_pcidev(ioservid, pci_dev_bus_num(pci_dev),
                          PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
     xendevicemodel_map_pcidev_to_ioreq_server(xen_dmod, dom, ioservid, 0,
-                                              pci_bus_num(pci_dev->bus),
+                                              pci_dev_bus_num(pci_dev),
                                               PCI_SLOT(pci_dev->devfn),
                                               PCI_FUNC(pci_dev->devfn));
 }
@@ -558,10 +558,10 @@ static inline void xen_unmap_pcidev(domid_t dom,
         return;
     }
 
-    trace_xen_unmap_pcidev(ioservid, pci_bus_num(pci_dev->bus),
+    trace_xen_unmap_pcidev(ioservid, pci_dev_bus_num(pci_dev),
                            PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
     xendevicemodel_unmap_pcidev_from_ioreq_server(xen_dmod, dom, ioservid, 0,
-                                                  pci_bus_num(pci_dev->bus),
+                                                  pci_dev_bus_num(pci_dev),
                                                   PCI_SLOT(pci_dev->devfn),
                                                   PCI_FUNC(pci_dev->devfn));
 }
diff --git a/include/migration/misc.h b/include/migration/misc.h
index c079b7771b..77fd4f587c 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -44,6 +44,7 @@ void dump_vmstate_json_to_file(FILE *out_fp);
 
 /* migration/migration.c */
 void migration_object_init(void);
+void migration_object_finalize(void);
 void qemu_start_incoming_migration(const char *uri, Error **errp);
 bool migration_is_idle(void);
 void add_migration_state_change_notifier(Notifier *notify);
diff --git a/include/ui/sdl2.h b/include/ui/sdl2.h
index b29cf803c9..51084e6320 100644
--- a/include/ui/sdl2.h
+++ b/include/ui/sdl2.h
@@ -24,6 +24,7 @@ struct sdl2_console {
     int opengl;
     int updates;
     int idle_counter;
+    int ignore_hotkeys;
     SDL_GLContext winctx;
 #ifdef CONFIG_OPENGL
     QemuGLShader *gls;
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index aaf2019889..6b5c73b21c 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -86,7 +86,6 @@ struct SimpleSpiceDisplay {
     DisplayChangeListener dcl;
     void *buf;
     int bufsize;
-    QXLWorker *worker;
     QXLInstance qxl;
     uint32_t unique;
     pixman_image_t *surface;
diff --git a/migration/migration.c b/migration/migration.c
index 4de3b551fe..d3a1c494c0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -132,6 +132,11 @@ void migration_object_init(void)
     }
 }
 
+void migration_object_finalize(void)
+{
+    object_unref(OBJECT(current_migration));
+}
+
 /* For outgoing */
 MigrationState *migrate_get_current(void)
 {
@@ -591,14 +596,15 @@ static void populate_disk_info(MigrationInfo *info)
     }
 }
 
-MigrationInfo *qmp_query_migrate(Error **errp)
+static void fill_source_migration_info(MigrationInfo *info)
 {
-    MigrationInfo *info = g_malloc0(sizeof(*info));
     MigrationState *s = migrate_get_current();
 
     switch (s->state) {
     case MIGRATION_STATUS_NONE:
         /* no migration has happened ever */
+        /* do not overwrite destination migration status */
+        return;
         break;
     case MIGRATION_STATUS_SETUP:
         info->has_status = true;
@@ -613,7 +619,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->has_status = true;
         info->has_total_time = true;
         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
-            - s->total_time;
+            - s->start_time;
         info->has_expected_downtime = true;
         info->expected_downtime = s->expected_downtime;
         info->has_setup_time = true;
@@ -649,8 +655,6 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         break;
     }
     info->status = s->state;
-
-    return info;
 }
 
 /**
@@ -714,6 +718,41 @@ static bool migrate_caps_check(bool *cap_list,
     return true;
 }
 
+static void fill_destination_migration_info(MigrationInfo *info)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+
+    switch (mis->state) {
+    case MIGRATION_STATUS_NONE:
+        return;
+        break;
+    case MIGRATION_STATUS_SETUP:
+    case MIGRATION_STATUS_CANCELLING:
+    case MIGRATION_STATUS_CANCELLED:
+    case MIGRATION_STATUS_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+    case MIGRATION_STATUS_FAILED:
+    case MIGRATION_STATUS_COLO:
+        info->has_status = true;
+        break;
+    case MIGRATION_STATUS_COMPLETED:
+        info->has_status = true;
+        fill_destination_postcopy_migration_info(info);
+        break;
+    }
+    info->status = mis->state;
+}
+
+MigrationInfo *qmp_query_migrate(Error **errp)
+{
+    MigrationInfo *info = g_malloc0(sizeof(*info));
+
+    fill_destination_migration_info(info);
+    fill_source_migration_info(info);
+
+    return info;
+}
+
 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                   Error **errp)
 {
@@ -741,22 +780,20 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 static bool migrate_params_check(MigrationParameters *params, Error **errp)
 {
     if (params->has_compress_level &&
-        (params->compress_level < 0 || params->compress_level > 9)) {
+        (params->compress_level > 9)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                    "is invalid, it should be in the range of 0 to 9");
         return false;
     }
 
-    if (params->has_compress_threads &&
-        (params->compress_threads < 1 || params->compress_threads > 255)) {
+    if (params->has_compress_threads && (params->compress_threads < 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "compress_threads",
                    "is invalid, it should be in the range of 1 to 255");
         return false;
     }
 
-    if (params->has_decompress_threads &&
-        (params->decompress_threads < 1 || params->decompress_threads > 255)) {
+    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "decompress_threads",
                    "is invalid, it should be in the range of 1 to 255");
@@ -781,38 +818,31 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
         return false;
     }
 
-    if (params->has_max_bandwidth &&
-        (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) {
+    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
         error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
                          " range of 0 to %zu bytes/second", SIZE_MAX);
         return false;
     }
 
     if (params->has_downtime_limit &&
-        (params->downtime_limit < 0 ||
-         params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
+        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                          "the range of 0 to %d milliseconds",
                          MAX_MIGRATE_DOWNTIME);
         return false;
     }
 
-    if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                    "x_checkpoint_delay",
-                    "is invalid, it should be positive");
-        return false;
-    }
-    if (params->has_x_multifd_channels &&
-        (params->x_multifd_channels < 1 || params->x_multifd_channels > 255)) {
+    /* x_checkpoint_delay is now always positive */
+
+    if (params->has_x_multifd_channels && (params->x_multifd_channels < 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "multifd_channels",
                    "is invalid, it should be in the range of 1 to 255");
         return false;
     }
     if (params->has_x_multifd_page_count &&
-            (params->x_multifd_page_count < 1 ||
-             params->x_multifd_page_count > 10000)) {
+        (params->x_multifd_page_count < 1 ||
+         params->x_multifd_page_count > 10000)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "multifd_page_count",
                    "is invalid, it should be in the range of 1 to 10000");
@@ -1077,6 +1107,8 @@ static void migrate_fd_cleanup(void *opaque)
     qemu_bh_delete(s->cleanup_bh);
     s->cleanup_bh = NULL;
 
+    qemu_savevm_state_cleanup();
+
     if (s->to_dst_file) {
         Error *local_err = NULL;
 
@@ -1127,8 +1159,6 @@ void migrate_fd_error(MigrationState *s, const Error *error)
     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                       MIGRATION_STATUS_FAILED);
     migrate_set_error(s, error);
-    notifier_list_notify(&migration_state_notifiers, s);
-    block_cleanup_parameters(s);
 }
 
 static void migrate_fd_cancel(MigrationState *s)
@@ -1174,7 +1204,6 @@ static void migrate_fd_cancel(MigrationState *s)
             s->block_inactive = false;
         }
     }
-    block_cleanup_parameters(s);
 }
 
 void add_migration_state_change_notifier(Notifier *notify)
@@ -1268,7 +1297,11 @@ MigrationState *migrate_init(void)
 
     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
 
-    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    s->total_time = 0;
+    s->vm_was_running = false;
+    s->iteration_initial_bytes = 0;
+    s->threshold_size = 0;
     return s;
 }
 
@@ -1508,6 +1541,15 @@ bool migrate_zero_blocks(void)
     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
 }
 
+bool migrate_postcopy_blocktime(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
 bool migrate_use_compression(void)
 {
     MigrationState *s;
@@ -1843,7 +1885,7 @@ static int await_return_path_close_on_source(MigrationState *ms)
  * Switch from normal iteration to postcopy
  * Returns non-0 on error
  */
-static int postcopy_start(MigrationState *ms, bool *old_vm_running)
+static int postcopy_start(MigrationState *ms)
 {
     int ret;
     QIOChannelBuffer *bioc;
@@ -1861,7 +1903,6 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
     trace_postcopy_start_set_run();
 
     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
-    *old_vm_running = runstate_is_running();
     global_state_store();
     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
     if (ret < 0) {
@@ -2051,21 +2092,17 @@ static int migration_maybe_pause(MigrationState *s,
  *   The caller 'breaks' the loop when this returns.
  *
  * @s: Current migration state
- * @current_active_state: The migration state we expect to be in
- * @*old_vm_running: Pointer to old_vm_running flag
- * @*start_time: Pointer to time to update
  */
-static void migration_completion(MigrationState *s, int current_active_state,
-                                 bool *old_vm_running,
-                                 int64_t *start_time)
+static void migration_completion(MigrationState *s)
 {
     int ret;
+    int current_active_state = s->state;
 
     if (s->state == MIGRATION_STATUS_ACTIVE) {
         qemu_mutex_lock_iothread();
-        *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
-        *old_vm_running = runstate_is_running();
+        s->vm_was_running = runstate_is_running();
         ret = global_state_store();
 
         if (!ret) {
@@ -2152,6 +2189,155 @@ bool migrate_colo_enabled(void)
     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
 }
 
+static void migration_calculate_complete(MigrationState *s)
+{
+    uint64_t bytes = qemu_ftell(s->to_dst_file);
+    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    s->total_time = end_time - s->start_time;
+    if (!s->downtime) {
+        /*
+         * It's still not set, so we are precopy migration.  For
+         * postcopy, downtime is calculated during postcopy_start().
+         */
+        s->downtime = end_time - s->downtime_start;
+    }
+
+    if (s->total_time) {
+        s->mbps = ((double) bytes * 8.0) / s->total_time / 1000;
+    }
+}
+
+static void migration_update_counters(MigrationState *s,
+                                      int64_t current_time)
+{
+    uint64_t transferred, time_spent;
+    int64_t threshold_size;
+    double bandwidth;
+
+    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
+        return;
+    }
+
+    transferred = qemu_ftell(s->to_dst_file) - s->iteration_initial_bytes;
+    time_spent = current_time - s->iteration_start_time;
+    bandwidth = (double)transferred / time_spent;
+    threshold_size = bandwidth * s->parameters.downtime_limit;
+
+    s->mbps = (((double) transferred * 8.0) /
+               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
+
+    /*
+     * if we haven't sent anything, we don't want to
+     * recalculate. 10000 is a small enough number for our purposes
+     */
+    if (ram_counters.dirty_pages_rate && transferred > 10000) {
+        s->expected_downtime = ram_counters.dirty_pages_rate *
+            qemu_target_page_size() / bandwidth;
+    }
+
+    qemu_file_reset_rate_limit(s->to_dst_file);
+
+    s->iteration_start_time = current_time;
+    s->iteration_initial_bytes = qemu_ftell(s->to_dst_file);
+
+    trace_migrate_transferred(transferred, time_spent,
+                              bandwidth, threshold_size);
+}
+
+/* Migration thread iteration status */
+typedef enum {
+    MIG_ITERATE_RESUME,         /* Resume current iteration */
+    MIG_ITERATE_SKIP,           /* Skip current iteration */
+    MIG_ITERATE_BREAK,          /* Break the loop */
+} MigIterateState;
+
+/*
+ * Return true if continue to the next iteration directly, false
+ * otherwise.
+ */
+static MigIterateState migration_iteration_run(MigrationState *s)
+{
+    uint64_t pending_size, pend_post, pend_nonpost;
+    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
+
+    qemu_savevm_state_pending(s->to_dst_file, s->threshold_size,
+                              &pend_nonpost, &pend_post);
+    pending_size = pend_nonpost + pend_post;
+
+    trace_migrate_pending(pending_size, s->threshold_size,
+                          pend_post, pend_nonpost);
+
+    if (pending_size && pending_size >= s->threshold_size) {
+        /* Still a significant amount to transfer */
+        if (migrate_postcopy() && !in_postcopy &&
+            pend_nonpost <= s->threshold_size &&
+            atomic_read(&s->start_postcopy)) {
+            if (postcopy_start(s)) {
+                error_report("%s: postcopy failed to start", __func__);
+            }
+            return MIG_ITERATE_SKIP;
+        }
+        /* Just another iteration step */
+        qemu_savevm_state_iterate(s->to_dst_file,
+            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    } else {
+        trace_migration_thread_low_pending(pending_size);
+        migration_completion(s);
+        return MIG_ITERATE_BREAK;
+    }
+
+    return MIG_ITERATE_RESUME;
+}
+
+static void migration_iteration_finish(MigrationState *s)
+{
+    /* If we enabled cpu throttling for auto-converge, turn it off. */
+    cpu_throttle_stop();
+
+    qemu_mutex_lock_iothread();
+    switch (s->state) {
+    case MIGRATION_STATUS_COMPLETED:
+        migration_calculate_complete(s);
+        runstate_set(RUN_STATE_POSTMIGRATE);
+        break;
+
+    case MIGRATION_STATUS_ACTIVE:
+        /*
+         * We should really assert here, but since it's during
+         * migration, let's try to reduce the usage of assertions.
+         */
+        if (!migrate_colo_enabled()) {
+            error_report("%s: critical error: calling COLO code without "
+                         "COLO enabled", __func__);
+        }
+        migrate_start_colo_process(s);
+        /*
+         * Fixme: we will run VM in COLO no matter its old running state.
+         * After exited COLO, we will keep running.
+         */
+        s->vm_was_running = true;
+        /* Fallthrough */
+    case MIGRATION_STATUS_FAILED:
+    case MIGRATION_STATUS_CANCELLED:
+        if (s->vm_was_running) {
+            vm_start();
+        } else {
+            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+                runstate_set(RUN_STATE_POSTMIGRATE);
+            }
+        }
+        break;
+
+    default:
+        /* Should not reach here, but if so, forgive the VM. */
+        error_report("%s: Unknown ending state %d", __func__, s->state);
+        break;
+    }
+    qemu_bh_schedule(s->cleanup_bh);
+    qemu_mutex_unlock_iothread();
+}
+
 /*
  * Master migration thread on the source VM.
  * It drives the migration and pumps the data down the outgoing channel.
@@ -2159,26 +2345,12 @@ bool migrate_colo_enabled(void)
 static void *migration_thread(void *opaque)
 {
     MigrationState *s = opaque;
-    /* Used by the bandwidth calcs, updated later */
-    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-    int64_t initial_bytes = 0;
-    /*
-     * The final stage happens when the remaining data is smaller than
-     * this threshold; it's calculated from the requested downtime and
-     * measured bandwidth
-     */
-    int64_t threshold_size = 0;
-    int64_t start_time = initial_time;
-    int64_t end_time;
-    bool old_vm_running = false;
-    bool entered_postcopy = false;
-    /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
-    enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
-    bool enable_colo = migrate_colo_enabled();
 
     rcu_register_thread();
 
+    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
     qemu_savevm_state_header(s->to_dst_file);
 
     /*
@@ -2213,122 +2385,38 @@ static void *migration_thread(void *opaque)
     while (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
         int64_t current_time;
-        uint64_t pending_size;
 
         if (!qemu_file_rate_limit(s->to_dst_file)) {
-            uint64_t pend_post, pend_nonpost;
-
-            qemu_savevm_state_pending(s->to_dst_file, threshold_size,
-                                      &pend_nonpost, &pend_post);
-            pending_size = pend_nonpost + pend_post;
-            trace_migrate_pending(pending_size, threshold_size,
-                                  pend_post, pend_nonpost);
-            if (pending_size && pending_size >= threshold_size) {
-                /* Still a significant amount to transfer */
-
-                if (migrate_postcopy() &&
-                    s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
-                    pend_nonpost <= threshold_size &&
-                    atomic_read(&s->start_postcopy)) {
-
-                    if (!postcopy_start(s, &old_vm_running)) {
-                        current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
-                        entered_postcopy = true;
-                    }
-
-                    continue;
-                }
-                /* Just another iteration step */
-                qemu_savevm_state_iterate(s->to_dst_file, entered_postcopy);
-            } else {
-                trace_migration_thread_low_pending(pending_size);
-                migration_completion(s, current_active_state,
-                                     &old_vm_running, &start_time);
+            MigIterateState iter_state = migration_iteration_run(s);
+            if (iter_state == MIG_ITERATE_SKIP) {
+                continue;
+            } else if (iter_state == MIG_ITERATE_BREAK) {
                 break;
             }
         }
 
         if (qemu_file_get_error(s->to_dst_file)) {
-            migrate_set_state(&s->state, current_active_state,
-                              MIGRATION_STATUS_FAILED);
+            if (migration_is_setup_or_active(s->state)) {
+                migrate_set_state(&s->state, s->state,
+                                  MIGRATION_STATUS_FAILED);
+            }
             trace_migration_thread_file_err();
             break;
         }
+
         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-        if (current_time >= initial_time + BUFFER_DELAY) {
-            uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
-                                         initial_bytes;
-            uint64_t time_spent = current_time - initial_time;
-            double bandwidth = (double)transferred_bytes / time_spent;
-            threshold_size = bandwidth * s->parameters.downtime_limit;
-
-            s->mbps = (((double) transferred_bytes * 8.0) /
-                    ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
-
-            trace_migrate_transferred(transferred_bytes, time_spent,
-                                      bandwidth, threshold_size);
-            /* if we haven't sent anything, we don't want to recalculate
-               10000 is a small enough number for our purposes */
-            if (ram_counters.dirty_pages_rate && transferred_bytes > 10000) {
-                s->expected_downtime = ram_counters.dirty_pages_rate *
-                    qemu_target_page_size() / bandwidth;
-            }
 
-            qemu_file_reset_rate_limit(s->to_dst_file);
-            initial_time = current_time;
-            initial_bytes = qemu_ftell(s->to_dst_file);
-        }
+        migration_update_counters(s, current_time);
+
         if (qemu_file_rate_limit(s->to_dst_file)) {
             /* usleep expects microseconds */
-            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
+            g_usleep((s->iteration_start_time + BUFFER_DELAY -
+                      current_time) * 1000);
         }
     }
 
     trace_migration_thread_after_loop();
-    /* If we enabled cpu throttling for auto-converge, turn it off. */
-    cpu_throttle_stop();
-    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
-    qemu_mutex_lock_iothread();
-    /*
-     * The resource has been allocated by migration will be reused in COLO
-     * process, so don't release them.
-     */
-    if (!enable_colo) {
-        qemu_savevm_state_cleanup();
-    }
-    if (s->state == MIGRATION_STATUS_COMPLETED) {
-        uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
-        s->total_time = end_time - s->total_time;
-        if (!entered_postcopy) {
-            s->downtime = end_time - start_time;
-        }
-        if (s->total_time) {
-            s->mbps = (((double) transferred_bytes * 8.0) /
-                       ((double) s->total_time)) / 1000;
-        }
-        runstate_set(RUN_STATE_POSTMIGRATE);
-    } else {
-        if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
-            migrate_start_colo_process(s);
-            qemu_savevm_state_cleanup();
-            /*
-            * Fixme: we will run VM in COLO no matter its old running state.
-            * After exited COLO, we will keep running.
-            */
-            old_vm_running = true;
-        }
-        if (old_vm_running && !entered_postcopy) {
-            vm_start();
-        } else {
-            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
-                runstate_set(RUN_STATE_POSTMIGRATE);
-            }
-        }
-    }
-    qemu_bh_schedule(s->cleanup_bh);
-    qemu_mutex_unlock_iothread();
-
+    migration_iteration_finish(s);
     rcu_unregister_thread();
     return NULL;
 }
@@ -2375,10 +2463,15 @@ void migration_global_dump(Monitor *mon)
 {
     MigrationState *ms = migrate_get_current();
 
-    monitor_printf(mon, "globals: store-global-state=%d, only_migratable=%d, "
-                   "send-configuration=%d, send-section-footer=%d\n",
-                   ms->store_global_state, ms->only_migratable,
-                   ms->send_configuration, ms->send_section_footer);
+    monitor_printf(mon, "globals:\n");
+    monitor_printf(mon, "store-global-state: %s\n",
+                   ms->store_global_state ? "on" : "off");
+    monitor_printf(mon, "only-migratable: %s\n",
+                   ms->only_migratable ? "on" : "off");
+    monitor_printf(mon, "send-configuration: %s\n",
+                   ms->send_configuration ? "on" : "off");
+    monitor_printf(mon, "send-section-footer: %s\n",
+                   ms->send_section_footer ? "on" : "off");
 }
 
 #define DEFINE_PROP_MIG_CAP(name, x)             \
@@ -2394,33 +2487,33 @@ static Property migration_properties[] = {
                      send_section_footer, true),
 
     /* Migration parameters */
-    DEFINE_PROP_INT64("x-compress-level", MigrationState,
+    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                       parameters.compress_level,
                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
-    DEFINE_PROP_INT64("x-compress-threads", MigrationState,
+    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                       parameters.compress_threads,
                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
-    DEFINE_PROP_INT64("x-decompress-threads", MigrationState,
+    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                       parameters.decompress_threads,
                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
-    DEFINE_PROP_INT64("x-cpu-throttle-initial", MigrationState,
+    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                       parameters.cpu_throttle_initial,
                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
-    DEFINE_PROP_INT64("x-cpu-throttle-increment", MigrationState,
+    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                       parameters.cpu_throttle_increment,
                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
-    DEFINE_PROP_INT64("x-max-bandwidth", MigrationState,
+    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                       parameters.max_bandwidth, MAX_THROTTLE),
-    DEFINE_PROP_INT64("x-downtime-limit", MigrationState,
+    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                       parameters.downtime_limit,
                       DEFAULT_MIGRATE_SET_DOWNTIME),
-    DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState,
+    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                       parameters.x_checkpoint_delay,
                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
-    DEFINE_PROP_INT64("x-multifd-channels", MigrationState,
+    DEFINE_PROP_UINT8("x-multifd-channels", MigrationState,
                       parameters.x_multifd_channels,
                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
-    DEFINE_PROP_INT64("x-multifd-page-count", MigrationState,
+    DEFINE_PROP_UINT32("x-multifd-page-count", MigrationState,
                       parameters.x_multifd_page_count,
                       DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT),
     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
diff --git a/migration/migration.h b/migration/migration.h
index 663415fe48..f2bc1aaf85 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -22,6 +22,8 @@
 #include "hw/qdev.h"
 #include "io/channel.h"
 
+struct PostcopyBlocktimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
     QEMUFile *from_src_file;
@@ -59,10 +61,20 @@ struct MigrationIncomingState {
     /* The coroutine we should enter (back) after failover */
     Coroutine *migration_incoming_co;
     QemuSemaphore colo_incoming_sem;
+
+    /*
+     * PostcopyBlocktimeContext to keep information for postcopy
+     * live migration, to calculate vCPU block time
+     * */
+    struct PostcopyBlocktimeContext *blocktime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with blocktime context
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info);
 
 #define TYPE_MIGRATION "migration"
 
@@ -90,6 +102,17 @@ struct MigrationState
     QEMUBH *cleanup_bh;
     QEMUFile *to_dst_file;
 
+    /* bytes already send at the beggining of current interation */
+    uint64_t iteration_initial_bytes;
+    /* time at the start of current iteration */
+    int64_t iteration_start_time;
+    /*
+     * The final stage happens when the remaining data is smaller than
+     * this threshold; it's calculated from the requested downtime and
+     * measured bandwidth
+     */
+    int64_t threshold_size;
+
     /* params from 'migrate-set-parameters' */
     MigrationParameters parameters;
 
@@ -103,11 +126,22 @@ struct MigrationState
     } rp_state;
 
     double mbps;
+    /* Timestamp when recent migration starts (ms) */
+    int64_t start_time;
+    /* Total time used by latest migration (ms) */
     int64_t total_time;
+    /* Timestamp when VM is down (ms) to migrate the last stuff */
+    int64_t downtime_start;
     int64_t downtime;
     int64_t expected_downtime;
     bool enabled_capabilities[MIGRATION_CAPABILITY__MAX];
     int64_t setup_time;
+    /*
+     * Whether guest was running when we enter the completion stage.
+     * If migration is interrupted by any reason, we need to continue
+     * running the guest on source.
+     */
+    bool vm_was_running;
 
     /* Flag set once the migration has been asked to enter postcopy */
     bool start_postcopy;
@@ -201,6 +235,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
 bool migrate_use_events(void);
+bool migrate_postcopy_blocktime(void);
 
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index bec6c2c66b..7814da5b4b 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -61,6 +61,101 @@ struct PostcopyDiscardState {
 #include <sys/eventfd.h>
 #include <linux/userfaultfd.h>
 
+typedef struct PostcopyBlocktimeContext {
+    /* time when page fault initiated per vCPU */
+    int64_t *page_fault_vcpu_time;
+    /* page address per vCPU */
+    uintptr_t *vcpu_addr;
+    int64_t total_blocktime;
+    /* blocktime per vCPU */
+    int64_t *vcpu_blocktime;
+    /* point in time when last page fault was initiated */
+    int64_t last_begin;
+    /* number of vCPU are suspended */
+    int smp_cpus_down;
+
+    /*
+     * Handler for exit event, necessary for
+     * releasing whole blocktime_ctx
+     */
+    Notifier exit_notifier;
+} PostcopyBlocktimeContext;
+
+static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
+{
+    g_free(ctx->page_fault_vcpu_time);
+    g_free(ctx->vcpu_addr);
+    g_free(ctx->vcpu_blocktime);
+    g_free(ctx);
+}
+
+static void migration_exit_cb(Notifier *n, void *data)
+{
+    PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
+                                                 exit_notifier);
+    destroy_blocktime_context(ctx);
+}
+
+static struct PostcopyBlocktimeContext *blocktime_context_new(void)
+{
+    PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+    ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
+    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+
+    ctx->exit_notifier.notify = migration_exit_cb;
+    qemu_add_exit_notifier(&ctx->exit_notifier);
+    return ctx;
+}
+
+static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
+{
+    int64List *list = NULL, *entry = NULL;
+    int i;
+
+    for (i = smp_cpus - 1; i >= 0; i--) {
+        entry = g_new0(int64List, 1);
+        entry->value = ctx->vcpu_blocktime[i];
+        entry->next = list;
+        list = entry;
+    }
+
+    return list;
+}
+
+/*
+ * This function just populates MigrationInfo from postcopy's
+ * blocktime context. It will not populate MigrationInfo,
+ * unless postcopy-blocktime capability was set.
+ *
+ * @info: pointer to MigrationInfo to populate
+ */
+void fill_destination_postcopy_migration_info(MigrationInfo *info)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
+
+    if (!bc) {
+        return;
+    }
+
+    info->has_postcopy_blocktime = true;
+    info->postcopy_blocktime = bc->total_blocktime;
+    info->has_postcopy_vcpu_blocktime = true;
+    info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
+}
+
+static uint64_t get_postcopy_total_blocktime(void)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
+
+    if (!bc) {
+        return 0;
+    }
+
+    return bc->total_blocktime;
+}
 
 /**
  * receive_ufd_features: check userfault fd features, to request only supported
@@ -153,6 +248,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
         }
     }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+    if (migrate_postcopy_blocktime() && mis &&
+        UFFD_FEATURE_THREAD_ID & supported_features) {
+        /* kernel supports that feature */
+        /* don't create blocktime_context if it exists */
+        if (!mis->blocktime_ctx) {
+            mis->blocktime_ctx = blocktime_context_new();
+        }
+
+        asked_features |= UFFD_FEATURE_THREAD_ID;
+    }
+#endif
+
     /*
      * request features, even if asked_features is 0, due to
      * kernel expects UFFD_API before UFFDIO_REGISTER, per
@@ -423,6 +531,9 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
         mis->postcopy_tmp_zero_page = NULL;
     }
+    trace_postcopy_ram_incoming_cleanup_blocktime(
+            get_postcopy_total_blocktime());
+
     trace_postcopy_ram_incoming_cleanup_exit();
     return 0;
 }
@@ -494,6 +605,142 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
     return 0;
 }
 
+static int get_mem_fault_cpu_index(uint32_t pid)
+{
+    CPUState *cpu_iter;
+
+    CPU_FOREACH(cpu_iter) {
+        if (cpu_iter->thread_id == pid) {
+            trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid);
+            return cpu_iter->cpu_index;
+        }
+    }
+    trace_get_mem_fault_cpu_index(-1, pid);
+    return -1;
+}
+
+/*
+ * This function is being called when pagefault occurs. It
+ * tracks down vCPU blocking time.
+ *
+ * @addr: faulted host virtual address
+ * @ptid: faulted process thread id
+ * @rb: ramblock appropriate to addr
+ */
+static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
+                                          RAMBlock *rb)
+{
+    int cpu, already_received;
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+    int64_t now_ms;
+
+    if (!dc || ptid == 0) {
+        return;
+    }
+    cpu = get_mem_fault_cpu_index(ptid);
+    if (cpu < 0) {
+        return;
+    }
+
+    now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    if (dc->vcpu_addr[cpu] == 0) {
+        atomic_inc(&dc->smp_cpus_down);
+    }
+
+    atomic_xchg__nocheck(&dc->last_begin, now_ms);
+    atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
+    atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+
+    /* check it here, not at the begining of the function,
+     * due to, check could accur early than bitmap_set in
+     * qemu_ufd_copy_ioctl */
+    already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
+    if (already_received) {
+        atomic_xchg__nocheck(&dc->vcpu_addr[cpu], 0);
+        atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], 0);
+        atomic_dec(&dc->smp_cpus_down);
+    }
+    trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
+                                        cpu, already_received);
+}
+
+/*
+ *  This function just provide calculated blocktime per cpu and trace it.
+ *  Total blocktime is calculated in mark_postcopy_blocktime_end.
+ *
+ *
+ * Assume we have 3 CPU
+ *
+ *      S1        E1           S1               E1
+ * -----***********------------xxx***************------------------------> CPU1
+ *
+ *             S2                E2
+ * ------------****************xxx---------------------------------------> CPU2
+ *
+ *                         S3            E3
+ * ------------------------****xxx********-------------------------------> CPU3
+ *
+ * We have sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include CPU3
+ * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2 -
+ *            it's a part of total blocktime.
+ * S1 - here is last_begin
+ * Legend of the picture is following:
+ *              * - means blocktime per vCPU
+ *              x - means overlapped blocktime (total blocktime)
+ *
+ * @addr: host virtual address
+ */
+static void mark_postcopy_blocktime_end(uintptr_t addr)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+    int i, affected_cpu = 0;
+    int64_t now_ms;
+    bool vcpu_total_blocktime = false;
+    int64_t read_vcpu_time;
+
+    if (!dc) {
+        return;
+    }
+
+    now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    /* lookup cpu, to clear it,
+     * that algorithm looks straighforward, but it's not
+     * optimal, more optimal algorithm is keeping tree or hash
+     * where key is address value is a list of  */
+    for (i = 0; i < smp_cpus; i++) {
+        uint64_t vcpu_blocktime = 0;
+
+        read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
+        if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
+            read_vcpu_time == 0) {
+            continue;
+        }
+        atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
+        vcpu_blocktime = now_ms - read_vcpu_time;
+        affected_cpu += 1;
+        /* we need to know is that mark_postcopy_end was due to
+         * faulted page, another possible case it's prefetched
+         * page and in that case we shouldn't be here */
+        if (!vcpu_total_blocktime &&
+            atomic_fetch_add(&dc->smp_cpus_down, 0) == smp_cpus) {
+            vcpu_total_blocktime = true;
+        }
+        /* continue cycle, due to one page could affect several vCPUs */
+        dc->vcpu_blocktime[i] += vcpu_blocktime;
+    }
+
+    atomic_sub(&dc->smp_cpus_down, affected_cpu);
+    if (vcpu_total_blocktime) {
+        dc->total_blocktime += now_ms - atomic_fetch_add(&dc->last_begin, 0);
+    }
+    trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime,
+                                      affected_cpu);
+}
+
 /*
  * Handle faults detected by the USERFAULT markings
  */
@@ -571,8 +818,11 @@ static void *postcopy_ram_fault_thread(void *opaque)
         rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                 qemu_ram_get_idstr(rb),
-                                                rb_offset);
+                                                rb_offset,
+                                                msg.arg.pagefault.feat.ptid);
 
+        mark_postcopy_blocktime_begin((uintptr_t)(msg.arg.pagefault.address),
+                                      msg.arg.pagefault.feat.ptid, rb);
         /*
          * Send the request to the source - we want to request one
          * of our host page sizes (which is >= TPS)
@@ -662,6 +912,8 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
     if (!ret) {
         ramblock_recv_bitmap_set_range(rb, host_addr,
                                        pagesize / qemu_target_page_size());
+        mark_postcopy_blocktime_end((uintptr_t)host_addr);
+
     }
     return ret;
 }
@@ -759,6 +1011,10 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 
 #else
 /* No target OS support, stubs just fail */
+void fill_destination_postcopy_migration_info(MigrationInfo *info)
+{
+}
+
 bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
 {
     error_report("%s: No OS support", __func__);
diff --git a/migration/ram.c b/migration/ram.c
index 021d583b9b..cb1950f3eb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -237,7 +237,8 @@ static RAMState *ram_state;
 
 uint64_t ram_bytes_remaining(void)
 {
-    return ram_state->migration_dirty_pages * TARGET_PAGE_SIZE;
+    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
+                       0;
 }
 
 MigrationStats ram_counters;
diff --git a/migration/socket.c b/migration/socket.c
index dee869044a..3a8232dd2d 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -172,7 +172,6 @@ static void socket_start_incoming_migration(SocketAddress *saddr,
 
     if (qio_channel_socket_listen_sync(listen_ioc, saddr, errp) < 0) {
         object_unref(OBJECT(listen_ioc));
-        qapi_free_SocketAddress(saddr);
         return;
     }
 
@@ -181,7 +180,6 @@ static void socket_start_incoming_migration(SocketAddress *saddr,
                           socket_accept_incoming_migration,
                           listen_ioc,
                           (GDestroyNotify)object_unref);
-    qapi_free_SocketAddress(saddr);
 }
 
 void tcp_start_incoming_migration(const char *host_port, Error **errp)
@@ -191,6 +189,7 @@ void tcp_start_incoming_migration(const char *host_port, Error **errp)
     if (!err) {
         socket_start_incoming_migration(saddr, &err);
     }
+    qapi_free_SocketAddress(saddr);
     error_propagate(errp, err);
 }
 
@@ -198,4 +197,5 @@ void unix_start_incoming_migration(const char *path, Error **errp)
 {
     SocketAddress *saddr = unix_build_address(path);
     socket_start_incoming_migration(saddr, errp);
+    qapi_free_SocketAddress(saddr);
 }
diff --git a/migration/trace-events b/migration/trace-events
index 6f29fcc686..141e773305 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -115,6 +115,8 @@ process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
 process_incoming_migration_co_postcopy_end_main(void) ""
 migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
 migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname)  "ioc=%p ioctype=%s hostname=%s"
+mark_postcopy_blocktime_begin(uint64_t addr, void *dd, int64_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", cpu: %d, already_received: %d"
+mark_postcopy_blocktime_end(uint64_t addr, void *dd, int64_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", affected_cpu: %d"
 
 # migration/rdma.c
 qemu_rdma_accept_incoming_migration(void) ""
@@ -191,15 +193,17 @@ postcopy_ram_enable_notify(void) ""
 postcopy_ram_fault_thread_entry(void) ""
 postcopy_ram_fault_thread_exit(void) ""
 postcopy_ram_fault_thread_quit(void) ""
-postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx"
+postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset, uint32_t pid) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx pid=%u"
 postcopy_ram_incoming_cleanup_closeuf(void) ""
 postcopy_ram_incoming_cleanup_entry(void) ""
 postcopy_ram_incoming_cleanup_exit(void) ""
 postcopy_ram_incoming_cleanup_join(void) ""
+postcopy_ram_incoming_cleanup_blocktime(uint64_t total) "total blocktime %" PRIu64
 save_xbzrle_page_skipping(void) ""
 save_xbzrle_page_overflow(void) ""
 ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations"
 ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
+get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u"
 
 # migration/exec.c
 migration_exec_outgoing(const char *cmd) "cmd=%s"
diff --git a/qapi/migration.json b/qapi/migration.json
index 03f57c9616..70e7b677ef 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -156,6 +156,13 @@
 #              @status is 'failed'. Clients should not attempt to parse the
 #              error strings. (Since 2.7)
 #
+# @postcopy-blocktime: total time when all vCPU were blocked during postcopy
+#           live migration (Since 2.12)
+#
+# @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU (Since 2.12)
+#
+
+#
 # Since: 0.14.0
 ##
 { 'struct': 'MigrationInfo',
@@ -167,7 +174,9 @@
            '*downtime': 'int',
            '*setup-time': 'int',
            '*cpu-throttle-percentage': 'int',
-           '*error-desc': 'str'} }
+           '*error-desc': 'str',
+           '*postcopy-blocktime' : 'int64',
+           '*postcopy-vcpu-blocktime': ['int64']} }
 
 ##
 # @query-migrate:
@@ -352,12 +361,16 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+# @postcopy-blocktime: Calculate downtime for postcopy live migration
+#                     (since 2.12)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
            'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-           'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
+           'block', 'return-path', 'pause-before-switchover', 'x-multifd',
+           'postcopy-blocktime' ] }
 
 ##
 # @MigrationCapabilityStatus:
@@ -668,19 +681,19 @@
 # Since: 2.4
 ##
 { 'struct': 'MigrationParameters',
-  'data': { '*compress-level': 'int',
-            '*compress-threads': 'int',
-            '*decompress-threads': 'int',
-            '*cpu-throttle-initial': 'int',
-            '*cpu-throttle-increment': 'int',
+  'data': { '*compress-level': 'uint8',
+            '*compress-threads': 'uint8',
+            '*decompress-threads': 'uint8',
+            '*cpu-throttle-initial': 'uint8',
+            '*cpu-throttle-increment': 'uint8',
             '*tls-creds': 'str',
             '*tls-hostname': 'str',
-            '*max-bandwidth': 'int',
-            '*downtime-limit': 'int',
-            '*x-checkpoint-delay': 'int',
+            '*max-bandwidth': 'size',
+            '*downtime-limit': 'uint64',
+            '*x-checkpoint-delay': 'uint32',
             '*block-incremental': 'bool' ,
-            '*x-multifd-channels': 'int',
-            '*x-multifd-page-count': 'int',
+            '*x-multifd-channels': 'uint8',
+            '*x-multifd-page-count': 'uint32',
             '*xbzrle-cache-size': 'size' } }
 
 ##
diff --git a/qemu-doc.texi b/qemu-doc.texi
index a3d2054c90..3e9eb819a6 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -2587,6 +2587,15 @@ they were first deprecated in the 2.10.0 release.
 What follows is a list of all features currently marked as
 deprecated.
 
+@section Build options
+
+@subsection GTK 2.x
+
+Previously QEMU has supported building against both GTK 2.x
+and 3.x series APIs. Support for the GTK 2.x builds will be
+discontinued, so maintainers should switch to using GTK 3.x,
+which is the default.
+
 @section System emulator command line arguments
 
 @subsection -tdf (since 1.3.0)
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 14553876a2..88ff4adb30 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -234,6 +234,10 @@ class HTABSection(object):
 
         header = self.file.read32()
 
+        if (header == -1):
+            # "no HPT" encoding
+            return
+
         if (header > 0):
             # First section, just the hash shift
             return
diff --git a/slirp/dhcpv6.h b/slirp/dhcpv6.h
index 9189cd3f2d..3373f6cb89 100644
--- a/slirp/dhcpv6.h
+++ b/slirp/dhcpv6.h
@@ -17,6 +17,9 @@
                             0x00, 0x00, 0x00, 0x00,\
                             0x00, 0x01, 0x00, 0x02 } }
 
+#define in6_dhcp_multicast(a)\
+    in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST)
+
 void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m);
 
 #endif
diff --git a/slirp/ip.h b/slirp/ip.h
index 1df6723357..59cf4aa918 100644
--- a/slirp/ip.h
+++ b/slirp/ip.h
@@ -233,17 +233,4 @@ struct	ipasfrag {
 #define ipf_next     ipf_link.next
 #define ipf_prev     ipf_link.prev
 
-/*
- * Structure stored in mbuf in inpcb.ip_options
- * and passed to ip_output when ip options are in use.
- * The actual length of the options (including ipopt_dst)
- * is in m_len.
- */
-#define MAX_IPOPTLEN	40
-
-struct ipoption {
-	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
-	int8_t	ipopt_list[MAX_IPOPTLEN];	/* options proper */
-} QEMU_PACKED;
-
 #endif
diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c
index 777eb574be..ee333d05a2 100644
--- a/slirp/ip6_icmp.c
+++ b/slirp/ip6_icmp.c
@@ -77,7 +77,7 @@ void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code)
     DEBUG_ARGS((dfd, " type = %d, code = %d\n", type, code));
 
     if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) ||
-            IN6_IS_ADDR_UNSPECIFIED(&ip->ip_src)) {
+            in6_zero(&ip->ip_src)) {
         /* TODO icmp error? */
         return;
     }
@@ -272,7 +272,7 @@ static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp)
     struct mbuf *t = m_get(slirp);
     struct ip6 *rip = mtod(t, struct ip6 *);
     rip->ip_src = icmp->icmp6_nns.target;
-    if (IN6_IS_ADDR_UNSPECIFIED(&ip->ip_src)) {
+    if (in6_zero(&ip->ip_src)) {
         rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST;
     } else {
         rip->ip_dst = ip->ip_src;
@@ -350,7 +350,7 @@ static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip,
                 && icmp->icmp6_code == 0
                 && !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target)
                 && ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN
-                && (!IN6_IS_ADDR_UNSPECIFIED(&ip->ip_src)
+                && (!in6_zero(&ip->ip_src)
                     || in6_solicitednode_multicast(&ip->ip_dst))) {
             if (in6_equal_host(&icmp->icmp6_nns.target)) {
                 /* Gratuitous NDP */
diff --git a/slirp/libslirp.h b/slirp/libslirp.h
index f90f0f524c..540b3e5903 100644
--- a/slirp/libslirp.h
+++ b/slirp/libslirp.h
@@ -3,7 +3,6 @@
 
 #include "qemu-common.h"
 
-struct Slirp;
 typedef struct Slirp Slirp;
 
 int get_dns_addr(struct in_addr *pdns_addr);
diff --git a/slirp/ndp_table.c b/slirp/ndp_table.c
index 9d4c39b45c..e1676a0a7b 100644
--- a/slirp/ndp_table.c
+++ b/slirp/ndp_table.c
@@ -23,7 +23,7 @@ void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr,
                 ethaddr[0], ethaddr[1], ethaddr[2],
                 ethaddr[3], ethaddr[4], ethaddr[5]));
 
-    if (IN6_IS_ADDR_MULTICAST(&ip_addr) || IN6_IS_ADDR_UNSPECIFIED(&ip_addr)) {
+    if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) {
         /* Do not register multicast or unspecified addresses */
         DEBUG_CALL(" abort: do not register multicast or unspecified address");
         return;
@@ -60,7 +60,7 @@ bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr,
     DEBUG_ARG("ip = %s", addrstr);
 #endif
 
-    assert(!IN6_IS_ADDR_UNSPECIFIED(&ip_addr));
+    assert(!in6_zero(&ip_addr));
 
     /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */
     if (IN6_IS_ADDR_MULTICAST(&ip_addr)) {
diff --git a/slirp/slirp.h b/slirp/slirp.h
index 898ec9516d..06febfc78b 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -1,7 +1,6 @@
 #ifndef SLIRP_H
 #define SLIRP_H
 
-#include "qemu/host-utils.h"
 #include "slirp_config.h"
 
 #ifdef _WIN32
diff --git a/slirp/udp6.c b/slirp/udp6.c
index 9fa314bc2d..7c4a6b003a 100644
--- a/slirp/udp6.c
+++ b/slirp/udp6.c
@@ -65,7 +65,7 @@ void udp6_input(struct mbuf *m)
     /* handle DHCPv6 */
     if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT &&
         (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) ||
-         in6_equal(&ip->ip_dst, &(struct in6_addr)ALLDHCP_MULTICAST))) {
+         in6_dhcp_multicast(&ip->ip_dst))) {
         m->m_data += iphlen;
         m->m_len -= iphlen;
         dhcpv6_input(&lhost, m);
diff --git a/target/arm/helper.c b/target/arm/helper.c
index d1395f9b73..c83c901a86 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8305,6 +8305,7 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
         ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa,
                                  &txattrs, &s2prot, &s2size, fi, NULL);
         if (ret) {
+            assert(fi->type != ARMFault_None);
             fi->s2addr = addr;
             fi->stage2 = true;
             fi->s1ptw = true;
@@ -8328,7 +8329,9 @@ static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
+    MemTxResult result = MEMTX_OK;
     AddressSpace *as;
+    uint32_t data;
 
     attrs.secure = is_secure;
     as = arm_addressspace(cs, attrs);
@@ -8337,10 +8340,16 @@ static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
         return 0;
     }
     if (regime_translation_big_endian(env, mmu_idx)) {
-        return address_space_ldl_be(as, addr, attrs, NULL);
+        data = address_space_ldl_be(as, addr, attrs, &result);
     } else {
-        return address_space_ldl_le(as, addr, attrs, NULL);
+        data = address_space_ldl_le(as, addr, attrs, &result);
     }
+    if (result == MEMTX_OK) {
+        return data;
+    }
+    fi->type = ARMFault_SyncExternalOnWalk;
+    fi->ea = arm_extabort_type(result);
+    return 0;
 }
 
 static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
@@ -8349,7 +8358,9 @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
+    MemTxResult result = MEMTX_OK;
     AddressSpace *as;
+    uint32_t data;
 
     attrs.secure = is_secure;
     as = arm_addressspace(cs, attrs);
@@ -8358,10 +8369,16 @@ static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
         return 0;
     }
     if (regime_translation_big_endian(env, mmu_idx)) {
-        return address_space_ldq_be(as, addr, attrs, NULL);
+        data = address_space_ldq_be(as, addr, attrs, &result);
     } else {
-        return address_space_ldq_le(as, addr, attrs, NULL);
+        data = address_space_ldq_le(as, addr, attrs, &result);
+    }
+    if (result == MEMTX_OK) {
+        return data;
     }
+    fi->type = ARMFault_SyncExternalOnWalk;
+    fi->ea = arm_extabort_type(result);
+    return 0;
 }
 
 static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
@@ -8390,6 +8407,9 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
     }
     desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
                        mmu_idx, fi);
+    if (fi->type != ARMFault_None) {
+        goto do_fault;
+    }
     type = (desc & 3);
     domain = (desc >> 5) & 0x0f;
     if (regime_el(env, mmu_idx) == 1) {
@@ -8426,6 +8446,9 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
         }
         desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
                            mmu_idx, fi);
+        if (fi->type != ARMFault_None) {
+            goto do_fault;
+        }
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
             fi->type = ARMFault_Translation;
@@ -8508,6 +8531,9 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
     }
     desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
                        mmu_idx, fi);
+    if (fi->type != ARMFault_None) {
+        goto do_fault;
+    }
     type = (desc & 3);
     if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
         /* Section translation fault, or attempt to use the encoding
@@ -8559,6 +8585,9 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
         table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
         desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
                            mmu_idx, fi);
+        if (fi->type != ARMFault_None) {
+            goto do_fault;
+        }
         ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
@@ -8964,7 +8993,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         descaddr &= ~7ULL;
         nstable = extract32(tableattrs, 4, 1);
         descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi);
-        if (fi->s1ptw) {
+        if (fi->type != ARMFault_None) {
             goto do_fault;
         }
 
@@ -9272,6 +9301,13 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
                 case 6:
                     *prot |= PAGE_READ | PAGE_EXEC;
                     break;
+                case 7:
+                    /* for v7M, same as 6; for R profile a reserved value */
+                    if (arm_feature(env, ARM_FEATURE_M)) {
+                        *prot |= PAGE_READ | PAGE_EXEC;
+                        break;
+                    }
+                    /* fall through */
                 default:
                     qemu_log_mask(LOG_GUEST_ERROR,
                                   "DRACR[%d]: Bad value for AP bits: 0x%"
@@ -9290,6 +9326,13 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
                 case 6:
                     *prot |= PAGE_READ | PAGE_EXEC;
                     break;
+                case 7:
+                    /* for v7M, same as 6; for R profile a reserved value */
+                    if (arm_feature(env, ARM_FEATURE_M)) {
+                        *prot |= PAGE_READ | PAGE_EXEC;
+                        break;
+                    }
+                    /* fall through */
                 default:
                     qemu_log_mask(LOG_GUEST_ERROR,
                                   "DRACR[%d]: Bad value for AP bits: 0x%"
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 876854d876..89f5d2fe12 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -687,6 +687,16 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi)
     return fsc;
 }
 
+static inline bool arm_extabort_type(MemTxResult result)
+{
+    /* The EA bit in syndromes and fault status registers is an
+     * IMPDEF classification of external aborts. ARM implementations
+     * usually use this to indicate AXI bus Decode error (0) or
+     * Slave error (1); in QEMU we follow that.
+     */
+    return result != MEMTX_DECODE_ERROR;
+}
+
 /* Do a page table walk and add page to TLB if possible */
 bool arm_tlb_fill(CPUState *cpu, vaddr address,
                   MMUAccessType access_type, int mmu_idx,
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index b36206343d..712c5c55b6 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -220,12 +220,7 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
     /* now we have a real cpu fault */
     cpu_restore_state(cs, retaddr);
 
-    /* The EA bit in syndromes and fault status registers is an
-     * IMPDEF classification of external aborts. ARM implementations
-     * usually use this to indicate AXI bus Decode error (0) or
-     * Slave error (1); in QEMU we follow that.
-     */
-    fi.ea = (response != MEMTX_DECODE_ERROR);
+    fi.ea = arm_extabort_type(response);
     fi.type = ARMFault_SyncExternal;
     deliver_fault(cpu, addr, access_type, mmu_idx, &fi);
 }
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index cba5587812..70c1e08a36 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -4985,6 +4985,38 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
     }
 }
 
+/* The imm8 encodes the sign bit, enough bits to represent an exponent in
+ * the range 01....1xx to 10....0xx, and the most significant 4 bits of
+ * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
+ */
+static uint64_t vfp_expand_imm(int size, uint8_t imm8)
+{
+    uint64_t imm;
+
+    switch (size) {
+    case MO_64:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
+            extract32(imm8, 0, 6);
+        imm <<= 48;
+        break;
+    case MO_32:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
+            (extract32(imm8, 0, 6) << 3);
+        imm <<= 16;
+        break;
+    case MO_16:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
+            (extract32(imm8, 0, 6) << 6);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return imm;
+}
+
 /* Floating point immediate
  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
  * +---+---+---+-----------+------+---+------------+-------+------+------+
@@ -5008,22 +5040,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
         return;
     }
 
-    /* The imm8 encodes the sign bit, enough bits to represent
-     * an exponent in the range 01....1xx to 10....0xx,
-     * and the most significant 4 bits of the mantissa; see
-     * VFPExpandImm() in the v8 ARM ARM.
-     */
-    if (is_double) {
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
-            extract32(imm8, 0, 6);
-        imm <<= 48;
-    } else {
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
-            (extract32(imm8, 0, 6) << 3);
-        imm <<= 16;
-    }
+    imm = vfp_expand_imm(MO_32 + is_double, imm8);
 
     tcg_res = tcg_const_i64(imm);
     write_fp_dreg(s, rd, tcg_res);
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index 4bdfcd24d0..bec9efc4d8 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -50,7 +50,7 @@
 /* is_jmp field values */
 #define DISAS_UPDATE  DISAS_TARGET_0 /* cpu state was modified dynamically */
 
-typedef struct DisasContext {
+struct DisasContext {
     const XtensaConfig *config;
     TranslationBlock *tb;
     uint32_t pc;
@@ -78,7 +78,7 @@ typedef struct DisasContext {
     uint32_t *raw_arg;
     xtensa_insnbuf insnbuf;
     xtensa_insnbuf slotbuf;
-} DisasContext;
+};
 
 static TCGv_i32 cpu_pc;
 static TCGv_i32 cpu_R[16];
diff --git a/tests/migration-test.c b/tests/migration-test.c
index 799e24ebc6..9fd5dadc0d 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -25,6 +25,7 @@
 const unsigned start_address = 1024 * 1024;
 const unsigned end_address = 100 * 1024 * 1024;
 bool got_stop;
+static bool uffd_feature_thread_id;
 
 #if defined(__linux__)
 #include <sys/syscall.h>
@@ -54,6 +55,7 @@ static bool ufd_version_check(void)
         g_test_message("Skipping test: UFFDIO_API failed");
         return false;
     }
+    uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
 
     ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
                  (__u64)1 << _UFFDIO_UNREGISTER;
@@ -266,6 +268,16 @@ static uint64_t get_migration_pass(QTestState *who)
     return result;
 }
 
+static void read_blocktime(QTestState *who)
+{
+    QDict *rsp, *rsp_return;
+
+    rsp = wait_command(who, "{ 'execute': 'query-migrate' }");
+    rsp_return = qdict_get_qdict(rsp, "return");
+    g_assert(qdict_haskey(rsp_return, "postcopy-blocktime"));
+    QDECREF(rsp);
+}
+
 static void wait_for_migration_complete(QTestState *who)
 {
     QDict *rsp, *rsp_return;
@@ -525,6 +537,7 @@ static void test_migrate(void)
 
     migrate_set_capability(from, "postcopy-ram", "true");
     migrate_set_capability(to, "postcopy-ram", "true");
+    migrate_set_capability(to, "postcopy-blocktime", "true");
 
     /* We want to pick a speed slow enough that the test completes
      * quickly, but that it doesn't complete precopy even on a slow
@@ -553,6 +566,9 @@ static void test_migrate(void)
     wait_for_serial("dest_serial");
     wait_for_migration_complete(from);
 
+    if (uffd_feature_thread_id) {
+        read_blocktime(to);
+    }
     g_free(uri);
 
     test_migrate_end(from, to);
diff --git a/tests/pxe-test.c b/tests/pxe-test.c
index 937f29e631..5ca84805eb 100644
--- a/tests/pxe-test.c
+++ b/tests/pxe-test.c
@@ -22,14 +22,53 @@
 
 static char disk[] = "tests/pxe-test-disk-XXXXXX";
 
-static void test_pxe_one(const char *params, bool ipv6)
+typedef struct testdef {
+    const char *machine;    /* Machine type */
+    const char *model;      /* NIC device model */
+} testdef_t;
+
+static testdef_t x86_tests[] = {
+    { "pc", "e1000" },
+    { "pc", "virtio-net-pci" },
+    { "q35", "e1000e" },
+    { "q35", "virtio-net-pci", },
+    { NULL },
+};
+
+static testdef_t x86_tests_slow[] = {
+    { "pc", "ne2k_pci", },
+    { "pc", "i82550", },
+    { "pc", "rtl8139" },
+    { "pc", "vmxnet3" },
+    { NULL },
+};
+
+static testdef_t ppc64_tests[] = {
+    { "pseries", "spapr-vlan" },
+    { "pseries", "virtio-net-pci", },
+    { NULL },
+};
+
+static testdef_t ppc64_tests_slow[] = {
+    { "pseries", "e1000" },
+    { NULL },
+};
+
+static testdef_t s390x_tests[] = {
+    { "s390-ccw-virtio", "virtio-net-ccw" },
+    { NULL },
+};
+
+static void test_pxe_one(const testdef_t *test, bool ipv6)
 {
     char *args;
 
-    args = g_strdup_printf("-machine accel=kvm:tcg -nodefaults -boot order=n "
-                           "-netdev user,id=" NETNAME ",tftp=./,bootfile=%s,"
-                           "ipv4=%s,ipv6=%s %s", disk, ipv6 ? "off" : "on",
-                           ipv6 ? "on" : "off", params);
+    args = g_strdup_printf(
+        "-machine %s,accel=kvm:tcg -nodefaults -boot order=n "
+        "-netdev user,id=" NETNAME ",tftp=./,bootfile=%s,ipv4=%s,ipv6=%s "
+        "-device %s,bootindex=1,netdev=" NETNAME,
+        test->machine, disk, ipv6 ? "off" : "on", ipv6 ? "on" : "off",
+        test->model);
 
     qtest_start(args);
     boot_sector_test();
@@ -39,22 +78,38 @@ static void test_pxe_one(const char *params, bool ipv6)
 
 static void test_pxe_ipv4(gconstpointer data)
 {
-    const char *model = data;
-    char *dev_arg;
+    const testdef_t *test = data;
 
-    dev_arg = g_strdup_printf("-device %s,netdev=" NETNAME, model);
-    test_pxe_one(dev_arg, false);
-    g_free(dev_arg);
+    test_pxe_one(test, false);
 }
 
-static void test_pxe_spapr_vlan(void)
+static void test_pxe_ipv6(gconstpointer data)
 {
-    test_pxe_one("-device spapr-vlan,netdev=" NETNAME, true);
+    const testdef_t *test = data;
+
+    test_pxe_one(test, true);
 }
 
-static void test_pxe_virtio_ccw(void)
+static void test_batch(const testdef_t *tests, bool ipv6)
 {
-    test_pxe_one("-device virtio-net-ccw,bootindex=1,netdev=" NETNAME, false);
+    int i;
+
+    for (i = 0; tests[i].machine; i++) {
+        const testdef_t *test = &tests[i];
+        char *testname;
+
+        testname = g_strdup_printf("pxe/ipv4/%s/%s",
+                                   test->machine, test->model);
+        qtest_add_data_func(testname, test, test_pxe_ipv4);
+        g_free(testname);
+
+        if (ipv6) {
+            testname = g_strdup_printf("pxe/ipv6/%s/%s",
+                                       test->machine, test->model);
+            qtest_add_data_func(testname, test, test_pxe_ipv6);
+            g_free(testname);
+        }
+    }
 }
 
 int main(int argc, char *argv[])
@@ -69,23 +124,17 @@ int main(int argc, char *argv[])
     g_test_init(&argc, &argv, NULL);
 
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
-        qtest_add_data_func("pxe/e1000", "e1000", test_pxe_ipv4);
-        qtest_add_data_func("pxe/virtio", "virtio-net-pci", test_pxe_ipv4);
+        test_batch(x86_tests, false);
         if (g_test_slow()) {
-            qtest_add_data_func("pxe/ne2000", "ne2k_pci", test_pxe_ipv4);
-            qtest_add_data_func("pxe/eepro100", "i82550", test_pxe_ipv4);
-            qtest_add_data_func("pxe/pcnet", "pcnet", test_pxe_ipv4);
-            qtest_add_data_func("pxe/rtl8139", "rtl8139", test_pxe_ipv4);
-            qtest_add_data_func("pxe/vmxnet3", "vmxnet3", test_pxe_ipv4);
+            test_batch(x86_tests_slow, false);
         }
     } else if (strcmp(arch, "ppc64") == 0) {
-        qtest_add_func("pxe/spapr-vlan", test_pxe_spapr_vlan);
+        test_batch(ppc64_tests, g_test_slow());
         if (g_test_slow()) {
-            qtest_add_data_func("pxe/virtio", "virtio-net-pci", test_pxe_ipv4);
-            qtest_add_data_func("pxe/e1000", "e1000", test_pxe_ipv4);
+            test_batch(ppc64_tests_slow, true);
         }
     } else if (g_str_equal(arch, "s390x")) {
-        qtest_add_func("pxe/virtio-ccw", test_pxe_virtio_ccw);
+        test_batch(s390x_tests, g_test_slow());
     }
     ret = g_test_run();
     boot_sector_cleanup(disk);
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index e6fb9bac87..45f368dcd9 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -674,6 +674,30 @@ static void pci_hotplug(void)
     qtest_shutdown(qs);
 }
 
+/*
+ * Check that setting the vring addr on a non-existent virtqueue does
+ * not crash.
+ */
+static void test_nonexistent_virtqueue(void)
+{
+    QPCIBar bar0;
+    QOSState *qs;
+    QPCIDevice *dev;
+
+    qs = pci_test_start();
+    dev = qpci_device_find(qs->pcibus, QPCI_DEVFN(4, 0));
+    g_assert(dev != NULL);
+
+    qpci_device_enable(dev);
+    bar0 = qpci_iomap(dev, 0, NULL);
+
+    qpci_io_writeb(dev, bar0, VIRTIO_PCI_QUEUE_SEL, 2);
+    qpci_io_writel(dev, bar0, VIRTIO_PCI_QUEUE_PFN, 1);
+
+    g_free(dev);
+    qtest_shutdown(qs);
+}
+
 static void mmio_basic(void)
 {
     QVirtioMMIODevice *dev;
@@ -724,6 +748,7 @@ int main(int argc, char **argv)
         qtest_add_func("/virtio/blk/pci/basic", pci_basic);
         qtest_add_func("/virtio/blk/pci/indirect", pci_indirect);
         qtest_add_func("/virtio/blk/pci/config", pci_config);
+        qtest_add_func("/virtio/blk/pci/nxvirtq", test_nonexistent_virtqueue);
         if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
             qtest_add_func("/virtio/blk/pci/msix", pci_msix);
             qtest_add_func("/virtio/blk/pci/idx", pci_idx);
diff --git a/ui/gtk.c b/ui/gtk.c
index 342e96fbe9..f3b7567984 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -2248,6 +2248,11 @@ void gtk_display_init(DisplayState *ds, bool full_screen, bool grab_on_hover)
         exit(1);
     }
 
+#if !GTK_CHECK_VERSION(3, 0, 0)
+    g_printerr("Running QEMU with GTK 2.x is deprecated, and will be removed\n"
+               "in a future release. Please switch to GTK 3.x instead\n");
+#endif
+
     s->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
 #if GTK_CHECK_VERSION(3, 2, 0)
     s->vbox = gtk_box_new(GTK_ORIENTATION_VERTICAL, 0);
diff --git a/ui/input.c b/ui/input.c
index 3e2d324278..e5b78aae9e 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -421,6 +421,8 @@ void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down)
     } else if (queue_count < queue_limit) {
         qemu_input_queue_event(&kbd_queue, src, evt);
         qemu_input_queue_sync(&kbd_queue);
+    } else {
+        qapi_free_InputEvent(evt);
     }
 }
 
diff --git a/ui/sdl2.c b/ui/sdl2.c
index 8718cf36b5..89c6a2633c 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -276,32 +276,10 @@ static void sdl_send_mouse_event(struct sdl2_console *scon, int dx, int dy,
     }
 
     if (qemu_input_is_absolute()) {
-        int scr_w, scr_h;
-        int max_w = 0, max_h = 0;
-        int off_x = 0, off_y = 0;
-        int cur_off_x = 0, cur_off_y = 0;
-        int i;
-
-        for (i = 0; i < sdl2_num_outputs; i++) {
-            struct sdl2_console *thiscon = &sdl2_console[i];
-            if (thiscon->real_window && thiscon->surface) {
-                SDL_GetWindowSize(thiscon->real_window, &scr_w, &scr_h);
-                cur_off_x = thiscon->x;
-                cur_off_y = thiscon->y;
-                if (scr_w + cur_off_x > max_w) {
-                    max_w = scr_w + cur_off_x;
-                }
-                if (scr_h + cur_off_y > max_h) {
-                    max_h = scr_h + cur_off_y;
-                }
-                if (i == scon->idx) {
-                    off_x = cur_off_x;
-                    off_y = cur_off_y;
-                }
-            }
-        }
-        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_X, off_x + x, 0, max_w);
-        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_Y, off_y + y, 0, max_h);
+        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_X,
+                             x, 0, surface_width(scon->surface));
+        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_Y,
+                             y, 0, surface_height(scon->surface));
     } else {
         if (guest_cursor) {
             x -= guest_x;
@@ -334,22 +312,28 @@ static void toggle_full_screen(struct sdl2_console *scon)
     sdl2_redraw(scon);
 }
 
-static void handle_keydown(SDL_Event *ev)
+static int get_mod_state(void)
 {
-    int mod_state, win;
-    struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
+    SDL_Keymod mod = SDL_GetModState();
 
     if (alt_grab) {
-        mod_state = (SDL_GetModState() & (gui_grab_code | KMOD_LSHIFT)) ==
+        return (mod & (gui_grab_code | KMOD_LSHIFT)) ==
             (gui_grab_code | KMOD_LSHIFT);
     } else if (ctrl_grab) {
-        mod_state = (SDL_GetModState() & KMOD_RCTRL) == KMOD_RCTRL;
+        return (mod & KMOD_RCTRL) == KMOD_RCTRL;
     } else {
-        mod_state = (SDL_GetModState() & gui_grab_code) == gui_grab_code;
+        return (mod & gui_grab_code) == gui_grab_code;
     }
-    gui_key_modifier_pressed = mod_state;
+}
+
+static void handle_keydown(SDL_Event *ev)
+{
+    int win;
+    struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
+
+    gui_key_modifier_pressed = get_mod_state();
 
-    if (gui_key_modifier_pressed) {
+    if (!scon->ignore_hotkeys && gui_key_modifier_pressed && !ev->key.repeat) {
         switch (ev->key.keysym.scancode) {
         case SDL_SCANCODE_2:
         case SDL_SCANCODE_3:
@@ -423,6 +407,8 @@ static void handle_keyup(SDL_Event *ev)
     int mod_state;
     struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
 
+    scon->ignore_hotkeys = false;
+
     if (!alt_grab) {
         mod_state = (ev->key.keysym.mod & gui_grab_code);
     } else {
@@ -466,6 +452,10 @@ static void handle_mousemotion(SDL_Event *ev)
     int max_x, max_y;
     struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
 
+    if (!qemu_console_is_graphic(scon->dcl.con)) {
+        return;
+    }
+
     if (qemu_input_is_absolute() || absolute_enabled) {
         int scr_w, scr_h;
         SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h);
@@ -494,6 +484,10 @@ static void handle_mousebutton(SDL_Event *ev)
     SDL_MouseButtonEvent *bev;
     struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
 
+    if (!qemu_console_is_graphic(scon->dcl.con)) {
+        return;
+    }
+
     bev = &ev->button;
     if (!gui_grab && !qemu_input_is_absolute()) {
         if (ev->type == SDL_MOUSEBUTTONUP && bev->button == SDL_BUTTON_LEFT) {
@@ -516,6 +510,10 @@ static void handle_mousewheel(SDL_Event *ev)
     SDL_MouseWheelEvent *wev = &ev->wheel;
     InputButton btn;
 
+    if (!qemu_console_is_graphic(scon->dcl.con)) {
+        return;
+    }
+
     if (wev->y > 0) {
         btn = INPUT_BUTTON_WHEEL_UP;
     } else if (wev->y < 0) {
@@ -557,6 +555,14 @@ static void handle_windowevent(SDL_Event *ev)
         if (!gui_grab && (qemu_input_is_absolute() || absolute_enabled)) {
             absolute_mouse_grab(scon);
         }
+        /* If a new console window opened using a hotkey receives the
+         * focus, SDL sends another KEYDOWN event to the new window,
+         * closing the console window immediately after.
+         *
+         * Work around this by ignoring further hotkey events until a
+         * key is released.
+         */
+        scon->ignore_hotkeys = get_mod_state();
         break;
     case SDL_WINDOWEVENT_FOCUS_LOST:
         if (gui_grab && !gui_fullscreen) {
@@ -657,6 +663,11 @@ static void sdl_mouse_warp(DisplayChangeListener *dcl,
                            int x, int y, int on)
 {
     struct sdl2_console *scon = container_of(dcl, struct sdl2_console, dcl);
+
+    if (!qemu_console_is_graphic(scon->dcl.con)) {
+        return;
+    }
+
     if (on) {
         if (!guest_cursor) {
             sdl_show_cursor();
diff --git a/ui/spice-core.c b/ui/spice-core.c
index ea04dc69b5..2baf0c7120 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -55,9 +55,7 @@ static QemuThread me;
 
 struct SpiceTimer {
     QEMUTimer *timer;
-    QTAILQ_ENTRY(SpiceTimer) next;
 };
-static QTAILQ_HEAD(, SpiceTimer) timers = QTAILQ_HEAD_INITIALIZER(timers);
 
 static SpiceTimer *timer_add(SpiceTimerFunc func, void *opaque)
 {
@@ -65,7 +63,6 @@ static SpiceTimer *timer_add(SpiceTimerFunc func, void *opaque)
 
     timer = g_malloc0(sizeof(*timer));
     timer->timer = timer_new_ms(QEMU_CLOCK_REALTIME, func, opaque);
-    QTAILQ_INSERT_TAIL(&timers, timer, next);
     return timer;
 }
 
@@ -83,18 +80,14 @@ static void timer_remove(SpiceTimer *timer)
 {
     timer_del(timer->timer);
     timer_free(timer->timer);
-    QTAILQ_REMOVE(&timers, timer, next);
     g_free(timer);
 }
 
 struct SpiceWatch {
     int fd;
-    int event_mask;
     SpiceWatchFunc func;
     void *opaque;
-    QTAILQ_ENTRY(SpiceWatch) next;
 };
-static QTAILQ_HEAD(, SpiceWatch) watches = QTAILQ_HEAD_INITIALIZER(watches);
 
 static void watch_read(void *opaque)
 {
@@ -113,11 +106,10 @@ static void watch_update_mask(SpiceWatch *watch, int event_mask)
     IOHandler *on_read = NULL;
     IOHandler *on_write = NULL;
 
-    watch->event_mask = event_mask;
-    if (watch->event_mask & SPICE_WATCH_EVENT_READ) {
+    if (event_mask & SPICE_WATCH_EVENT_READ) {
         on_read = watch_read;
     }
-    if (watch->event_mask & SPICE_WATCH_EVENT_WRITE) {
+    if (event_mask & SPICE_WATCH_EVENT_WRITE) {
         on_write = watch_write;
     }
     qemu_set_fd_handler(watch->fd, on_read, on_write, watch);
@@ -131,7 +123,6 @@ static SpiceWatch *watch_add(int fd, int event_mask, SpiceWatchFunc func, void *
     watch->fd     = fd;
     watch->func   = func;
     watch->opaque = opaque;
-    QTAILQ_INSERT_TAIL(&watches, watch, next);
 
     watch_update_mask(watch, event_mask);
     return watch;
@@ -140,7 +131,6 @@ static SpiceWatch *watch_add(int fd, int event_mask, SpiceWatchFunc func, void *
 static void watch_remove(SpiceWatch *watch)
 {
     qemu_set_fd_handler(watch->fd, NULL, NULL, NULL);
-    QTAILQ_REMOVE(&watches, watch, next);
     g_free(watch);
 }
 
diff --git a/ui/spice-display.c b/ui/spice-display.c
index ad1ceafb3f..85a72fe76a 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -519,7 +519,6 @@ static void interface_attach_worker(QXLInstance *sin, QXLWorker *qxl_worker)
     SimpleSpiceDisplay *ssd = container_of(sin, SimpleSpiceDisplay, qxl);
 
     dprint(1, "%s/%d:\n", __func__, ssd->qxl.id);
-    ssd->worker = qxl_worker;
 }
 
 static void interface_set_compression_level(QXLInstance *sin, int level)
@@ -1028,7 +1027,6 @@ static void qemu_spice_display_init_one(QemuConsole *con)
 
     ssd->qxl.base.sif = &dpy_interface.base;
     qemu_spice_add_display_interface(&ssd->qxl, con);
-    assert(ssd->worker);
     qemu_spice_create_host_memslot(ssd);
 
     register_displaychangelistener(&ssd->dcl);
diff --git a/ui/trace-events b/ui/trace-events
index 1a9f126330..85f74f948b 100644
--- a/ui/trace-events
+++ b/ui/trace-events
@@ -35,6 +35,13 @@ vnc_client_connect(void *state, void *ioc) "VNC client connect state=%p ioc=%p"
 vnc_client_disconnect_start(void *state, void *ioc) "VNC client disconnect start state=%p ioc=%p"
 vnc_client_disconnect_finish(void *state, void *ioc) "VNC client disconnect finish state=%p ioc=%p"
 vnc_client_io_wrap(void *state, void *ioc, const char *type) "VNC client I/O wrap state=%p ioc=%p type=%s"
+vnc_client_throttle_threshold(void *state, void *ioc, size_t oldoffset, size_t offset, int client_width, int client_height, int bytes_per_pixel, void *audio_cap) "VNC client throttle threshold state=%p ioc=%p oldoffset=%zu newoffset=%zu width=%d height=%d bpp=%d audio=%p"
+vnc_client_throttle_incremental(void *state, void *ioc, int job_update, size_t offset) "VNC client throttle incremental state=%p ioc=%p job-update=%d offset=%zu"
+vnc_client_throttle_forced(void *state, void *ioc, int job_update, size_t offset) "VNC client throttle forced state=%p ioc=%p job-update=%d offset=%zu"
+vnc_client_throttle_audio(void *state, void *ioc, size_t offset) "VNC client throttle audio state=%p ioc=%p offset=%zu"
+vnc_client_unthrottle_forced(void *state, void *ioc) "VNC client unthrottle forced offset state=%p ioc=%p"
+vnc_client_unthrottle_incremental(void *state, void *ioc, size_t offset) "VNC client unthrottle incremental state=%p ioc=%p offset=%zu"
+vnc_client_output_limit(void *state, void *ioc, size_t offset, size_t threshold) "VNC client output limit state=%p ioc=%p offset=%zu threshold=%zu"
 vnc_auth_init(void *display, int websock, int auth, int subauth) "VNC auth init state=%p websock=%d auth=%d subauth=%d"
 vnc_auth_start(void *state, int method) "VNC client auth start state=%p method=%d"
 vnc_auth_pass(void *state, int method) "VNC client auth passed state=%p method=%d"
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 23f28280e7..74a5f513f2 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -48,9 +48,9 @@ void vnc_sasl_client_cleanup(VncState *vs)
 }
 
 
-long vnc_client_write_sasl(VncState *vs)
+size_t vnc_client_write_sasl(VncState *vs)
 {
-    long ret;
+    size_t ret;
 
     VNC_DEBUG("Write SASL: Pending output %p size %zd offset %zd "
               "Encoded: %p size %d offset %d\n",
@@ -67,6 +67,7 @@ long vnc_client_write_sasl(VncState *vs)
         if (err != SASL_OK)
             return vnc_client_io_error(vs, -1, NULL);
 
+        vs->sasl.encodedRawLength = vs->output.offset;
         vs->sasl.encodedOffset = 0;
     }
 
@@ -78,7 +79,12 @@ long vnc_client_write_sasl(VncState *vs)
 
     vs->sasl.encodedOffset += ret;
     if (vs->sasl.encodedOffset == vs->sasl.encodedLength) {
-        vs->output.offset = 0;
+        if (vs->sasl.encodedRawLength >= vs->force_update_offset) {
+            vs->force_update_offset = 0;
+        } else {
+            vs->force_update_offset -= vs->sasl.encodedRawLength;
+        }
+        vs->output.offset -= vs->sasl.encodedRawLength;
         vs->sasl.encoded = NULL;
         vs->sasl.encodedOffset = vs->sasl.encodedLength = 0;
     }
@@ -100,9 +106,9 @@ long vnc_client_write_sasl(VncState *vs)
 }
 
 
-long vnc_client_read_sasl(VncState *vs)
+size_t vnc_client_read_sasl(VncState *vs)
 {
-    long ret;
+    size_t ret;
     uint8_t encoded[4096];
     const char *decoded;
     unsigned int decodedLen;
diff --git a/ui/vnc-auth-sasl.h b/ui/vnc-auth-sasl.h
index cb42745a6b..2ae224ee3a 100644
--- a/ui/vnc-auth-sasl.h
+++ b/ui/vnc-auth-sasl.h
@@ -53,6 +53,7 @@ struct VncStateSASL {
      */
     const uint8_t *encoded;
     unsigned int encodedLength;
+    unsigned int encodedRawLength;
     unsigned int encodedOffset;
     char *username;
     char *mechlist;
@@ -64,8 +65,8 @@ struct VncDisplaySASL {
 
 void vnc_sasl_client_cleanup(VncState *vs);
 
-long vnc_client_read_sasl(VncState *vs);
-long vnc_client_write_sasl(VncState *vs);
+size_t vnc_client_read_sasl(VncState *vs);
+size_t vnc_client_write_sasl(VncState *vs);
 
 void start_auth_sasl(VncState *vs);
 
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index f7867771ae..e326679dd0 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -152,6 +152,11 @@ void vnc_jobs_consume_buffer(VncState *vs)
                 vs->ioc, G_IO_IN | G_IO_OUT, vnc_client_io, vs, NULL);
         }
         buffer_move(&vs->output, &vs->jobs_buffer);
+
+        if (vs->job_update == VNC_STATE_UPDATE_FORCE) {
+            vs->force_update_offset = vs->output.offset;
+        }
+        vs->job_update = VNC_STATE_UPDATE_NONE;
     }
     flush = vs->ioc != NULL && vs->abort != true;
     vnc_unlock_output(vs);
diff --git a/ui/vnc.c b/ui/vnc.c
index 9f8d5a1b1f..665a143578 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -60,6 +60,7 @@ static QTAILQ_HEAD(, VncDisplay) vnc_displays =
 
 static int vnc_cursor_define(VncState *vs);
 static void vnc_release_modifiers(VncState *vs);
+static void vnc_update_throttle_offset(VncState *vs);
 
 static void vnc_set_share_mode(VncState *vs, VncShareMode mode)
 {
@@ -596,7 +597,7 @@ VncInfo2List *qmp_query_vnc_servers(Error **errp)
    3) resolutions > 1024
 */
 
-static int vnc_update_client(VncState *vs, int has_dirty, bool sync);
+static int vnc_update_client(VncState *vs, int has_dirty);
 static void vnc_disconnect_start(VncState *vs);
 
 static void vnc_colordepth(VncState *vs);
@@ -766,6 +767,7 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
         vnc_set_area_dirty(vs->dirty, vd, 0, 0,
                            vnc_width(vd),
                            vnc_height(vd));
+        vnc_update_throttle_offset(vs);
     }
 }
 
@@ -961,85 +963,168 @@ static int find_and_clear_dirty_height(VncState *vs,
     return h;
 }
 
-static int vnc_update_client(VncState *vs, int has_dirty, bool sync)
+/*
+ * Figure out how much pending data we should allow in the output
+ * buffer before we throttle incremental display updates, and/or
+ * drop audio samples.
+ *
+ * We allow for equiv of 1 full display's worth of FB updates,
+ * and 1 second of audio samples. If audio backlog was larger
+ * than that the client would already suffering awful audio
+ * glitches, so dropping samples is no worse really).
+ */
+static void vnc_update_throttle_offset(VncState *vs)
 {
-    if (vs->disconnecting) {
-        vnc_disconnect_finish(vs);
-        return 0;
+    size_t offset =
+        vs->client_width * vs->client_height * vs->client_pf.bytes_per_pixel;
+
+    if (vs->audio_cap) {
+        int freq = vs->as.freq;
+        /* We don't limit freq when reading settings from client, so
+         * it could be upto MAX_INT in size. 48khz is a sensible
+         * upper bound for trustworthy clients */
+        int bps;
+        if (freq > 48000) {
+            freq = 48000;
+        }
+        switch (vs->as.fmt) {
+        default:
+        case  AUD_FMT_U8:
+        case  AUD_FMT_S8:
+            bps = 1;
+            break;
+        case  AUD_FMT_U16:
+        case  AUD_FMT_S16:
+            bps = 2;
+            break;
+        case  AUD_FMT_U32:
+        case  AUD_FMT_S32:
+            bps = 4;
+            break;
+        }
+        offset += freq * bps * vs->as.nchannels;
     }
 
-    vs->has_dirty += has_dirty;
-    if (vs->need_update && !vs->disconnecting) {
-        VncDisplay *vd = vs->vd;
-        VncJob *job;
-        int y;
-        int height, width;
-        int n = 0;
-
-        if (vs->output.offset && !vs->audio_cap && !vs->force_update)
-            /* kernel send buffers are full -> drop frames to throttle */
-            return 0;
+    /* Put a floor of 1MB on offset, so that if we have a large pending
+     * buffer and the display is resized to a small size & back again
+     * we don't suddenly apply a tiny send limit
+     */
+    offset = MAX(offset, 1024 * 1024);
 
-        if (!vs->has_dirty && !vs->audio_cap && !vs->force_update)
-            return 0;
+    if (vs->throttle_output_offset != offset) {
+        trace_vnc_client_throttle_threshold(
+            vs, vs->ioc, vs->throttle_output_offset, offset, vs->client_width,
+            vs->client_height, vs->client_pf.bytes_per_pixel, vs->audio_cap);
+    }
 
-        /*
-         * Send screen updates to the vnc client using the server
-         * surface and server dirty map.  guest surface updates
-         * happening in parallel don't disturb us, the next pass will
-         * send them to the client.
+    vs->throttle_output_offset = offset;
+}
+
+static bool vnc_should_update(VncState *vs)
+{
+    switch (vs->update) {
+    case VNC_STATE_UPDATE_NONE:
+        break;
+    case VNC_STATE_UPDATE_INCREMENTAL:
+        /* Only allow incremental updates if the pending send queue
+         * is less than the permitted threshold, and the job worker
+         * is completely idle.
          */
-        job = vnc_job_new(vs);
-
-        height = pixman_image_get_height(vd->server);
-        width = pixman_image_get_width(vd->server);
-
-        y = 0;
-        for (;;) {
-            int x, h;
-            unsigned long x2;
-            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
-                                                 height * VNC_DIRTY_BPL(vs),
-                                                 y * VNC_DIRTY_BPL(vs));
-            if (offset == height * VNC_DIRTY_BPL(vs)) {
-                /* no more dirty bits */
-                break;
-            }
-            y = offset / VNC_DIRTY_BPL(vs);
-            x = offset % VNC_DIRTY_BPL(vs);
-            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
-                                    VNC_DIRTY_BPL(vs), x);
-            bitmap_clear(vs->dirty[y], x, x2 - x);
-            h = find_and_clear_dirty_height(vs, y, x, x2, height);
-            x2 = MIN(x2, width / VNC_DIRTY_PIXELS_PER_BIT);
-            if (x2 > x) {
-                n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
-                                      (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
-            }
-            if (!x && x2 == width / VNC_DIRTY_PIXELS_PER_BIT) {
-                y += h;
-                if (y == height) {
-                    break;
-                }
-            }
+        if (vs->output.offset < vs->throttle_output_offset &&
+            vs->job_update == VNC_STATE_UPDATE_NONE) {
+            return true;
         }
-
-        vnc_job_push(job);
-        if (sync) {
-            vnc_jobs_join(vs);
+        trace_vnc_client_throttle_incremental(
+            vs, vs->ioc, vs->job_update, vs->output.offset);
+        break;
+    case VNC_STATE_UPDATE_FORCE:
+        /* Only allow forced updates if the pending send queue
+         * does not contain a previous forced update, and the
+         * job worker is completely idle.
+         *
+         * Note this means we'll queue a forced update, even if
+         * the output buffer size is otherwise over the throttle
+         * output limit.
+         */
+        if (vs->force_update_offset == 0 &&
+            vs->job_update == VNC_STATE_UPDATE_NONE) {
+            return true;
         }
-        vs->force_update = 0;
-        vs->has_dirty = 0;
-        return n;
+        trace_vnc_client_throttle_forced(
+            vs, vs->ioc, vs->job_update, vs->force_update_offset);
+        break;
     }
+    return false;
+}
+
+static int vnc_update_client(VncState *vs, int has_dirty)
+{
+    VncDisplay *vd = vs->vd;
+    VncJob *job;
+    int y;
+    int height, width;
+    int n = 0;
 
     if (vs->disconnecting) {
         vnc_disconnect_finish(vs);
-    } else if (sync) {
-        vnc_jobs_join(vs);
+        return 0;
     }
 
-    return 0;
+    vs->has_dirty += has_dirty;
+    if (!vnc_should_update(vs)) {
+        return 0;
+    }
+
+    if (!vs->has_dirty && vs->update != VNC_STATE_UPDATE_FORCE) {
+        return 0;
+    }
+
+    /*
+     * Send screen updates to the vnc client using the server
+     * surface and server dirty map.  guest surface updates
+     * happening in parallel don't disturb us, the next pass will
+     * send them to the client.
+     */
+    job = vnc_job_new(vs);
+
+    height = pixman_image_get_height(vd->server);
+    width = pixman_image_get_width(vd->server);
+
+    y = 0;
+    for (;;) {
+        int x, h;
+        unsigned long x2;
+        unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
+                                             height * VNC_DIRTY_BPL(vs),
+                                             y * VNC_DIRTY_BPL(vs));
+        if (offset == height * VNC_DIRTY_BPL(vs)) {
+            /* no more dirty bits */
+            break;
+        }
+        y = offset / VNC_DIRTY_BPL(vs);
+        x = offset % VNC_DIRTY_BPL(vs);
+        x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
+                                VNC_DIRTY_BPL(vs), x);
+        bitmap_clear(vs->dirty[y], x, x2 - x);
+        h = find_and_clear_dirty_height(vs, y, x, x2, height);
+        x2 = MIN(x2, width / VNC_DIRTY_PIXELS_PER_BIT);
+        if (x2 > x) {
+            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
+                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
+        }
+        if (!x && x2 == width / VNC_DIRTY_PIXELS_PER_BIT) {
+            y += h;
+            if (y == height) {
+                break;
+            }
+        }
+    }
+
+    vs->job_update = vs->update;
+    vs->update = VNC_STATE_UPDATE_NONE;
+    vnc_job_push(job);
+    vs->has_dirty = 0;
+    return n;
 }
 
 /* audio */
@@ -1077,11 +1162,15 @@ static void audio_capture(void *opaque, void *buf, int size)
     VncState *vs = opaque;
 
     vnc_lock_output(vs);
-    vnc_write_u8(vs, VNC_MSG_SERVER_QEMU);
-    vnc_write_u8(vs, VNC_MSG_SERVER_QEMU_AUDIO);
-    vnc_write_u16(vs, VNC_MSG_SERVER_QEMU_AUDIO_DATA);
-    vnc_write_u32(vs, size);
-    vnc_write(vs, buf, size);
+    if (vs->output.offset < vs->throttle_output_offset) {
+        vnc_write_u8(vs, VNC_MSG_SERVER_QEMU);
+        vnc_write_u8(vs, VNC_MSG_SERVER_QEMU_AUDIO);
+        vnc_write_u16(vs, VNC_MSG_SERVER_QEMU_AUDIO_DATA);
+        vnc_write_u32(vs, size);
+        vnc_write(vs, buf, size);
+    } else {
+        trace_vnc_client_throttle_audio(vs, vs->ioc, vs->output.offset);
+    }
     vnc_unlock_output(vs);
     vnc_flush(vs);
 }
@@ -1183,7 +1272,7 @@ void vnc_disconnect_finish(VncState *vs)
     g_free(vs);
 }
 
-ssize_t vnc_client_io_error(VncState *vs, ssize_t ret, Error **errp)
+size_t vnc_client_io_error(VncState *vs, ssize_t ret, Error **errp)
 {
     if (ret <= 0) {
         if (ret == 0) {
@@ -1226,9 +1315,9 @@ void vnc_client_error(VncState *vs)
  *
  * Returns the number of bytes written, which may be less than
  * the requested 'datalen' if the socket would block. Returns
- * -1 on error, and disconnects the client socket.
+ * 0 on I/O error, and disconnects the client socket.
  */
-ssize_t vnc_client_write_buf(VncState *vs, const uint8_t *data, size_t datalen)
+size_t vnc_client_write_buf(VncState *vs, const uint8_t *data, size_t datalen)
 {
     Error *err = NULL;
     ssize_t ret;
@@ -1246,12 +1335,13 @@ ssize_t vnc_client_write_buf(VncState *vs, const uint8_t *data, size_t datalen)
  * will switch the FD poll() handler back to read monitoring.
  *
  * Returns the number of bytes written, which may be less than
- * the buffered output data if the socket would block. Returns
- * -1 on error, and disconnects the client socket.
+ * the buffered output data if the socket would block.  Returns
+ * 0 on I/O error, and disconnects the client socket.
  */
-static ssize_t vnc_client_write_plain(VncState *vs)
+static size_t vnc_client_write_plain(VncState *vs)
 {
-    ssize_t ret;
+    size_t offset;
+    size_t ret;
 
 #ifdef CONFIG_VNC_SASL
     VNC_DEBUG("Write Plain: Pending output %p size %zd offset %zd. Wait SSF %d\n",
@@ -1270,7 +1360,20 @@ static ssize_t vnc_client_write_plain(VncState *vs)
     if (!ret)
         return 0;
 
+    if (ret >= vs->force_update_offset) {
+        if (vs->force_update_offset != 0) {
+            trace_vnc_client_unthrottle_forced(vs, vs->ioc);
+        }
+        vs->force_update_offset = 0;
+    } else {
+        vs->force_update_offset -= ret;
+    }
+    offset = vs->output.offset;
     buffer_advance(&vs->output, ret);
+    if (offset >= vs->throttle_output_offset &&
+        vs->output.offset < vs->throttle_output_offset) {
+        trace_vnc_client_unthrottle_incremental(vs, vs->ioc, vs->output.offset);
+    }
 
     if (vs->output.offset == 0) {
         if (vs->ioc_tag) {
@@ -1339,9 +1442,9 @@ void vnc_read_when(VncState *vs, VncReadEvent *func, size_t expecting)
  *
  * Returns the number of bytes read, which may be less than
  * the requested 'datalen' if the socket would block. Returns
- * -1 on error, and disconnects the client socket.
+ * 0 on I/O error or EOF, and disconnects the client socket.
  */
-ssize_t vnc_client_read_buf(VncState *vs, uint8_t *data, size_t datalen)
+size_t vnc_client_read_buf(VncState *vs, uint8_t *data, size_t datalen)
 {
     ssize_t ret;
     Error *err = NULL;
@@ -1357,12 +1460,13 @@ ssize_t vnc_client_read_buf(VncState *vs, uint8_t *data, size_t datalen)
  * when not using any SASL SSF encryption layers. Will read as much
  * data as possible without blocking.
  *
- * Returns the number of bytes read. Returns -1 on error, and
- * disconnects the client socket.
+ * Returns the number of bytes read, which may be less than
+ * the requested 'datalen' if the socket would block. Returns
+ * 0 on I/O error or EOF, and disconnects the client socket.
  */
-static ssize_t vnc_client_read_plain(VncState *vs)
+static size_t vnc_client_read_plain(VncState *vs)
 {
-    ssize_t ret;
+    size_t ret;
     VNC_DEBUG("Read plain %p size %zd offset %zd\n",
               vs->input.buffer, vs->input.capacity, vs->input.offset);
     buffer_reserve(&vs->input, 4096);
@@ -1388,7 +1492,7 @@ static void vnc_jobs_bh(void *opaque)
  */
 static int vnc_client_read(VncState *vs)
 {
-    ssize_t ret;
+    size_t ret;
 
 #ifdef CONFIG_VNC_SASL
     if (vs->sasl.conn && vs->sasl.runSSF)
@@ -1439,8 +1543,39 @@ gboolean vnc_client_io(QIOChannel *ioc G_GNUC_UNUSED,
 }
 
 
+/*
+ * Scale factor to apply to vs->throttle_output_offset when checking for
+ * hard limit. Worst case normal usage could be x2, if we have a complete
+ * incremental update and complete forced update in the output buffer.
+ * So x3 should be good enough, but we pick x5 to be conservative and thus
+ * (hopefully) never trigger incorrectly.
+ */
+#define VNC_THROTTLE_OUTPUT_LIMIT_SCALE 5
+
 void vnc_write(VncState *vs, const void *data, size_t len)
 {
+    if (vs->disconnecting) {
+        return;
+    }
+    /* Protection against malicious client/guest to prevent our output
+     * buffer growing without bound if client stops reading data. This
+     * should rarely trigger, because we have earlier throttling code
+     * which stops issuing framebuffer updates and drops audio data
+     * if the throttle_output_offset value is exceeded. So we only reach
+     * this higher level if a huge number of pseudo-encodings get
+     * triggered while data can't be sent on the socket.
+     *
+     * NB throttle_output_offset can be zero during early protocol
+     * handshake, or from the job thread's VncState clone
+     */
+    if (vs->throttle_output_offset != 0 &&
+        vs->output.offset > (vs->throttle_output_offset *
+                             VNC_THROTTLE_OUTPUT_LIMIT_SCALE)) {
+        trace_vnc_client_output_limit(vs, vs->ioc, vs->output.offset,
+                                      vs->throttle_output_offset);
+        vnc_disconnect_start(vs);
+        return;
+    }
     buffer_reserve(&vs->output, len);
 
     if (vs->ioc != NULL && buffer_empty(&vs->output)) {
@@ -1876,14 +2011,14 @@ static void ext_key_event(VncState *vs, int down,
 static void framebuffer_update_request(VncState *vs, int incremental,
                                        int x, int y, int w, int h)
 {
-    vs->need_update = 1;
-
     if (incremental) {
-        return;
+        if (vs->update != VNC_STATE_UPDATE_FORCE) {
+            vs->update = VNC_STATE_UPDATE_INCREMENTAL;
+        }
+    } else {
+        vs->update = VNC_STATE_UPDATE_FORCE;
+        vnc_set_area_dirty(vs->dirty, vs->vd, x, y, w, h);
     }
-
-    vs->force_update = 1;
-    vnc_set_area_dirty(vs->dirty, vs->vd, x, y, w, h);
 }
 
 static void send_ext_key_event_ack(VncState *vs)
@@ -2255,7 +2390,7 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len)
                 }
                 vs->as.nchannels = read_u8(data, 5);
                 if (vs->as.nchannels != 1 && vs->as.nchannels != 2) {
-                    VNC_DEBUG("Invalid audio channel coount %d\n",
+                    VNC_DEBUG("Invalid audio channel count %d\n",
                               read_u8(data, 5));
                     vnc_client_error(vs);
                     break;
@@ -2281,6 +2416,7 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len)
         break;
     }
 
+    vnc_update_throttle_offset(vs);
     vnc_read_when(vs, protocol_client_msg, 1);
     return 0;
 }
@@ -2863,7 +2999,7 @@ static void vnc_refresh(DisplayChangeListener *dcl)
     vnc_unlock_display(vd);
 
     QTAILQ_FOREACH_SAFE(vs, &vd->clients, next, vn) {
-        rects += vnc_update_client(vs, has_dirty, false);
+        rects += vnc_update_client(vs, has_dirty);
         /* vs might be free()ed here */
     }
 
diff --git a/ui/vnc.h b/ui/vnc.h
index 694cf32ca9..0c33a5f7fe 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -252,6 +252,12 @@ struct VncJob
     QTAILQ_ENTRY(VncJob) next;
 };
 
+typedef enum {
+    VNC_STATE_UPDATE_NONE,
+    VNC_STATE_UPDATE_INCREMENTAL,
+    VNC_STATE_UPDATE_FORCE,
+} VncStateUpdate;
+
 struct VncState
 {
     QIOChannelSocket *sioc; /* The underlying socket */
@@ -264,8 +270,8 @@ struct VncState
                            * vnc-jobs-async.c */
 
     VncDisplay *vd;
-    int need_update;
-    int force_update;
+    VncStateUpdate update; /* Most recent pending request from client */
+    VncStateUpdate job_update; /* Currently processed by job thread */
     int has_dirty;
     uint32_t features;
     int absolute;
@@ -293,6 +299,18 @@ struct VncState
 
     VncClientInfo *info;
 
+    /* Job thread bottom half has put data for a forced update
+     * into the output buffer. This offset points to the end of
+     * the update data in the output buffer. This lets us determine
+     * when a force update is fully sent to the client, allowing
+     * us to process further forced updates. */
+    size_t force_update_offset;
+    /* We allow multiple incremental updates or audio capture
+     * samples to be queued in output buffer, provided the
+     * buffer size doesn't exceed this threshold. The value
+     * is calculating dynamically based on framebuffer size
+     * and audio sample settings in vnc_update_throttle_offset() */
+    size_t throttle_output_offset;
     Buffer output;
     Buffer input;
     /* current output mode information */
@@ -506,8 +524,8 @@ gboolean vnc_client_io(QIOChannel *ioc,
                        GIOCondition condition,
                        void *opaque);
 
-ssize_t vnc_client_read_buf(VncState *vs, uint8_t *data, size_t datalen);
-ssize_t vnc_client_write_buf(VncState *vs, const uint8_t *data, size_t datalen);
+size_t vnc_client_read_buf(VncState *vs, uint8_t *data, size_t datalen);
+size_t vnc_client_write_buf(VncState *vs, const uint8_t *data, size_t datalen);
 
 /* Protocol I/O functions */
 void vnc_write(VncState *vs, const void *data, size_t len);
@@ -526,7 +544,7 @@ uint32_t read_u32(uint8_t *data, size_t offset);
 
 /* Protocol stage functions */
 void vnc_client_error(VncState *vs);
-ssize_t vnc_client_io_error(VncState *vs, ssize_t ret, Error **errp);
+size_t vnc_client_io_error(VncState *vs, ssize_t ret, Error **errp);
 
 void start_client_init(VncState *vs);
 void start_auth_vnc(VncState *vs);
diff --git a/vl.c b/vl.c
index 3599485226..2586f25952 100644
--- a/vl.c
+++ b/vl.c
@@ -4795,6 +4795,7 @@ int main(int argc, char **argv, char **envp)
     monitor_cleanup();
     qemu_chr_cleanup();
     user_creatable_cleanup();
+    migration_object_finalize();
     /* TODO: unref root container, check all devices are ok */
 
     return 0;