79 files changed, 5086 insertions, 1686 deletions
diff --git a/Kconfig.host b/Kconfig.host index d763d89269..2ee71578f3 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -46,3 +46,6 @@ config FUZZ config VFIO_USER_SERVER_ALLOWED bool imply VFIO_USER_SERVER + +config HV_BALLOON_POSSIBLE + bool diff --git a/MAINTAINERS b/MAINTAINERS index 8e8a7d5be5..d4a480ce5a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2656,6 +2656,14 @@ F: hw/usb/canokey.c F: hw/usb/canokey.h F: docs/system/devices/canokey.rst +Hyper-V Dynamic Memory Protocol +M: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> +S: Supported +F: hw/hyperv/hv-balloon*.c +F: hw/hyperv/hv-balloon*.h +F: include/hw/hyperv/dynmem-proto.h +F: include/hw/hyperv/hv-balloon.h + Subsystems ---------- Overall Audio backends diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 240eb16d90..5adf4f12f7 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -594,6 +594,77 @@ path. Return path - opened by main thread, written by main thread AND postcopy thread (protected by rp_mutex) +Dirty limit +===================== +The dirty limit, short for dirty page rate upper limit, is a new capability +introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM +dirty ring to throttle down the guest during live migration. + +The algorithm framework is as follows: + +:: + + ------------------------------------------------------------------------------ + main --------------> throttle thread ------------> PREPARE(1) <-------- + thread \ | | + \ | | + \ V | + -\ CALCULATE(2) | + \ | | + \ | | + \ V | + \ SET PENALTY(3) ----- + -\ | + \ | + \ V + -> virtual CPU thread -------> ACCEPT PENALTY(4) + ------------------------------------------------------------------------------ + +When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, +the QEMU main thread starts the throttle thread. The throttle thread, once +launched, executes the loop, which consists of three steps: + + - PREPARE (1) + + The entire work of PREPARE (1) is preparation for the second stage, + CALCULATE(2), as the name implies. It involves preparing the dirty + page rate value and the corresponding upper limit of the VM: + The dirty page rate is calculated via the KVM dirty ring mechanism, + which tells QEMU how many dirty pages a virtual CPU has had since the + last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper + limit is specified by caller, therefore fetch it directly. + + - CALCULATE (2) + + Calculate a suitable sleep period for each virtual CPU, which will be + used to determine the penalty for the target virtual CPU. The + computation must be done carefully in order to reduce the dirty page + rate progressively down to the upper limit without oscillation. To + achieve this, two strategies are provided: the first is to add or + subtract sleep time based on the ratio of the current dirty page rate + to the limit, which is used when the current dirty page rate is far + from the limit; the second is to add or subtract a fixed time when + the current dirty page rate is close to the limit. + + - SET PENALTY (3) + + Set the sleep time for each virtual CPU that should be penalized based + on the results of the calculation supplied by step CALCULATE (2). + +After completing the three above stages, the throttle thread loops back +to step PREPARE (1) until the dirty limit is reached. + +On the other hand, each virtual CPU thread reads the sleep duration and +sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that +is ACCEPT PENALTY (4). 
Virtual CPUs tied with writing processes will +obviously exit to the path and get penalized, whereas virtual CPUs involved +with read processes will not. + +In summary, thanks to the KVM dirty ring technology, the dirty limit +algorithm will restrict virtual CPUs as needed to keep their dirty page +rate inside the limit. This leads to more steady reading performance during +live migration and can aid in improving large guest responsiveness. + Postcopy ======== diff --git a/dump/dump-hmp-cmds.c b/dump/dump-hmp-cmds.c index b038785fee..b428ec33df 100644 --- a/dump/dump-hmp-cmds.c +++ b/dump/dump-hmp-cmds.c @@ -19,6 +19,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict) bool paging = qdict_get_try_bool(qdict, "paging", false); bool zlib = qdict_get_try_bool(qdict, "zlib", false); bool lzo = qdict_get_try_bool(qdict, "lzo", false); + bool raw = qdict_get_try_bool(qdict, "raw", false); bool snappy = qdict_get_try_bool(qdict, "snappy", false); const char *file = qdict_get_str(qdict, "filename"); bool has_begin = qdict_haskey(qdict, "begin"); @@ -40,16 +41,28 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict) dump_format = DUMP_GUEST_MEMORY_FORMAT_WIN_DMP; } - if (zlib) { - dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB; + if (zlib && raw) { + if (raw) { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_ZLIB; + } else { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB; + } } if (lzo) { - dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO; + if (raw) { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_LZO; + } else { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO; + } } if (snappy) { - dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY; + if (raw) { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_SNAPPY; + } else { + dump_format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY; + } } if (has_begin) { diff --git a/dump/dump.c b/dump/dump.c index d355ada62e..1c304cadfd 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -100,7 +100,7 @@ static int dump_cleanup(DumpState *s) memory_mapping_list_free(&s->list); close(s->fd); g_free(s->guest_note); - g_array_unref(s->string_table_buf); + g_clear_pointer(&s->string_table_buf, g_array_unref); s->guest_note = NULL; if (s->resume) { if (s->detached) { @@ -809,11 +809,15 @@ static void create_vmcore(DumpState *s, Error **errp) dump_end(s, errp); } -static int write_start_flat_header(int fd) +static int write_start_flat_header(DumpState *s) { MakedumpfileHeader *mh; int ret = 0; + if (s->kdump_raw) { + return 0; + } + QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER); mh = g_malloc0(MAX_SIZE_MDF_HEADER); @@ -824,7 +828,7 @@ static int write_start_flat_header(int fd) mh->version = cpu_to_be64(VERSION_FLAT_HEADER); size_t written_size; - written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER); + written_size = qemu_write_full(s->fd, mh, MAX_SIZE_MDF_HEADER); if (written_size != MAX_SIZE_MDF_HEADER) { ret = -1; } @@ -833,15 +837,19 @@ static int write_start_flat_header(int fd) return ret; } -static int write_end_flat_header(int fd) +static int write_end_flat_header(DumpState *s) { MakedumpfileDataHeader mdh; + if (s->kdump_raw) { + return 0; + } + mdh.offset = END_FLAG_FLAT_HEADER; mdh.buf_size = END_FLAG_FLAT_HEADER; size_t written_size; - written_size = qemu_write_full(fd, &mdh, sizeof(mdh)); + written_size = qemu_write_full(s->fd, &mdh, sizeof(mdh)); if (written_size != sizeof(mdh)) { return -1; } @@ -849,20 +857,28 @@ static int write_end_flat_header(int fd) return 0; } -static int write_buffer(int fd, off_t offset, const 
void *buf, size_t size) +static int write_buffer(DumpState *s, off_t offset, const void *buf, size_t size) { size_t written_size; MakedumpfileDataHeader mdh; + off_t seek_loc; - mdh.offset = cpu_to_be64(offset); - mdh.buf_size = cpu_to_be64(size); + if (s->kdump_raw) { + seek_loc = lseek(s->fd, offset, SEEK_SET); + if (seek_loc == (off_t) -1) { + return -1; + } + } else { + mdh.offset = cpu_to_be64(offset); + mdh.buf_size = cpu_to_be64(size); - written_size = qemu_write_full(fd, &mdh, sizeof(mdh)); - if (written_size != sizeof(mdh)) { - return -1; + written_size = qemu_write_full(s->fd, &mdh, sizeof(mdh)); + if (written_size != sizeof(mdh)) { + return -1; + } } - written_size = qemu_write_full(fd, buf, size); + written_size = qemu_write_full(s->fd, buf, size); if (written_size != size) { return -1; } @@ -982,7 +998,7 @@ static void create_header32(DumpState *s, Error **errp) #endif dh->status = cpu_to_dump32(s, status); - if (write_buffer(s->fd, 0, dh, size) < 0) { + if (write_buffer(s, 0, dh, size) < 0) { error_setg(errp, "dump: failed to write disk dump header"); goto out; } @@ -1012,7 +1028,7 @@ static void create_header32(DumpState *s, Error **errp) kh->offset_note = cpu_to_dump64(s, offset_note); kh->note_size = cpu_to_dump32(s, s->note_size); - if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS * + if (write_buffer(s, DISKDUMP_HEADER_BLOCKS * block_size, kh, size) < 0) { error_setg(errp, "dump: failed to write kdump sub header"); goto out; @@ -1027,7 +1043,7 @@ static void create_header32(DumpState *s, Error **errp) if (*errp) { goto out; } - if (write_buffer(s->fd, offset_note, s->note_buf, + if (write_buffer(s, offset_note, s->note_buf, s->note_size) < 0) { error_setg(errp, "dump: failed to write notes"); goto out; @@ -1093,7 +1109,7 @@ static void create_header64(DumpState *s, Error **errp) #endif dh->status = cpu_to_dump32(s, status); - if (write_buffer(s->fd, 0, dh, size) < 0) { + if (write_buffer(s, 0, dh, size) < 0) { error_setg(errp, "dump: failed to write disk dump header"); goto out; } @@ -1123,7 +1139,7 @@ static void create_header64(DumpState *s, Error **errp) kh->offset_note = cpu_to_dump64(s, offset_note); kh->note_size = cpu_to_dump64(s, s->note_size); - if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS * + if (write_buffer(s, DISKDUMP_HEADER_BLOCKS * block_size, kh, size) < 0) { error_setg(errp, "dump: failed to write kdump sub header"); goto out; @@ -1139,7 +1155,7 @@ static void create_header64(DumpState *s, Error **errp) goto out; } - if (write_buffer(s->fd, offset_note, s->note_buf, + if (write_buffer(s, offset_note, s->note_buf, s->note_size) < 0) { error_setg(errp, "dump: failed to write notes"); goto out; @@ -1204,7 +1220,7 @@ static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value, while (old_offset < new_offset) { /* calculate the offset and write dump_bitmap */ offset_bitmap1 = s->offset_dump_bitmap + old_offset; - if (write_buffer(s->fd, offset_bitmap1, buf, + if (write_buffer(s, offset_bitmap1, buf, bitmap_bufsize) < 0) { return -1; } @@ -1212,7 +1228,7 @@ static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value, /* dump level 1 is chosen, so 1st and 2nd bitmap are same */ offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap + old_offset; - if (write_buffer(s->fd, offset_bitmap2, buf, + if (write_buffer(s, offset_bitmap2, buf, bitmap_bufsize) < 0) { return -1; } @@ -1380,7 +1396,7 @@ out: static void prepare_data_cache(DataCache *data_cache, DumpState *s, off_t offset) { - data_cache->fd = s->fd; + data_cache->state = s; 
data_cache->data_size = 0; data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s); data_cache->buf = g_malloc0(data_cache->buf_size); @@ -1399,11 +1415,11 @@ static int write_cache(DataCache *dc, const void *buf, size_t size, /* * if flag_sync is set, synchronize data in dc->buf into vmcore. * otherwise check if the space is enough for caching data in buf, if not, - * write the data in dc->buf to dc->fd and reset dc->buf + * write the data in dc->buf to dc->state->fd and reset dc->buf */ if ((!flag_sync && dc->data_size + size > dc->buf_size) || (flag_sync && dc->data_size > 0)) { - if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) { + if (write_buffer(dc->state, dc->offset, dc->buf, dc->data_size) < 0) { return -1; } @@ -1644,7 +1660,7 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) * +------------------------------------------+ */ - ret = write_start_flat_header(s->fd); + ret = write_start_flat_header(s); if (ret < 0) { error_setg(errp, "dump: failed to write start flat header"); return; @@ -1665,33 +1681,13 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) return; } - ret = write_end_flat_header(s->fd); + ret = write_end_flat_header(s); if (ret < 0) { error_setg(errp, "dump: failed to write end flat header"); return; } } -static int validate_start_block(DumpState *s) -{ - GuestPhysBlock *block; - - if (!dump_has_filter(s)) { - return 0; - } - - QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { - /* This block is out of the range */ - if (block->target_start >= s->filter_area_begin + s->filter_area_length || - block->target_end <= s->filter_area_begin) { - continue; - } - return 0; - } - - return -1; -} - static void get_max_mapnr(DumpState *s) { GuestPhysBlock *last_block; @@ -1775,7 +1771,8 @@ static void vmcoreinfo_update_phys_base(DumpState *s) static void dump_init(DumpState *s, int fd, bool has_format, DumpGuestMemoryFormat format, bool paging, bool has_filter, - int64_t begin, int64_t length, Error **errp) + int64_t begin, int64_t length, bool kdump_raw, + Error **errp) { ERRP_GUARD(); VMCoreInfoState *vmci = vmcoreinfo_find(); @@ -1786,6 +1783,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, s->has_format = has_format; s->format = format; s->written_size = 0; + s->kdump_raw = kdump_raw; /* kdump-compressed is conflict with paging and filter */ if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { @@ -1810,7 +1808,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, s->fd = fd; if (has_filter && !length) { - error_setg(errp, QERR_INVALID_PARAMETER, "length"); + error_setg(errp, "parameter 'length' expects a non-zero size"); goto cleanup; } s->filter_area_begin = begin; @@ -1839,12 +1837,6 @@ static void dump_init(DumpState *s, int fd, bool has_format, goto cleanup; } - /* Is the filter filtering everything? */ - if (validate_start_block(s) == -1) { - error_setg(errp, QERR_INVALID_PARAMETER, "begin"); - goto cleanup; - } - /* get dump info: endian, class and architecture. * If the target architecture is not supported, cpu_get_dump_info() will * return -1. 
@@ -2061,17 +2053,19 @@ DumpQueryResult *qmp_query_dump(Error **errp) return result; } -void qmp_dump_guest_memory(bool paging, const char *file, +void qmp_dump_guest_memory(bool paging, const char *protocol, bool has_detach, bool detach, - bool has_begin, int64_t begin, bool has_length, - int64_t length, bool has_format, - DumpGuestMemoryFormat format, Error **errp) + bool has_begin, int64_t begin, + bool has_length, int64_t length, + bool has_format, DumpGuestMemoryFormat format, + Error **errp) { ERRP_GUARD(); const char *p; - int fd = -1; + int fd; DumpState *s; bool detach_p = false; + bool kdump_raw = false; if (runstate_check(RUN_STATE_INMIGRATE)) { error_setg(errp, "Dump not allowed during incoming migration."); @@ -2086,6 +2080,29 @@ void qmp_dump_guest_memory(bool paging, const char *file, } /* + * externally, we represent kdump-raw-* as separate formats, but internally + * they are handled the same, except for the "raw" flag + */ + if (has_format) { + switch (format) { + case DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_ZLIB: + format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB; + kdump_raw = true; + break; + case DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_LZO: + format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO; + kdump_raw = true; + break; + case DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_SNAPPY: + format = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY; + kdump_raw = true; + break; + default: + break; + } + } + + /* * kdump-compressed format need the whole memory dumped, so paging or * filter is not supported here. */ @@ -2127,25 +2144,23 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } -#if !defined(WIN32) - if (strstart(file, "fd:", &p)) { + if (strstart(protocol, "fd:", &p)) { fd = monitor_get_fd(monitor_cur(), p, errp); if (fd == -1) { return; } - } -#endif - - if (strstart(file, "file:", &p)) { - fd = qemu_open_old(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR); + } else if (strstart(protocol, "file:", &p)) { + fd = qemu_create(p, O_WRONLY | O_TRUNC | O_BINARY, S_IRUSR, errp); if (fd < 0) { - error_setg_file_open(errp, errno, p); return; } + } else { + error_setg(errp, + "parameter 'protocol' must start with 'file:' or 'fd:'"); + return; } - - if (fd == -1) { - error_setg(errp, QERR_INVALID_PARAMETER, "protocol"); + if (kdump_raw && lseek(fd, 0, SEEK_CUR) == (off_t) -1) { + error_setg(errp, "kdump-raw formats require a seekable file"); return; } @@ -2168,7 +2183,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, dump_state_prepare(s); dump_init(s, fd, has_format, format, paging, has_begin, - begin, length, errp); + begin, length, kdump_raw, errp); if (*errp) { qatomic_set(&s->status, DUMP_STATUS_FAILED); return; @@ -2196,15 +2211,18 @@ DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) /* kdump-zlib is always available */ QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB); + QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_ZLIB); /* add new item if kdump-lzo is available */ #ifdef CONFIG_LZO QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO); + QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_LZO); #endif /* add new item if kdump-snappy is available */ #ifdef CONFIG_SNAPPY QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY); + QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_RAW_SNAPPY); #endif if (win_dump_available(NULL)) { diff --git a/hmp-commands.hx b/hmp-commands.hx index 63eac22734..c0a27688b6 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1085,14 +1085,16 @@ ERST { .name = 
"dump-guest-memory", - .args_type = "paging:-p,detach:-d,windmp:-w,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:l?,length:l?", - .params = "[-p] [-d] [-z|-l|-s|-w] filename [begin length]", + .args_type = "paging:-p,detach:-d,windmp:-w,zlib:-z,lzo:-l,snappy:-s,raw:-R,filename:F,begin:l?,length:l?", + .params = "[-p] [-d] [-z|-l|-s|-w] [-R] filename [begin length]", .help = "dump guest memory into file 'filename'.\n\t\t\t" "-p: do paging to get guest's memory mapping.\n\t\t\t" "-d: return immediately (do not wait for completion).\n\t\t\t" "-z: dump in kdump-compressed format, with zlib compression.\n\t\t\t" "-l: dump in kdump-compressed format, with lzo compression.\n\t\t\t" "-s: dump in kdump-compressed format, with snappy compression.\n\t\t\t" + "-R: when using kdump (-z, -l, -s), use raw rather than makedumpfile-flattened\n\t\t\t" + " format\n\t\t\t" "-w: dump in Windows crashdump format (can be used instead of ELF-dump converting),\n\t\t\t" " for Windows x86 and x64 guests with vmcoreinfo driver only.\n\t\t\t" "begin: the starting physical address.\n\t\t\t" @@ -1115,6 +1117,9 @@ SRST dump in kdump-compressed format, with lzo compression. ``-s`` dump in kdump-compressed format, with snappy compression. + ``-R`` + when using kdump (-z, -l, -s), use raw rather than makedumpfile-flattened + format ``-w`` dump in Windows crashdump format (can be used instead of ELF-dump converting), for Windows x64 guests with vmcoreinfo driver only diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index a07cd7eb5d..bfa53960c3 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -115,9 +115,13 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) return; } - if (xen_device_frontend_scanf(xendev, "protocol", "%ms", - &str) != 1) { - protocol = BLKIF_PROTOCOL_NATIVE; + if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) { + /* x86 defaults to the 32-bit protocol even for 64-bit guests. */ + if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) { + protocol = BLKIF_PROTOCOL_X86_32; + } else { + protocol = BLKIF_PROTOCOL_NATIVE; + } } else { if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) { protocol = BLKIF_PROTOCOL_X86_32; diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index 9a4b59c6f2..a6ff6a4875 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -253,6 +253,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) MemoryDeviceInfo *value; PCDIMMDeviceInfo *di; SgxEPCDeviceInfo *se; + HvBalloonDeviceInfo *hi; for (info = info_list; info; info = info->next) { value = info->value; @@ -310,6 +311,20 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) monitor_printf(mon, " node: %" PRId64 "\n", se->node); monitor_printf(mon, " memdev: %s\n", se->memdev); break; + case MEMORY_DEVICE_INFO_KIND_HV_BALLOON: + hi = value->u.hv_balloon.data; + monitor_printf(mon, "Memory device [%s]: \"%s\"\n", + MemoryDeviceInfoKind_str(value->type), + hi->id ? 
hi->id : ""); + if (hi->has_memaddr) { + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", + hi->memaddr); + } + monitor_printf(mon, " max-size: %" PRIu64 "\n", hi->max_size); + if (hi->memdev) { + monitor_printf(mon, " memdev: %s\n", hi->memdev); + } + break; default: g_assert_not_reached(); } diff --git a/hw/display/ati.c b/hw/display/ati.c index 6e38e00502..9a87a5504a 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -319,11 +319,13 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) case DAC_CNTL: val = s->regs.dac_cntl; break; - case GPIO_VGA_DDC: - val = s->regs.gpio_vga_ddc; + case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3: + val = ati_reg_read_offs(s->regs.gpio_vga_ddc, + addr - GPIO_VGA_DDC, size); break; - case GPIO_DVI_DDC: - val = s->regs.gpio_dvi_ddc; + case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3: + val = ati_reg_read_offs(s->regs.gpio_dvi_ddc, + addr - GPIO_DVI_DDC, size); break; case GPIO_MONID ... GPIO_MONID + 3: val = ati_reg_read_offs(s->regs.gpio_monid, @@ -337,6 +339,9 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) case PALETTE_DATA: val = vga_ioport_read(&s->vga, VGA_PEL_D); break; + case PALETTE_30_DATA: + val = s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IR)]; + break; case CNFG_CNTL: val = s->regs.config_cntl; break; @@ -349,14 +354,17 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) PCI_BASE_ADDRESS_0, size) & 0xfffffff0; break; case CONFIG_APER_SIZE: - val = s->vga.vram_size; + val = s->vga.vram_size / 2; break; case CONFIG_REG_1_BASE: val = pci_default_read_config(&s->dev, PCI_BASE_ADDRESS_2, size) & 0xfffffff0; break; case CONFIG_REG_APER_SIZE: - val = memory_region_size(&s->mm); + val = memory_region_size(&s->mm) / 2; + break; + case HOST_PATH_CNTL: + val = BIT(23); /* Radeon HDP_APER_CNTL */ break; case MC_STATUS: val = 5; @@ -612,29 +620,34 @@ static void ati_mm_write(void *opaque, hwaddr addr, s->regs.dac_cntl = data & 0xffffe3ff; s->vga.dac_8bit = !!(data & DAC_8BIT_EN); break; - case GPIO_VGA_DDC: + /* + * GPIO regs for DDC access. Because some drivers access these via + * multiple byte writes we have to be careful when we send bits to + * avoid spurious changes in bitbang_i2c state. Only do it when either + * the enable bits are changed or output bits changed while enabled. + */ + case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3: if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) { /* FIXME: Maybe add a property to select VGA or DVI port? */ } break; - case GPIO_DVI_DDC: + case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3: if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) { - s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, data, 0); + ati_reg_write_offs(&s->regs.gpio_dvi_ddc, + addr - GPIO_DVI_DDC, data, size); + if ((addr <= GPIO_DVI_DDC + 2 && addr + size > GPIO_DVI_DDC + 2) || + (addr == GPIO_DVI_DDC && (s->regs.gpio_dvi_ddc & 0x30000))) { + s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, + s->regs.gpio_dvi_ddc, 0); + } } break; case GPIO_MONID ... GPIO_MONID + 3: /* FIXME What does Radeon have here? */ if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { + /* Rage128p accesses DDC via MONID(1-2) with additional mask bit */ ati_reg_write_offs(&s->regs.gpio_monid, addr - GPIO_MONID, data, size); - /* - * Rage128p accesses DDC used to get EDID via these bits. - * Because some drivers access this via multiple byte writes - * we have to be careful when we send bits to avoid spurious - * changes in bitbang_i2c state. 
So only do it when mask is set - * and either the enable bits are changed or output bits changed - * while enabled. - */ if ((s->regs.gpio_monid & BIT(25)) && ((addr <= GPIO_MONID + 2 && addr + size > GPIO_MONID + 2) || (addr == GPIO_MONID && (s->regs.gpio_monid & 0x60000)))) { @@ -663,6 +676,12 @@ static void ati_mm_write(void *opaque, hwaddr addr, data >>= 8; vga_ioport_write(&s->vga, VGA_PEL_D, data & 0xff); break; + case PALETTE_30_DATA: + s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IW)] = data; + vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 22) & 0xff); + vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 12) & 0xff); + vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 2) & 0xff); + break; case CNFG_CNTL: s->regs.config_cntl = data; break; @@ -1014,6 +1033,7 @@ static Property ati_vga_properties[] = { DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id, PCI_DEVICE_ID_ATI_RAGE128_PF), DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false), + DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3), DEFINE_PROP_END_OF_LIST() }; @@ -1035,11 +1055,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data) k->exit = ati_vga_exit; } +static void ati_vga_init(Object *o) +{ + object_property_set_description(o, "x-pixman", "Use pixman for: " + "1: fill, 2: blit"); +} + static const TypeInfo ati_vga_info = { .name = TYPE_ATI_VGA, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(ATIVGAState), .class_init = ati_vga_class_init, + .instance_init = ati_vga_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { }, diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c index 7d786653e8..0e6b8e4367 100644 --- a/hw/display/ati_2d.c +++ b/hw/display/ati_2d.c @@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s) switch (s->regs.dp_mix & GMC_ROP3_MASK) { case ROP3_SRCCOPY: { + bool fallback = false; unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? 
@@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s) src_bits, dst_bits, src_stride, dst_stride, bpp, bpp, src_x, src_y, dst_x, dst_y, s->regs.dst_width, s->regs.dst_height); - if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT && + if ((s->use_pixman & BIT(1)) && + s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT && s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) { - pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits, - src_stride, dst_stride, bpp, bpp, - src_x, src_y, dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height); - } else { + fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits, + src_stride, dst_stride, bpp, bpp, + src_x, src_y, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height); + } else if (s->use_pixman & BIT(1)) { /* FIXME: We only really need a temporary if src and dst overlap */ int llb = s->regs.dst_width * (bpp / 8); int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * s->regs.dst_height); - pixman_blt((uint32_t *)src_bits, tmp, - src_stride, tmp_stride, bpp, bpp, - src_x, src_y, 0, 0, - s->regs.dst_width, s->regs.dst_height); - pixman_blt(tmp, (uint32_t *)dst_bits, - tmp_stride, dst_stride, bpp, bpp, - 0, 0, dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height); + fallback = !pixman_blt((uint32_t *)src_bits, tmp, + src_stride, tmp_stride, bpp, bpp, + src_x, src_y, 0, 0, + s->regs.dst_width, s->regs.dst_height); + if (!fallback) { + fallback = !pixman_blt(tmp, (uint32_t *)dst_bits, + tmp_stride, dst_stride, bpp, bpp, + 0, 0, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height); + } g_free(tmp); + } else { + fallback = true; + } + if (fallback) { + unsigned int y, i, j, bypp = bpp / 8; + unsigned int src_pitch = src_stride * sizeof(uint32_t); + unsigned int dst_pitch = dst_stride * sizeof(uint32_t); + + for (y = 0; y < s->regs.dst_height; y++) { + i = dst_x * bypp; + j = src_x * bypp; + if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) { + i += (dst_y + y) * dst_pitch; + j += (src_y + y) * src_pitch; + } else { + i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch; + j += (src_y + s->regs.dst_height - 1 - y) * src_pitch; + } + memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp); + } } if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr && dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr + @@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s) dst_stride /= sizeof(uint32_t); DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n", - dst_bits, dst_stride, bpp, - dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height, - filler); - pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, - dst_x, dst_y, - s->regs.dst_width, s->regs.dst_height, - filler); + dst_bits, dst_stride, bpp, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height, filler); + if (!(s->use_pixman & BIT(0)) || + !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height, filler)) { + /* fallback when pixman failed or we don't want to call it */ + unsigned int x, y, i, bypp = bpp / 8; + unsigned int dst_pitch = dst_stride * sizeof(uint32_t); + for (y = 0; y < s->regs.dst_height; y++) { + i = dst_x * bypp + (dst_y + y) * dst_pitch; + for (x = 0; x < s->regs.dst_width; x++, i += bypp) { + stn_he_p(&dst_bits[i], bypp, filler); + } + } + } if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr && dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr + s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) { diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c index bd0ecd48c7..3ffa7f35df 100644 
--- a/hw/display/ati_dbg.c +++ b/hw/display/ati_dbg.c @@ -30,6 +30,7 @@ static struct ati_regdesc ati_reg_names[] = { {"AMCGPIO_EN_MIR", 0x00a8}, {"PALETTE_INDEX", 0x00b0}, {"PALETTE_DATA", 0x00b4}, + {"PALETTE_30_DATA", 0x00b8}, {"CNFG_CNTL", 0x00e0}, {"GEN_RESET_CNTL", 0x00f0}, {"CNFG_MEMSIZE", 0x00f8}, @@ -38,6 +39,7 @@ static struct ati_regdesc ati_reg_names[] = { {"CONFIG_APER_SIZE", 0x0108}, {"CONFIG_REG_1_BASE", 0x010c}, {"CONFIG_REG_APER_SIZE", 0x0110}, + {"HOST_PATH_CNTL", 0x0130}, {"MEM_CNTL", 0x0140}, {"MC_FB_LOCATION", 0x0148}, {"MC_AGP_LOCATION", 0x014C}, diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h index e8d3c7af75..f5a47b82b0 100644 --- a/hw/display/ati_int.h +++ b/hw/display/ati_int.h @@ -44,6 +44,7 @@ typedef struct ATIVGARegs { uint32_t gpio_dvi_ddc; uint32_t gpio_monid; uint32_t config_cntl; + uint32_t palette[256]; uint32_t crtc_h_total_disp; uint32_t crtc_h_sync_strt_wid; uint32_t crtc_v_total_disp; @@ -89,6 +90,7 @@ struct ATIVGAState { char *model; uint16_t dev_id; uint8_t mode; + uint8_t use_pixman; bool cursor_guest_mode; uint16_t cursor_size; uint32_t cursor_offset; diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h index d6282b2ef2..d7127748ff 100644 --- a/hw/display/ati_regs.h +++ b/hw/display/ati_regs.h @@ -48,6 +48,7 @@ #define AMCGPIO_EN_MIR 0x00a8 #define PALETTE_INDEX 0x00b0 #define PALETTE_DATA 0x00b4 +#define PALETTE_30_DATA 0x00b8 #define CNFG_CNTL 0x00e0 #define GEN_RESET_CNTL 0x00f0 #define CNFG_MEMSIZE 0x00f8 @@ -56,6 +57,7 @@ #define CONFIG_APER_SIZE 0x0108 #define CONFIG_REG_1_BASE 0x010c #define CONFIG_REG_APER_SIZE 0x0110 +#define HOST_PATH_CNTL 0x0130 #define MEM_CNTL 0x0140 #define MC_FB_LOCATION 0x0148 #define MC_AGP_LOCATION 0x014C diff --git a/hw/display/macfb.c b/hw/display/macfb.c index 2f8e016566..d61541ccb5 100644 --- a/hw/display/macfb.c +++ b/hw/display/macfb.c @@ -36,8 +36,8 @@ #define DAFB_INTR_MASK 0x104 #define DAFB_INTR_STAT 0x108 #define DAFB_INTR_CLEAR 0x10c -#define DAFB_RESET 0x200 -#define DAFB_LUT 0x213 +#define DAFB_LUT_INDEX 0x200 +#define DAFB_LUT 0x210 #define DAFB_INTR_VBL 0x4 @@ -537,6 +537,11 @@ static uint64_t macfb_ctrl_read(void *opaque, case DAFB_MODE_SENSE: val = macfb_sense_read(s); break; + case DAFB_LUT ... DAFB_LUT + 3: + val = s->color_palette[s->palette_current]; + s->palette_current = (s->palette_current + 1) % + ARRAY_SIZE(s->color_palette); + break; default: if (addr < MACFB_CTRL_TOPADDR) { val = s->regs[addr >> 2]; @@ -583,13 +588,11 @@ static void macfb_ctrl_write(void *opaque, s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL; macfb_update_irq(s); break; - case DAFB_RESET: - s->palette_current = 0; - s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL; - macfb_update_irq(s); + case DAFB_LUT_INDEX: + s->palette_current = (val & 0xff) * 3; break; - case DAFB_LUT: - s->color_palette[s->palette_current] = val; + case DAFB_LUT ... 
DAFB_LUT + 3: + s->color_palette[s->palette_current] = val & 0xff; s->palette_current = (s->palette_current + 1) % ARRAY_SIZE(s->color_palette); if (s->palette_current % 3) { diff --git a/hw/display/virtio-gpu-pci-rutabaga.c b/hw/display/virtio-gpu-pci-rutabaga.c index c96729e198..abbb898c65 100644 --- a/hw/display/virtio-gpu-pci-rutabaga.c +++ b/hw/display/virtio-gpu-pci-rutabaga.c @@ -36,6 +36,7 @@ static const TypeInfo virtio_gpu_rutabaga_pci_info[] = { .instance_init = virtio_gpu_rutabaga_initfn, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, } }, }; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 4265316cbb..2707bceea8 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1213,6 +1213,9 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size, assert(QTAILQ_EMPTY(&g->cmdq)); QTAILQ_FOREACH(res, &g->reslist, next) { + if (res->blob_size) { + continue; + } qemu_put_be32(f, res->resource_id); qemu_put_be32(f, res->width); qemu_put_be32(f, res->height); @@ -1230,12 +1233,40 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size, return vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL); } +static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g, + struct virtio_gpu_simple_resource *res) +{ + int i; + + for (i = 0; i < res->iov_cnt; i++) { + hwaddr len = res->iov[i].iov_len; + res->iov[i].iov_base = + dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len, + DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED); + + if (!res->iov[i].iov_base || len != res->iov[i].iov_len) { + /* Clean up the half-a-mapping we just created... */ + if (res->iov[i].iov_base) { + dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as, res->iov[i].iov_base, + len, DMA_DIRECTION_TO_DEVICE, 0); + } + /* ...and the mappings for previous loop iterations */ + res->iov_cnt = i; + virtio_gpu_cleanup_mapping(g, res); + return false; + } + } + + QTAILQ_INSERT_HEAD(&g->reslist, res, next); + g->hostmem += res->hostmem; + return true; +} + static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, const VMStateField *field) { VirtIOGPU *g = opaque; struct virtio_gpu_simple_resource *res; - struct virtio_gpu_scanout *scanout; uint32_t resource_id, pformat; void *bits = NULL; int i; @@ -1294,40 +1325,96 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, qemu_get_buffer(f, (void *)pixman_image_get_data(res->image), pixman_image_get_stride(res->image) * res->height); - /* restore mapping */ - for (i = 0; i < res->iov_cnt; i++) { - hwaddr len = res->iov[i].iov_len; - res->iov[i].iov_base = - dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len, - DMA_DIRECTION_TO_DEVICE, - MEMTXATTRS_UNSPECIFIED); - - if (!res->iov[i].iov_base || len != res->iov[i].iov_len) { - /* Clean up the half-a-mapping we just created... 
*/ - if (res->iov[i].iov_base) { - dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as, - res->iov[i].iov_base, - len, - DMA_DIRECTION_TO_DEVICE, - 0); - } - /* ...and the mappings for previous loop iterations */ - res->iov_cnt = i; - virtio_gpu_cleanup_mapping(g, res); - pixman_image_unref(res->image); - g_free(res); - return -EINVAL; - } + if (!virtio_gpu_load_restore_mapping(g, res)) { + pixman_image_unref(res->image); + g_free(res); + return -EINVAL; } - QTAILQ_INSERT_HEAD(&g->reslist, res, next); - g->hostmem += res->hostmem; - resource_id = qemu_get_be32(f); } /* load & apply scanout state */ vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1); + + return 0; +} + +static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_simple_resource *res; + int i; + + /* in 2d mode we should never find unprocessed commands here */ + assert(QTAILQ_EMPTY(&g->cmdq)); + + QTAILQ_FOREACH(res, &g->reslist, next) { + if (!res->blob_size) { + continue; + } + qemu_put_be32(f, res->resource_id); + qemu_put_be32(f, res->blob_size); + qemu_put_be32(f, res->iov_cnt); + for (i = 0; i < res->iov_cnt; i++) { + qemu_put_be64(f, res->addrs[i]); + qemu_put_be32(f, res->iov[i].iov_len); + } + } + qemu_put_be32(f, 0); /* end of list */ + + return 0; +} + +static int virtio_gpu_blob_load(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_simple_resource *res; + uint32_t resource_id; + int i; + + resource_id = qemu_get_be32(f); + while (resource_id != 0) { + res = virtio_gpu_find_resource(g, resource_id); + if (res) { + return -EINVAL; + } + + res = g_new0(struct virtio_gpu_simple_resource, 1); + res->resource_id = resource_id; + res->blob_size = qemu_get_be32(f); + res->iov_cnt = qemu_get_be32(f); + res->addrs = g_new(uint64_t, res->iov_cnt); + res->iov = g_new(struct iovec, res->iov_cnt); + + /* read data */ + for (i = 0; i < res->iov_cnt; i++) { + res->addrs[i] = qemu_get_be64(f); + res->iov[i].iov_len = qemu_get_be32(f); + } + + if (!virtio_gpu_load_restore_mapping(g, res)) { + g_free(res); + return -EINVAL; + } + + virtio_gpu_init_udmabuf(res); + + resource_id = qemu_get_be32(f); + } + + return 0; +} + +static int virtio_gpu_post_load(void *opaque, int version_id) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_scanout *scanout; + struct virtio_gpu_simple_resource *res; + int i; + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { /* FIXME: should take scanout.r.{x,y} into account */ scanout = &g->parent_obj.scanout[i]; @@ -1475,6 +1562,32 @@ virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config) } } +static bool virtio_gpu_blob_state_needed(void *opaque) +{ + VirtIOGPU *g = VIRTIO_GPU(opaque); + + return virtio_gpu_blob_enabled(g->parent_obj.conf); +} + +const VMStateDescription vmstate_virtio_gpu_blob_state = { + .name = "virtio-gpu/blob", + .minimum_version_id = VIRTIO_GPU_VM_VERSION, + .version_id = VIRTIO_GPU_VM_VERSION, + .needed = virtio_gpu_blob_state_needed, + .fields = (const VMStateField[]){ + { + .name = "virtio-gpu/blob", + .info = &(const VMStateInfo) { + .name = "blob", + .get = virtio_gpu_blob_load, + .put = virtio_gpu_blob_save, + }, + .flags = VMS_SINGLE, + } /* device */, + VMSTATE_END_OF_LIST() + }, +}; + /* * For historical reasons virtio_gpu does not adhere to virtio migration * scheme as described in doc/virtio-migration.txt, in a sense that no @@ -1500,6 +1613,11 @@ static const VMStateDescription 
vmstate_virtio_gpu = { } /* device */, VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription * []) { + &vmstate_virtio_gpu_blob_state, + NULL + }, + .post_load = virtio_gpu_post_load, }; static Property virtio_gpu_properties[] = { diff --git a/hw/hyperv/Kconfig b/hw/hyperv/Kconfig index fcf65903bd..41dd827c84 100644 --- a/hw/hyperv/Kconfig +++ b/hw/hyperv/Kconfig @@ -16,3 +16,13 @@ config SYNDBG bool default y depends on VMBUS + +config HV_BALLOON_SUPPORTED + bool + +config HV_BALLOON + bool + default y + depends on VMBUS + depends on HV_BALLOON_POSSIBLE + depends on HV_BALLOON_SUPPORTED diff --git a/hw/hyperv/hv-balloon-internal.h b/hw/hyperv/hv-balloon-internal.h new file mode 100644 index 0000000000..164c2e5825 --- /dev/null +++ b/hw/hyperv/hv-balloon-internal.h @@ -0,0 +1,33 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HV_BALLOON_INTERNAL_H +#define HW_HYPERV_HV_BALLOON_INTERNAL_H + +#include "qemu/osdep.h" + +#define HV_BALLOON_PFN_SHIFT 12 +#define HV_BALLOON_PAGE_SIZE (1 << HV_BALLOON_PFN_SHIFT) + +#define SUM_OVERFLOW_U64(in1, in2) ((in1) > UINT64_MAX - (in2)) +#define SUM_SATURATE_U64(in1, in2) \ + ({ \ + uint64_t _in1 = (in1), _in2 = (in2); \ + uint64_t _result; \ + \ + if (!SUM_OVERFLOW_U64(_in1, _in2)) { \ + _result = _in1 + _in2; \ + } else { \ + _result = UINT64_MAX; \ + } \ + \ + _result; \ + }) + +#endif diff --git a/hw/hyperv/hv-balloon-our_range_memslots.c b/hw/hyperv/hv-balloon-our_range_memslots.c new file mode 100644 index 0000000000..99bae870f3 --- /dev/null +++ b/hw/hyperv/hv-balloon-our_range_memslots.c @@ -0,0 +1,201 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "hv-balloon-internal.h" +#include "hv-balloon-our_range_memslots.h" +#include "trace.h" + +/* OurRange */ +static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count) +{ + assert(count <= UINT64_MAX - start); + our_range->range.start = start; + our_range->range.count = count; + + hvb_page_range_tree_init(&our_range->removed_guest); + hvb_page_range_tree_init(&our_range->removed_both); + + /* mark the whole range as unused but for potential use */ + our_range->added = 0; + our_range->unusable_tail = 0; +} + +static void our_range_destroy(OurRange *our_range) +{ + hvb_page_range_tree_destroy(&our_range->removed_guest); + hvb_page_range_tree_destroy(&our_range->removed_both); +} + +void hvb_our_range_clear_removed_trees(OurRange *our_range) +{ + hvb_page_range_tree_destroy(&our_range->removed_guest); + hvb_page_range_tree_destroy(&our_range->removed_both); + hvb_page_range_tree_init(&our_range->removed_guest); + hvb_page_range_tree_init(&our_range->removed_both); +} + +void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size) +{ + assert(additional_size <= UINT64_MAX - our_range->added); + + our_range->added += additional_size; + + assert(our_range->added <= UINT64_MAX - our_range->unusable_tail); + assert(our_range->added + our_range->unusable_tail <= + our_range->range.count); +} + +/* OurRangeMemslots */ +static void our_range_memslots_init_slots(OurRangeMemslots *our_range, + MemoryRegion *backing_mr, + Object *memslot_owner) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + unsigned int idx; + uint64_t memslot_offset; + + assert(memslots->count > 0); + memslots->slots = g_new0(MemoryRegion, memslots->count); + + /* Initialize our memslots, but don't map them yet. */ + assert(memslots->size_each > 0); + for (idx = 0, memslot_offset = 0; idx < memslots->count; + idx++, memslot_offset += memslots->size_each) { + uint64_t memslot_size; + g_autofree char *name = NULL; + + /* The size of the last memslot might be smaller. */ + if (idx == memslots->count - 1) { + uint64_t region_size; + + assert(our_range->mr); + region_size = memory_region_size(our_range->mr); + memslot_size = region_size - memslot_offset; + } else { + memslot_size = memslots->size_each; + } + + name = g_strdup_printf("memslot-%u", idx); + memory_region_init_alias(&memslots->slots[idx], memslot_owner, name, + backing_mr, memslot_offset, memslot_size); + /* + * We want to be able to atomically and efficiently activate/deactivate + * individual memslots without affecting adjacent memslots in memory + * notifiers. 
+ */ + memory_region_set_unmergeable(&memslots->slots[idx], true); + } + + memslots->mapped_count = 0; +} + +OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr, + MemoryRegion *parent_mr, + MemoryRegion *backing_mr, + Object *memslot_owner, + unsigned int memslot_count, + uint64_t memslot_size) +{ + OurRangeMemslots *our_range; + + our_range = g_malloc(sizeof(*our_range)); + our_range_init(&our_range->range, + addr / HV_BALLOON_PAGE_SIZE, + memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE); + our_range->slots.size_each = memslot_size; + our_range->slots.count = memslot_count; + our_range->mr = parent_mr; + our_range_memslots_init_slots(our_range, backing_mr, memslot_owner); + + return our_range; +} + +static void our_range_memslots_free_memslots(OurRangeMemslots *our_range) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + unsigned int idx; + uint64_t offset; + + memory_region_transaction_begin(); + for (idx = 0, offset = 0; idx < memslots->mapped_count; + idx++, offset += memslots->size_each) { + trace_hv_balloon_unmap_slot(idx, memslots->count, offset); + assert(memory_region_is_mapped(&memslots->slots[idx])); + memory_region_del_subregion(our_range->mr, &memslots->slots[idx]); + } + memory_region_transaction_commit(); + + for (idx = 0; idx < memslots->count; idx++) { + object_unparent(OBJECT(&memslots->slots[idx])); + } + + g_clear_pointer(&our_range->slots.slots, g_free); +} + +void hvb_our_range_memslots_free(OurRangeMemslots *our_range) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + MemoryRegion *hostmem_mr; + RAMBlock *rb; + + assert(our_range->slots.count > 0); + assert(our_range->slots.slots); + + hostmem_mr = memslots->slots[0].alias; + rb = hostmem_mr->ram_block; + ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); + + our_range_memslots_free_memslots(our_range); + our_range_destroy(&our_range->range); + g_free(our_range); +} + +void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range, + uint64_t additional_map_size) +{ + OurRangeMemslotsSlots *memslots = &our_range->slots; + uint64_t total_map_size; + unsigned int idx; + uint64_t offset; + + total_map_size = (our_range->range.added + additional_map_size) * + HV_BALLOON_PAGE_SIZE; + idx = memslots->mapped_count; + assert(memslots->size_each > 0); + offset = idx * memslots->size_each; + + /* + * Activate all memslots covered by the newly added region in a single + * transaction. + */ + memory_region_transaction_begin(); + for ( ; idx < memslots->count; + idx++, offset += memslots->size_each) { + /* + * If this memslot starts beyond or at the end of the range to map so + * does every next one. + */ + if (offset >= total_map_size) { + break; + } + + /* + * Instead of enabling/disabling memslot, we add/remove them. This + * should make address space updates faster, because we don't have to + * loop over many disabled subregions. + */ + trace_hv_balloon_map_slot(idx, memslots->count, offset); + assert(!memory_region_is_mapped(&memslots->slots[idx])); + memory_region_add_subregion(our_range->mr, offset, + &memslots->slots[idx]); + + memslots->mapped_count++; + } + memory_region_transaction_commit(); +} diff --git a/hw/hyperv/hv-balloon-our_range_memslots.h b/hw/hyperv/hv-balloon-our_range_memslots.h new file mode 100644 index 0000000000..b6f592d34b --- /dev/null +++ b/hw/hyperv/hv-balloon-our_range_memslots.h @@ -0,0 +1,110 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. 
+ * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H +#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H + +#include "qemu/osdep.h" + +#include "exec/memory.h" +#include "qom/object.h" +#include "hv-balloon-page_range_tree.h" + +/* OurRange */ +#define OUR_RANGE(ptr) ((OurRange *)(ptr)) + +/* "our range" means the memory range owned by this driver (for hot-adding) */ +typedef struct OurRange { + PageRange range; + + /* How many pages were hot-added to the guest */ + uint64_t added; + + /* Pages at the end not currently usable */ + uint64_t unusable_tail; + + /* Memory removed from the guest */ + PageRangeTree removed_guest, removed_both; +} OurRange; + +static inline uint64_t our_range_get_remaining_start(OurRange *our_range) +{ + return our_range->range.start + our_range->added; +} + +static inline uint64_t our_range_get_remaining_size(OurRange *our_range) +{ + return our_range->range.count - our_range->added - our_range->unusable_tail; +} + +void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size); + +static inline void our_range_mark_remaining_unusable(OurRange *our_range) +{ + our_range->unusable_tail = our_range->range.count - our_range->added; +} + +static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range, + bool both) +{ + if (both) { + return our_range->removed_both; + } else { + return our_range->removed_guest; + } +} + +static inline bool our_range_is_removed_tree_empty(OurRange *our_range, + bool both) +{ + if (both) { + return page_range_tree_is_empty(our_range->removed_both); + } else { + return page_range_tree_is_empty(our_range->removed_guest); + } +} + +void hvb_our_range_clear_removed_trees(OurRange *our_range); + +/* OurRangeMemslots */ +typedef struct OurRangeMemslotsSlots { + /* Nominal size of each memslot (the last one might be smaller) */ + uint64_t size_each; + + /* Slots array and its element count */ + MemoryRegion *slots; + unsigned int count; + + /* How many slots are currently mapped */ + unsigned int mapped_count; +} OurRangeMemslotsSlots; + +typedef struct OurRangeMemslots { + OurRange range; + + /* Memslots covering our range */ + OurRangeMemslotsSlots slots; + + MemoryRegion *mr; +} OurRangeMemslots; + +OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr, + MemoryRegion *parent_mr, + MemoryRegion *backing_mr, + Object *memslot_owner, + unsigned int memslot_count, + uint64_t memslot_size); +void hvb_our_range_memslots_free(OurRangeMemslots *our_range); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free) + +void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range, + uint64_t additional_map_size); + +#endif diff --git a/hw/hyperv/hv-balloon-page_range_tree.c b/hw/hyperv/hv-balloon-page_range_tree.c new file mode 100644 index 0000000000..e178d8b413 --- /dev/null +++ b/hw/hyperv/hv-balloon-page_range_tree.c @@ -0,0 +1,228 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "hv-balloon-internal.h" +#include "hv-balloon-page_range_tree.h" + +/* + * temporarily avoid warnings about enhanced GTree API usage requiring a + * too recent Glib version until GLIB_VERSION_MAX_ALLOWED finally reaches + * the Glib version with this API + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +/* PageRangeTree */ +static gint page_range_tree_key_compare(gconstpointer leftp, + gconstpointer rightp, + gpointer user_data) +{ + const uint64_t *left = leftp, *right = rightp; + + if (*left < *right) { + return -1; + } else if (*left > *right) { + return 1; + } else { /* *left == *right */ + return 0; + } +} + +static GTreeNode *page_range_tree_insert_new(PageRangeTree tree, + uint64_t start, uint64_t count) +{ + uint64_t *key = g_malloc(sizeof(*key)); + PageRange *range = g_malloc(sizeof(*range)); + + assert(count > 0); + + *key = range->start = start; + range->count = count; + + return g_tree_insert_node(tree.t, key, range); +} + +void hvb_page_range_tree_insert(PageRangeTree tree, + uint64_t start, uint64_t count, + uint64_t *dupcount) +{ + GTreeNode *node; + bool joinable; + uint64_t intersection; + PageRange *range; + + assert(!SUM_OVERFLOW_U64(start, count)); + if (count == 0) { + return; + } + + node = g_tree_upper_bound(tree.t, &start); + if (node) { + node = g_tree_node_previous(node); + } else { + node = g_tree_node_last(tree.t); + } + + if (node) { + range = g_tree_node_value(node); + assert(range); + intersection = page_range_intersection_size(range, start, count); + joinable = page_range_joinable_right(range, start, count); + } + + if (!node || + (!intersection && !joinable)) { + /* + * !node case: the tree is empty or the very first node in the tree + * already has a higher key (the start of its range). + * the other case: there is a gap in the tree between the new range + * and the previous one. + * anyway, let's just insert the new range into the tree. + */ + node = page_range_tree_insert_new(tree, start, count); + assert(node); + range = g_tree_node_value(node); + assert(range); + } else { + /* + * the previous range in the tree either partially covers the new + * range or ends just at its beginning - extend it + */ + if (dupcount) { + *dupcount += intersection; + } + + count += start - range->start; + range->count = MAX(range->count, count); + } + + /* check next nodes for possible merging */ + for (node = g_tree_node_next(node); node; ) { + PageRange *rangecur; + + rangecur = g_tree_node_value(node); + assert(rangecur); + + intersection = page_range_intersection_size(rangecur, + range->start, range->count); + joinable = page_range_joinable_left(rangecur, + range->start, range->count); + if (!intersection && !joinable) { + /* the current node is disjoint */ + break; + } + + if (dupcount) { + *dupcount += intersection; + } + + count = rangecur->count + (rangecur->start - range->start); + range->count = MAX(range->count, count); + + /* the current node was merged in, remove it */ + start = rangecur->start; + node = g_tree_node_next(node); + /* no hinted removal in GTree... 
*/ + g_tree_remove(tree.t, &start); + } +} + +bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out, + uint64_t maxcount) +{ + GTreeNode *node; + PageRange *range; + + node = g_tree_node_last(tree.t); + if (!node) { + return false; + } + + range = g_tree_node_value(node); + assert(range); + + out->start = range->start; + + /* can't modify range->start as it is the node key */ + if (range->count > maxcount) { + out->start += range->count - maxcount; + out->count = maxcount; + range->count -= maxcount; + } else { + out->count = range->count; + /* no hinted removal in GTree... */ + g_tree_remove(tree.t, &out->start); + } + + return true; +} + +bool hvb_page_range_tree_intree_any(PageRangeTree tree, + uint64_t start, uint64_t count) +{ + GTreeNode *node; + + if (count == 0) { + return false; + } + + /* find the first node that can possibly intersect our range */ + node = g_tree_upper_bound(tree.t, &start); + if (node) { + /* + * a NULL node below means that the very first node in the tree + * already has a higher key (the start of its range). + */ + node = g_tree_node_previous(node); + } else { + /* a NULL node below means that the tree is empty */ + node = g_tree_node_last(tree.t); + } + /* node range start <= range start */ + + if (!node) { + /* node range start > range start */ + node = g_tree_node_first(tree.t); + } + + for ( ; node; node = g_tree_node_next(node)) { + PageRange *range = g_tree_node_value(node); + + assert(range); + /* + * if this node starts beyond or at the end of our range so does + * every next one + */ + if (range->start >= start + count) { + break; + } + + if (page_range_intersection_size(range, start, count) > 0) { + return true; + } + } + + return false; +} + +void hvb_page_range_tree_init(PageRangeTree *tree) +{ + tree->t = g_tree_new_full(page_range_tree_key_compare, NULL, + g_free, g_free); +} + +void hvb_page_range_tree_destroy(PageRangeTree *tree) +{ + /* g_tree_destroy() is not NULL-safe */ + if (!tree->t) { + return; + } + + g_tree_destroy(tree->t); + tree->t = NULL; +} diff --git a/hw/hyperv/hv-balloon-page_range_tree.h b/hw/hyperv/hv-balloon-page_range_tree.h new file mode 100644 index 0000000000..07a9ae0da6 --- /dev/null +++ b/hw/hyperv/hv-balloon-page_range_tree.h @@ -0,0 +1,118 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H +#define HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H + +#include "qemu/osdep.h" + +/* PageRange */ +typedef struct PageRange { + uint64_t start; + uint64_t count; +} PageRange; + +/* return just the part of range before (start) */ +static inline void page_range_part_before(const PageRange *range, + uint64_t start, PageRange *out) +{ + uint64_t endr = range->start + range->count; + uint64_t end = MIN(endr, start); + + out->start = range->start; + if (end > out->start) { + out->count = end - out->start; + } else { + out->count = 0; + } +} + +/* return just the part of range after (start, count) */ +static inline void page_range_part_after(const PageRange *range, + uint64_t start, uint64_t count, + PageRange *out) +{ + uint64_t end = range->start + range->count; + uint64_t ends = start + count; + + out->start = MAX(range->start, ends); + if (end > out->start) { + out->count = end - out->start; + } else { + out->count = 0; + } +} + +static inline void page_range_intersect(const PageRange *range, + uint64_t start, uint64_t count, + PageRange *out) +{ + uint64_t end1 = range->start + range->count; + uint64_t end2 = start + count; + uint64_t end = MIN(end1, end2); + + out->start = MAX(range->start, start); + out->count = out->start < end ? end - out->start : 0; +} + +static inline uint64_t page_range_intersection_size(const PageRange *range, + uint64_t start, uint64_t count) +{ + PageRange trange; + + page_range_intersect(range, start, count, &trange); + return trange.count; +} + +static inline bool page_range_joinable_left(const PageRange *range, + uint64_t start, uint64_t count) +{ + return start + count == range->start; +} + +static inline bool page_range_joinable_right(const PageRange *range, + uint64_t start, uint64_t count) +{ + return range->start + range->count == start; +} + +static inline bool page_range_joinable(const PageRange *range, + uint64_t start, uint64_t count) +{ + return page_range_joinable_left(range, start, count) || + page_range_joinable_right(range, start, count); +} + +/* PageRangeTree */ +/* type safety */ +typedef struct PageRangeTree { + GTree *t; +} PageRangeTree; + +static inline bool page_range_tree_is_empty(PageRangeTree tree) +{ + guint nnodes = g_tree_nnodes(tree.t); + + return nnodes == 0; +} + +void hvb_page_range_tree_init(PageRangeTree *tree); +void hvb_page_range_tree_destroy(PageRangeTree *tree); + +bool hvb_page_range_tree_intree_any(PageRangeTree tree, + uint64_t start, uint64_t count); + +bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out, + uint64_t maxcount); + +void hvb_page_range_tree_insert(PageRangeTree tree, + uint64_t start, uint64_t count, + uint64_t *dupcount); + +#endif diff --git a/hw/hyperv/hv-balloon-stub.c b/hw/hyperv/hv-balloon-stub.c new file mode 100644 index 0000000000..a47412d4a8 --- /dev/null +++ b/hw/hyperv/hv-balloon-stub.c @@ -0,0 +1,19 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-types-machine.h" + +HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp) +{ + error_setg(errp, "hv-balloon device not enabled in this build"); + return NULL; +} diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c new file mode 100644 index 0000000000..66f297c1d7 --- /dev/null +++ b/hw/hyperv/hv-balloon.c @@ -0,0 +1,1769 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hv-balloon-internal.h" + +#include "exec/address-spaces.h" +#include "exec/cpu-common.h" +#include "exec/ramblock.h" +#include "hw/boards.h" +#include "hw/hyperv/dynmem-proto.h" +#include "hw/hyperv/hv-balloon.h" +#include "hw/hyperv/vmbus.h" +#include "hw/mem/memory-device.h" +#include "hw/mem/pc-dimm.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "monitor/qdev.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-events-machine.h" +#include "qapi/qapi-types-machine.h" +#include "qapi/qmp/qdict.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/units.h" +#include "qemu/timer.h" +#include "sysemu/balloon.h" +#include "sysemu/hostmem.h" +#include "sysemu/reset.h" +#include "hv-balloon-our_range_memslots.h" +#include "hv-balloon-page_range_tree.h" +#include "trace.h" + +#define HV_BALLOON_ADDR_PROP "addr" +#define HV_BALLOON_MEMDEV_PROP "memdev" +#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502" + +/* + * Some Windows versions (at least Server 2019) will crash with various + * error codes when receiving DM protocol requests (at least + * DM_MEM_HOT_ADD_REQUEST) immediately after boot. + * + * It looks like Hyper-V from Server 2016 uses a 50-second after-boot + * delay, probably to workaround this issue, so we'll use this value, too. 
+ */ +#define HV_BALLOON_POST_INIT_WAIT (50 * 1000) + +#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB) +#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE) + +#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB) + +#define HV_BALLOON_HR_CHUNK_PAGES 585728 +/* + * ^ that's the maximum number of pages + * that Windows returns in one hot remove response + * + * If the number requested is too high Windows will no longer honor + * these requests + */ + +struct HvBalloonClass { + VMBusDeviceClass parent_class; +} HvBalloonClass; + +typedef enum State { + /* not a real state */ + S_NO_CHANGE = 0, + + S_WAIT_RESET, + S_POST_RESET_CLOSED, + + /* init flow */ + S_VERSION, + S_CAPS, + S_POST_INIT_WAIT, + + S_IDLE, + + /* balloon op flow */ + S_BALLOON_POSTING, + S_BALLOON_RB_WAIT, + S_BALLOON_REPLY_WAIT, + + /* unballoon + hot add ops flow */ + S_UNBALLOON_POSTING, + S_UNBALLOON_RB_WAIT, + S_UNBALLOON_REPLY_WAIT, + S_HOT_ADD_SETUP, + S_HOT_ADD_RB_WAIT, + S_HOT_ADD_POSTING, + S_HOT_ADD_REPLY_WAIT, +} State; + +typedef struct StateDesc { + State state; + const char *desc; +} StateDesc; + +typedef struct HvBalloon { + VMBusDevice parent; + State state; + + union dm_version version; + union dm_caps caps; + + QEMUTimer post_init_timer; + + unsigned int trans_id; + + struct { + bool enabled; + bool received; + uint64_t committed; + uint64_t available; + } status_report; + + /* Guest target size */ + uint64_t target; + bool target_changed; + + /* Current (un)balloon / hot-add operation parameters */ + union { + uint64_t balloon_diff; + + struct { + uint64_t unballoon_diff; + uint64_t hot_add_diff; + }; + + struct { + PageRange hot_add_range; + uint64_t ha_current_count; + }; + }; + + OurRangeMemslots *our_range; + + /* Count of memslots covering our memory */ + unsigned int memslot_count; + + /* Nominal size of each memslot (the last one might be smaller) */ + uint64_t memslot_size; + + /* Non-ours removed memory */ + PageRangeTree removed_guest, removed_both; + + /* Grand totals of removed memory (both ours and non-ours) */ + uint64_t removed_guest_ctr, removed_both_ctr; + + /* MEMORY_DEVICE props */ + uint64_t addr; + HostMemoryBackend *hostmem; + MemoryRegion *mr; +} HvBalloon; + +OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \ + { TYPE_MEMORY_DEVICE }, { }) + +#define HV_BALLOON_SET_STATE(hvb, news) \ + do { \ + assert(news != S_NO_CHANGE); \ + hv_balloon_state_set(hvb, news, # news); \ + } while (0) + +#define HV_BALLOON_STATE_DESC_SET(stdesc, news) \ + _hv_balloon_state_desc_set(stdesc, news, # news) + +#define HV_BALLOON_STATE_DESC_INIT \ + { \ + .state = S_NO_CHANGE, \ + } + +typedef struct HvBalloonReq { + VMBusChanReq vmreq; +} HvBalloonReq; + +/* total our memory includes parts currently removed from the guest */ +static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon) +{ + if (!balloon->our_range) { + return 0; + } + + return balloon->our_range->range.added; +} + +/* TODO: unify the code below with virtio-balloon and cache the value */ +static int build_dimm_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* only realized DIMMs matter */ + *list = g_slist_prepend(*list, dev); + } + } + + object_child_foreach(obj, build_dimm_list, opaque); + return 0; +} + +static ram_addr_t get_current_ram_size(void) +{ + GSList *list = NULL, *item; + ram_addr_t size = current_machine->ram_size; + + 
build_dimm_list(qdev_get_machine(), &list); + for (item = list; item; item = g_slist_next(item)) { + Object *obj = OBJECT(item->data); + if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) + size += object_property_get_int(obj, PC_DIMM_SIZE_PROP, + &error_abort); + } + g_slist_free(list); + + return size; +} + +/* total RAM includes memory currently removed from the guest */ +static uint64_t hv_balloon_total_ram(HvBalloon *balloon) +{ + ram_addr_t ram_size = get_current_ram_size(); + uint64_t ram_size_pages = ram_size >> HV_BALLOON_PFN_SHIFT; + uint64_t our_ram_size_pages = hv_balloon_total_our_ram(balloon); + + assert(ram_size_pages > 0); + + return SUM_SATURATE_U64(ram_size_pages, our_ram_size_pages); +} + +/* + * calculating the total RAM size is a slow operation, + * avoid it as much as possible + */ +static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon, + uint64_t ram_size_pages) +{ + uint64_t total_removed; + + total_removed = SUM_SATURATE_U64(balloon->removed_guest_ctr, + balloon->removed_both_ctr); + + /* possible if guest returns pages outside actual RAM */ + if (total_removed > ram_size_pages) { + total_removed = ram_size_pages; + } + + return total_removed; +} + +/* Returns whether the state has actually changed */ +static bool hv_balloon_state_set(HvBalloon *balloon, + State newst, const char *newststr) +{ + if (newst == S_NO_CHANGE || balloon->state == newst) { + return false; + } + + balloon->state = newst; + trace_hv_balloon_state_change(newststr); + return true; +} + +static void _hv_balloon_state_desc_set(StateDesc *stdesc, + State newst, const char *newststr) +{ + /* state setting is only permitted on a freshly init desc */ + assert(stdesc->state == S_NO_CHANGE); + + assert(newst != S_NO_CHANGE); + + stdesc->state = newst; + stdesc->desc = newststr; +} + +static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon) +{ + return vmbus_device_channel(&balloon->parent, 0); +} + +static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon) +{ + VMBusChannel *chan; + + chan = hv_balloon_get_channel_maybe(balloon); + assert(chan != NULL); + return chan; +} + +static ssize_t hv_balloon_send_packet(VMBusChannel *chan, + struct dm_message *msg) +{ + int ret; + + ret = vmbus_channel_reserve(chan, 0, msg->hdr.size); + if (ret < 0) { + return ret; + } + + return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, msg, msg->hdr.size, false, + msg->hdr.trans_id); +} + +static bool hv_balloon_unballoon_get_source(HvBalloon *balloon, + PageRangeTree *dtree, + uint64_t **dctr, + bool *is_our_range) +{ + OurRange *our_range = OUR_RANGE(balloon->our_range); + + /* Try the boot memory first */ + if (g_tree_nnodes(balloon->removed_guest.t) > 0) { + *dtree = balloon->removed_guest; + *dctr = &balloon->removed_guest_ctr; + *is_our_range = false; + } else if (g_tree_nnodes(balloon->removed_both.t) > 0) { + *dtree = balloon->removed_both; + *dctr = &balloon->removed_both_ctr; + *is_our_range = false; + } else if (!our_range) { + return false; + } else if (!our_range_is_removed_tree_empty(our_range, false)) { + *dtree = our_range_get_removed_tree(our_range, false); + *dctr = &balloon->removed_guest_ctr; + *is_our_range = true; + } else if (!our_range_is_removed_tree_empty(our_range, true)) { + *dtree = our_range_get_removed_tree(our_range, true); + *dctr = &balloon->removed_both_ctr; + *is_our_range = true; + } else { + return false; + } + + return true; +} + +static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = 
hv_balloon_get_channel(balloon); + struct dm_unballoon_request *ur; + size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]); + + assert(balloon->state == S_UNBALLOON_RB_WAIT); + + if (vmbus_channel_reserve(chan, 0, ur_size) < 0) { + return; + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING); +} + +static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + PageRangeTree dtree; + uint64_t *dctr; + bool our_range; + struct dm_unballoon_request *ur; + size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]); + PageRange range; + bool bret; + ssize_t ret; + + assert(balloon->state == S_UNBALLOON_POSTING); + assert(balloon->unballoon_diff > 0); + + if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) { + error_report("trying to unballoon but nothing seems to be ballooned"); + /* + * there is little we can do as we might have already + * sent the guest a partial request we can't cancel + */ + return; + } + + assert(balloon->our_range || !our_range); + assert(dtree.t); + assert(dctr); + + ur = alloca(ur_size); + memset(ur, 0, ur_size); + ur->hdr.type = DM_UNBALLOON_REQUEST; + ur->hdr.size = ur_size; + ur->hdr.trans_id = balloon->trans_id; + + bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff, + HV_BALLOON_HA_CHUNK_PAGES)); + assert(bret); + /* TODO: madvise? */ + + *dctr -= range.count; + balloon->unballoon_diff -= range.count; + + ur->range_count = 1; + ur->range_array[0].finfo.start_page = range.start; + ur->range_array[0].finfo.page_cnt = range.count; + ur->more_pages = balloon->unballoon_diff > 0; + + trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id, + range.count, range.start, + balloon->unballoon_diff); + + if (ur->more_pages) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT); + } else { + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT); + } + + ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, ur, ur_size, false, + ur->hdr.trans_id); + if (ret <= 0) { + error_report("error %zd when posting unballoon msg, expect problems", + ret); + } +} + +static bool hv_balloon_our_range_ensure(HvBalloon *balloon) +{ + uint64_t align; + MemoryRegion *hostmem_mr; + g_autoptr(OurRangeMemslots) our_range_memslots = NULL; + OurRange *our_range; + + if (balloon->our_range) { + return true; + } + + if (!balloon->hostmem) { + return false; + } + + align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB; + assert(QEMU_IS_ALIGNED(balloon->addr, align)); + + hostmem_mr = host_memory_backend_get_memory(balloon->hostmem); + + our_range_memslots = hvb_our_range_memslots_new(balloon->addr, + balloon->mr, hostmem_mr, + OBJECT(balloon), + balloon->memslot_count, + balloon->memslot_size); + our_range = OUR_RANGE(our_range_memslots); + + if (hvb_page_range_tree_intree_any(balloon->removed_guest, + our_range->range.start, + our_range->range.count) || + hvb_page_range_tree_intree_any(balloon->removed_both, + our_range->range.start, + our_range->range.count)) { + error_report("some parts of the memory backend were already returned by the guest. 
this should not happen, please reboot the guest and try again"); + return false; + } + + trace_hv_balloon_our_range_add(our_range->range.count, + our_range->range.start); + + balloon->our_range = g_steal_pointer(&our_range_memslots); + return true; +} + +static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc) +{ + /* need to make copy since it is in union with hot_add_range */ + uint64_t hot_add_diff = balloon->hot_add_diff; + PageRange *hot_add_range = &balloon->hot_add_range; + uint64_t align, our_range_remaining; + OurRange *our_range; + + assert(balloon->state == S_HOT_ADD_SETUP); + assert(hot_add_diff > 0); + + if (!hv_balloon_our_range_ensure(balloon)) { + goto ret_idle; + } + + our_range = OUR_RANGE(balloon->our_range); + + align = (1 << balloon->caps.cap_bits.hot_add_alignment) * + (MiB / HV_BALLOON_PAGE_SIZE); + + /* Absolute GPA in pages */ + hot_add_range->start = our_range_get_remaining_start(our_range); + assert(QEMU_IS_ALIGNED(hot_add_range->start, align)); + + our_range_remaining = our_range_get_remaining_size(our_range); + hot_add_range->count = MIN(our_range_remaining, hot_add_diff); + hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align); + if (hot_add_range->count == 0) { + goto ret_idle; + } + + hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range, + hot_add_range->count); + + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT); + return; + +ret_idle: + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); +} + +static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + struct dm_hot_add *ha; + size_t ha_size = sizeof(*ha) + sizeof(ha->range); + + assert(balloon->state == S_HOT_ADD_RB_WAIT); + + if (vmbus_channel_reserve(chan, 0, ha_size) < 0) { + return; + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING); +} + +static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc) +{ + PageRange *hot_add_range = &balloon->hot_add_range; + uint64_t *current_count = &balloon->ha_current_count; + VMBusChannel *chan = hv_balloon_get_channel(balloon); + struct dm_hot_add *ha; + size_t ha_size = sizeof(*ha) + sizeof(ha->range); + union dm_mem_page_range *ha_region; + uint64_t align, chunk_max_size; + ssize_t ret; + + assert(balloon->state == S_HOT_ADD_POSTING); + assert(hot_add_range->count > 0); + + align = (1 << balloon->caps.cap_bits.hot_add_alignment) * + (MiB / HV_BALLOON_PAGE_SIZE); + if (align >= HV_BALLOON_HA_CHUNK_PAGES) { + /* + * If the required alignment is higher than the chunk size we let it + * override that size. + */ + chunk_max_size = align; + } else { + chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align); + } + + /* + * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(), + * then it is either reduced by subtracting aligned current_count or + * further hot-adds are prevented by marking the whole remaining our range + * as unusable in hv_balloon_handle_hot_add_response(). 
+ */ + *current_count = MIN(hot_add_range->count, chunk_max_size); + + ha = alloca(ha_size); + ha_region = &(&ha->range)[1]; + memset(ha, 0, ha_size); + ha->hdr.type = DM_MEM_HOT_ADD_REQUEST; + ha->hdr.size = ha_size; + ha->hdr.trans_id = balloon->trans_id; + + ha->range.finfo.start_page = hot_add_range->start; + ha->range.finfo.page_cnt = *current_count; + ha_region->finfo.start_page = hot_add_range->start; + ha_region->finfo.page_cnt = ha->range.finfo.page_cnt; + + trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id, + *current_count, hot_add_range->start); + + ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, ha, ha_size, false, + ha->hdr.trans_id); + if (ret <= 0) { + error_report("error %zd when posting hot add msg, expect problems", + ret); + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT); +} + +static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + size_t bl_size = sizeof(struct dm_balloon); + + assert(balloon->state == S_BALLOON_RB_WAIT); + + if (vmbus_channel_reserve(chan, 0, bl_size) < 0) { + return; + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING); +} + +static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan = hv_balloon_get_channel(balloon); + struct dm_balloon bl; + size_t bl_size = sizeof(bl); + ssize_t ret; + + assert(balloon->state == S_BALLOON_POSTING); + assert(balloon->balloon_diff > 0); + + memset(&bl, 0, sizeof(bl)); + bl.hdr.type = DM_BALLOON_REQUEST; + bl.hdr.size = bl_size; + bl.hdr.trans_id = balloon->trans_id; + bl.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES); + + trace_hv_balloon_outgoing_balloon(bl.hdr.trans_id, bl.num_pages, + balloon->balloon_diff); + + ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, + NULL, 0, &bl, bl_size, false, + bl.hdr.trans_id); + if (ret <= 0) { + error_report("error %zd when posting balloon msg, expect problems", + ret); + } + + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT); +} + +static void hv_balloon_idle_state_process_target(HvBalloon *balloon, + StateDesc *stdesc) +{ + bool can_balloon = balloon->caps.cap_bits.balloon; + uint64_t ram_size_pages, total_removed; + + ram_size_pages = hv_balloon_total_ram(balloon); + total_removed = hv_balloon_total_removed_rs(balloon, ram_size_pages); + + /* + * we need to cache the values computed from the balloon target value when + * starting the adjustment procedure in case someone changes the target when + * the procedure is in progress + */ + if (balloon->target > ram_size_pages - total_removed) { + bool can_hot_add = balloon->caps.cap_bits.hot_add; + uint64_t target_diff = balloon->target - + (ram_size_pages - total_removed); + + balloon->unballoon_diff = MIN(target_diff, total_removed); + + if (can_hot_add) { + balloon->hot_add_diff = target_diff - balloon->unballoon_diff; + } else { + balloon->hot_add_diff = 0; + } + + if (balloon->unballoon_diff > 0) { + assert(can_balloon); + HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT); + } else if (balloon->hot_add_diff > 0) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP); + } + } else if (can_balloon && + balloon->target < ram_size_pages - total_removed) { + balloon->balloon_diff = ram_size_pages - total_removed - + balloon->target; + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT); + } +} + +static void hv_balloon_idle_state(HvBalloon *balloon, + StateDesc *stdesc) +{ + assert(balloon->state == S_IDLE); + + if 
(balloon->target_changed) { + balloon->target_changed = false; + hv_balloon_idle_state_process_target(balloon, stdesc); + return; + } +} + +static const struct { + void (*handler)(HvBalloon *balloon, StateDesc *stdesc); +} state_handlers[] = { + [S_IDLE].handler = hv_balloon_idle_state, + [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting, + [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait, + [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting, + [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait, + [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup, + [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait, + [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting, +}; + +static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc) +{ + if (balloon->state >= ARRAY_SIZE(state_handlers) || + !state_handlers[balloon->state].handler) { + return; + } + + state_handlers[balloon->state].handler(balloon, stdesc); +} + +static void hv_balloon_remove_response_insert_range(PageRangeTree tree, + const PageRange *range, + uint64_t *ctr1, + uint64_t *ctr2, + uint64_t *ctr3) +{ + uint64_t dupcount, effcount; + + if (range->count == 0) { + return; + } + + dupcount = 0; + hvb_page_range_tree_insert(tree, range->start, range->count, &dupcount); + + assert(dupcount <= range->count); + effcount = range->count - dupcount; + + *ctr1 += effcount; + *ctr2 += effcount; + if (ctr3) { + *ctr3 += effcount; + } +} + +static void hv_balloon_remove_response_handle_range(HvBalloon *balloon, + PageRange *range, + bool both, + uint64_t *removedctr) +{ + OurRange *our_range = OUR_RANGE(balloon->our_range); + PageRangeTree globaltree = + both ? balloon->removed_both : balloon->removed_guest; + uint64_t *globalctr = + both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr; + PageRange rangeeff; + + if (range->count == 0) { + return; + } + + trace_hv_balloon_remove_response(range->count, range->start, both); + + if (our_range) { + /* Includes the not-yet-hot-added and unusable parts. 
*/ + rangeeff = our_range->range; + } else { + rangeeff.start = rangeeff.count = 0; + } + + if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) { + PageRangeTree ourtree = our_range_get_removed_tree(our_range, both); + PageRange rangehole, rangecommon; + uint64_t ourremoved = 0; + + /* process the hole before our range, if it exists */ + page_range_part_before(range, rangeeff.start, &rangehole); + hv_balloon_remove_response_insert_range(globaltree, &rangehole, + globalctr, removedctr, NULL); + if (rangehole.count > 0) { + trace_hv_balloon_remove_response_hole(rangehole.count, + rangehole.start, + range->count, range->start, + rangeeff.start, both); + } + + /* process our part */ + page_range_intersect(range, rangeeff.start, rangeeff.count, + &rangecommon); + hv_balloon_remove_response_insert_range(ourtree, &rangecommon, + globalctr, removedctr, + &ourremoved); + if (rangecommon.count > 0) { + trace_hv_balloon_remove_response_common(rangecommon.count, + rangecommon.start, + range->count, range->start, + rangeeff.count, + rangeeff.start, ourremoved, + both); + } + + /* calculate what's left after our range */ + rangecommon = *range; + page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count, + range); + } + + /* process the remainder of the range that lies after our range */ + if (range->count > 0) { + hv_balloon_remove_response_insert_range(globaltree, range, + globalctr, removedctr, NULL); + trace_hv_balloon_remove_response_remainder(range->count, range->start, + both); + range->count = 0; + } +} + +static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon, + PageRange *range, + uint64_t start, + uint64_t count, + bool both, + uint64_t *removedctr) +{ + assert(count > 0); + + /* + * if there is an existing range that the new range can't be joined to + * dump it into tree(s) + */ + if (range->count > 0 && !page_range_joinable(range, start, count)) { + hv_balloon_remove_response_handle_range(balloon, range, both, + removedctr); + } + + if (range->count == 0) { + range->start = start; + range->count = count; + } else if (page_range_joinable_left(range, start, count)) { + range->start = start; + range->count += count; + } else { /* page_range_joinable_right() */ + range->count += count; + } +} + +static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key, + gpointer value, + gpointer data) +{ + PageRange *range = value; + uint64_t pageoff; + + for (pageoff = 0; pageoff < range->count; ) { + uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE; + void *addr; + RAMBlock *rb; + ram_addr_t rb_offset; + size_t rb_page_size; + size_t discard_size; + + assert(addr_64 <= UINTPTR_MAX); + addr = (void *)((uintptr_t)addr_64); + rb = qemu_ram_block_from_host(addr, false, &rb_offset); + rb_page_size = qemu_ram_pagesize(rb); + + if (rb_page_size != HV_BALLOON_PAGE_SIZE) { + /* TODO: these should end in "removed_guest" */ + warn_report("guest reported removed page backed by unsupported page size %zu", + rb_page_size); + pageoff++; + continue; + } + + discard_size = MIN(range->count - pageoff, + (rb->max_length - rb_offset) / + HV_BALLOON_PAGE_SIZE); + discard_size = MAX(discard_size, 1); + + if (ram_block_discard_range(rb, rb_offset, discard_size * + HV_BALLOON_PAGE_SIZE) != 0) { + warn_report("guest reported removed page failed discard"); + } + + pageoff += discard_size; + } + + return false; +} + +static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree) +{ + g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, 
NULL); +} + +static int hv_balloon_handle_remove_section(PageRangeTree tree, + const MemoryRegionSection *section, + uint64_t count) +{ + void *addr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + uint64_t addr_page; + + assert(count > 0); + + if ((uintptr_t)addr % HV_BALLOON_PAGE_SIZE) { + warn_report("guest reported removed pages at an unaligned host addr %p", + addr); + return -EINVAL; + } + + addr_page = (uintptr_t)addr / HV_BALLOON_PAGE_SIZE; + hvb_page_range_tree_insert(tree, addr_page, count, NULL); + + return 0; +} + +static void hv_balloon_handle_remove_ranges(HvBalloon *balloon, + union dm_mem_page_range ranges[], + uint32_t count) +{ + uint64_t removedcnt; + PageRangeTree removed_host_addr; + PageRange range_guest, range_both; + + hvb_page_range_tree_init(&removed_host_addr); + range_guest.count = range_both.count = removedcnt = 0; + for (unsigned int ctr = 0; ctr < count; ctr++) { + union dm_mem_page_range *mr = &ranges[ctr]; + hwaddr pa; + MemoryRegionSection section; + + for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) { + int ret; + uint64_t pageno = mr->finfo.start_page + offset; + uint64_t pagecnt = 1; + + pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT; + section = memory_region_find(get_system_memory(), pa, + (mr->finfo.page_cnt - offset) * + HV_BALLOON_PAGE_SIZE); + if (!section.mr) { + warn_report("guest reported removed page %"PRIu64" not found in RAM", + pageno); + ret = -EINVAL; + goto finish_page; + } + + pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE; + if (pagecnt <= 0) { + warn_report("guest reported removed page %"PRIu64" in a section smaller than page size", + pageno); + pagecnt = 1; /* skip the whole page */ + ret = -EINVAL; + goto finish_page; + } + + if (!memory_region_is_ram(section.mr) || + memory_region_is_rom(section.mr) || + memory_region_is_romd(section.mr)) { + warn_report("guest reported removed page %"PRIu64" in a section that is not an ordinary RAM", + pageno); + ret = -EINVAL; + goto finish_page; + } + + ret = hv_balloon_handle_remove_section(removed_host_addr, &section, + pagecnt); + + finish_page: + if (ret == 0) { + hv_balloon_remove_response_handle_pages(balloon, + &range_both, + pageno, pagecnt, + true, &removedcnt); + } else { + hv_balloon_remove_response_handle_pages(balloon, + &range_guest, + pageno, pagecnt, + false, &removedcnt); + } + + if (section.mr) { + memory_region_unref(section.mr); + } + + offset += pagecnt; + } + } + + hv_balloon_remove_response_handle_range(balloon, &range_both, true, + &removedcnt); + hv_balloon_remove_response_handle_range(balloon, &range_guest, false, + &removedcnt); + + hv_balloon_handle_remove_host_addr_tree(removed_host_addr); + hvb_page_range_tree_destroy(&removed_host_addr); + + if (removedcnt > balloon->balloon_diff) { + warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")", + removedcnt, balloon->balloon_diff); + balloon->balloon_diff = 0; + } else { + balloon->balloon_diff -= removedcnt; + } +} + +static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize, + const char *msgname) +{ + VMBusChanReq *vmreq = &req->vmreq; + uint32_t msglen = vmreq->msglen; + + if (msglen >= minsize) { + return true; + } + + warn_report("%s message too short (%u vs %zu), ignoring", msgname, + (unsigned int)msglen, minsize); + return false; +} + +static void hv_balloon_handle_version_request(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_version_request
*msgVr = vmreq->msg; + struct dm_version_response respVr; + + if (balloon->state != S_VERSION) { + warn_report("unexpected DM_VERSION_REQUEST in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgVr), + "DM_VERSION_REQUEST")) { + return; + } + + trace_hv_balloon_incoming_version(msgVr->version.major_version, + msgVr->version.minor_version); + + memset(&respVr, 0, sizeof(respVr)); + respVr.hdr.type = DM_VERSION_RESPONSE; + respVr.hdr.size = sizeof(respVr); + respVr.hdr.trans_id = msgVr->hdr.trans_id; + respVr.is_accepted = msgVr->version.version >= DYNMEM_PROTOCOL_VERSION_1 && + msgVr->version.version <= DYNMEM_PROTOCOL_VERSION_3; + + hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respVr); + + if (respVr.is_accepted) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS); + } +} + +static void hv_balloon_handle_caps_report(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_capabilities *msgCap = vmreq->msg; + struct dm_capabilities_resp_msg respCap; + + if (balloon->state != S_CAPS) { + warn_report("unexpected DM_CAPABILITIES_REPORT in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgCap), + "DM_CAPABILITIES_REPORT")) { + return; + } + + trace_hv_balloon_incoming_caps(msgCap->caps.caps); + balloon->caps = msgCap->caps; + + memset(&respCap, 0, sizeof(respCap)); + respCap.hdr.type = DM_CAPABILITIES_RESPONSE; + respCap.hdr.size = sizeof(respCap); + respCap.hdr.trans_id = msgCap->hdr.trans_id; + respCap.is_accepted = 1; + respCap.hot_remove = 1; + respCap.suppress_pressure_reports = !balloon->status_report.enabled; + hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respCap); + + timer_mod(&balloon->post_init_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + HV_BALLOON_POST_INIT_WAIT); + + HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT); +} + +static void hv_balloon_handle_status_report(HvBalloon *balloon, + HvBalloonReq *req) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_status *msgStatus = vmreq->msg; + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgStatus), + "DM_STATUS_REPORT")) { + return; + } + + if (!balloon->status_report.enabled) { + return; + } + + balloon->status_report.committed = msgStatus->num_committed; + balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE; + balloon->status_report.available = msgStatus->num_avail; + balloon->status_report.available *= HV_BALLOON_PAGE_SIZE; + balloon->status_report.received = true; + + qapi_event_send_hv_balloon_status_report(balloon->status_report.committed, + balloon->status_report.available); +} + +HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp) +{ + HvBalloon *balloon; + HvBalloonInfo *info; + + balloon = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL)); + if (!balloon) { + error_setg(errp, "no %s device present", TYPE_HV_BALLOON); + return NULL; + } + + if (!balloon->status_report.enabled) { + error_setg(errp, "guest memory status reporting not enabled"); + return NULL; + } + + if (!balloon->status_report.received) { + error_setg(errp, "no guest memory status report received yet"); + return NULL; + } + + info = g_malloc0(sizeof(*info)); + info->committed = balloon->status_report.committed; + info->available = balloon->status_report.available; + return info; +} + +static void hv_balloon_handle_unballoon_response(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct 
dm_unballoon_response *msgUrR = vmreq->msg; + + if (balloon->state != S_UNBALLOON_REPLY_WAIT) { + warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR), + "DM_UNBALLOON_RESPONSE")) + return; + + trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id); + + balloon->trans_id++; + + if (balloon->hot_add_diff > 0) { + bool can_hot_add = balloon->caps.cap_bits.hot_add; + + assert(can_hot_add); + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP); + } else { + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); + } +} + +static void hv_balloon_handle_hot_add_response(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + PageRange *hot_add_range = &balloon->hot_add_range; + VMBusChanReq *vmreq = &req->vmreq; + struct dm_hot_add_response *msgHaR = vmreq->msg; + OurRange *our_range; + + if (balloon->state != S_HOT_ADD_REPLY_WAIT) { + warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state", + balloon->state); + return; + } + + assert(balloon->our_range); + our_range = OUR_RANGE(balloon->our_range); + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR), + "DM_HOT_ADD_RESPONSE")) + return; + + trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result, + msgHaR->page_count); + + balloon->trans_id++; + + if (msgHaR->result) { + if (msgHaR->page_count > balloon->ha_current_count) { + warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")", + msgHaR->page_count, balloon->ha_current_count); + msgHaR->page_count = balloon->ha_current_count; + } + + hvb_our_range_mark_added(our_range, msgHaR->page_count); + hot_add_range->start += msgHaR->page_count; + hot_add_range->count -= msgHaR->page_count; + } + + if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) { + /* + * the current planned range was only partially hot-added, take note + * how much of it remains and don't attempt any further hot adds + */ + our_range_mark_remaining_unusable(our_range); + + goto ret_idle; + } + + /* any pages remaining to hot-add in our range? */ + if (hot_add_range->count > 0) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT); + return; + } + +ret_idle: + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); +} + +static void hv_balloon_handle_balloon_response(HvBalloon *balloon, + HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_balloon_response *msgBR = vmreq->msg; + + if (balloon->state != S_BALLOON_REPLY_WAIT) { + warn_report("unexpected DM_BALLOON_RESPONSE in %d state", + balloon->state); + return; + } + + if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR), + "DM_BALLOON_RESPONSE")) + return; + + trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count, + msgBR->more_pages); + + if (vmreq->msglen < sizeof(*msgBR) + + (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) { + warn_report("DM_BALLOON_RESPONSE too short for the range count"); + return; + } + + if (msgBR->range_count == 0) { + /* The guest is already at its minimum size */ + balloon->balloon_diff = 0; + goto ret_end_trans; + } else { + hv_balloon_handle_remove_ranges(balloon, + msgBR->range_array, + msgBR->range_count); + } + + /* More responses expected? 
*/ + if (msgBR->more_pages) { + return; + } + +ret_end_trans: + balloon->trans_id++; + + if (balloon->balloon_diff > 0) { + HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT); + } else { + HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE); + } +} + +static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req, + StateDesc *stdesc) +{ + VMBusChanReq *vmreq = &req->vmreq; + struct dm_message *msg = vmreq->msg; + + if (vmreq->msglen < sizeof(msg->hdr)) { + return; + } + + switch (msg->hdr.type) { + case DM_VERSION_REQUEST: + hv_balloon_handle_version_request(balloon, req, stdesc); + break; + + case DM_CAPABILITIES_REPORT: + hv_balloon_handle_caps_report(balloon, req, stdesc); + break; + + case DM_STATUS_REPORT: + hv_balloon_handle_status_report(balloon, req); + break; + + case DM_MEM_HOT_ADD_RESPONSE: + hv_balloon_handle_hot_add_response(balloon, req, stdesc); + break; + + case DM_UNBALLOON_RESPONSE: + hv_balloon_handle_unballoon_response(balloon, req, stdesc); + break; + + case DM_BALLOON_RESPONSE: + hv_balloon_handle_balloon_response(balloon, req, stdesc); + break; + + default: + warn_report("unknown DM message %u", msg->hdr.type); + break; + } +} + +static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc) +{ + VMBusChannel *chan; + HvBalloonReq *req; + + if (balloon->state == S_WAIT_RESET || + balloon->state == S_POST_RESET_CLOSED) { + return false; + } + + chan = hv_balloon_get_channel(balloon); + if (vmbus_channel_recv_start(chan)) { + return false; + } + + while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) { + hv_balloon_handle_packet(balloon, req, stdesc); + vmbus_free_req(req); + vmbus_channel_recv_pop(chan); + + if (stdesc->state != S_NO_CHANGE) { + break; + } + } + + return vmbus_channel_recv_done(chan) > 0; +} + +/* old state handler -> new state transition (potential) */ +static bool hv_balloon_event_loop_state(HvBalloon *balloon) +{ + StateDesc state_new = HV_BALLOON_STATE_DESC_INIT; + + hv_balloon_handle_state(balloon, &state_new); + return hv_balloon_state_set(balloon, state_new.state, state_new.desc); +} + +/* VMBus message -> new state transition (potential) */ +static bool hv_balloon_event_loop_recv(HvBalloon *balloon) +{ + StateDesc state_new = HV_BALLOON_STATE_DESC_INIT; + bool any_recv, state_changed; + + any_recv = hv_balloon_recv_channel(balloon, &state_new); + state_changed = hv_balloon_state_set(balloon, + state_new.state, state_new.desc); + + return state_changed || any_recv; +} + +static void hv_balloon_event_loop(HvBalloon *balloon) +{ + bool state_repeat, recv_repeat; + + do { + state_repeat = hv_balloon_event_loop_state(balloon); + recv_repeat = hv_balloon_event_loop_recv(balloon); + } while (state_repeat || recv_repeat); +} + +static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan) +{ + HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan)); + + hv_balloon_event_loop(balloon); +} + +static void hv_balloon_stat(void *opaque, BalloonInfo *info) +{ + HvBalloon *balloon = opaque; + info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr) + << HV_BALLOON_PFN_SHIFT; +} + +static void hv_balloon_to_target(void *opaque, ram_addr_t target) +{ + HvBalloon *balloon = opaque; + uint64_t target_pages = target >> HV_BALLOON_PFN_SHIFT; + + if (!target_pages) { + return; + } + + /* + * always set target_changed, even with unchanged target, as the user + * might be asking us to try again reaching it + */ + balloon->target = target_pages; + balloon->target_changed = true; + + hv_balloon_event_loop(balloon); +} + 
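Note how the state machine above is driven: a handler never writes balloon->state directly, it only proposes a transition by filling a freshly initialized StateDesc through HV_BALLOON_STATE_DESC_SET(), and hv_balloon_event_loop() keeps re-running the state handler and the channel receive path for as long as either of them made progress. The self-contained sketch below is not part of the patch; every name in it (ExState, ExStateDesc, ex_state_set, ex_idle_handler) is invented purely to illustrate that propose-then-commit loop in isolation.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef enum { EX_NO_CHANGE = 0, EX_IDLE, EX_BUSY } ExState;

typedef struct ExStateDesc { ExState state; const char *desc; } ExStateDesc;

static ExState ex_current = EX_IDLE;

/* commit a proposed transition; returns whether anything actually changed */
static bool ex_state_set(ExState newst, const char *desc)
{
    if (newst == EX_NO_CHANGE || newst == ex_current) {
        return false;
    }
    ex_current = newst;
    printf("-> %s\n", desc);
    return true;
}

/* a handler only *proposes* the next state into a freshly initialized desc */
static void ex_idle_handler(ExStateDesc *stdesc)
{
    assert(stdesc->state == EX_NO_CHANGE);
    stdesc->state = EX_BUSY;
    stdesc->desc = "EX_BUSY";
}

int main(void)
{
    bool progress;

    do {
        ExStateDesc next = { .state = EX_NO_CHANGE };

        if (ex_current == EX_IDLE) {
            ex_idle_handler(&next);
        }
        /* loop again only while some pass still makes progress */
        progress = ex_state_set(next.state, next.desc);
    } while (progress);

    return 0;
}

The real driver follows the same shape, except that a proposal can also come from an incoming VMBus message handled in hv_balloon_recv_channel(), which is why the event loop alternates between the two sources until neither produces a transition.
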
+static int hv_balloon_vmdev_open_channel(VMBusChannel *chan) +{ + HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan)); + + if (balloon->state != S_POST_RESET_CLOSED) { + warn_report("guest trying to open a DM channel in invalid %d state", + balloon->state); + return -EINVAL; + } + + HV_BALLOON_SET_STATE(balloon, S_VERSION); + hv_balloon_event_loop(balloon); + + return 0; +} + +static void hv_balloon_vmdev_close_channel(VMBusChannel *chan) +{ + HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan)); + + timer_del(&balloon->post_init_timer); + + /* Don't report stale data */ + balloon->status_report.received = false; + + HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET); + hv_balloon_event_loop(balloon); +} + +static void hv_balloon_post_init_timer(void *opaque) +{ + HvBalloon *balloon = opaque; + + if (balloon->state != S_POST_INIT_WAIT) { + return; + } + + HV_BALLOON_SET_STATE(balloon, S_IDLE); + hv_balloon_event_loop(balloon); +} + +static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon) +{ + g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free); +} + +static void hv_balloon_system_reset(void *opaque) +{ + HvBalloon *balloon = HV_BALLOON(opaque); + + hv_balloon_system_reset_unrealize_common(balloon); +} + +static void hv_balloon_ensure_mr(HvBalloon *balloon) +{ + MemoryRegion *hostmem_mr; + + assert(balloon->hostmem); + + if (balloon->mr) { + return; + } + + hostmem_mr = host_memory_backend_get_memory(balloon->hostmem); + + balloon->mr = g_new0(MemoryRegion, 1); + memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, + memory_region_size(hostmem_mr)); + + /* + * The VM can indicate an alignment up to 32 GiB. Memory device core can + * usually only handle/guarantee 1 GiB alignment. The user will have to + * specify a larger maxmem eventually. + * + * The memory device core will warn the user in case maxmem might have to be + * increased and will fail plugging the device if there is not sufficient + * space after alignment. + * + * TODO: we could do the alignment ourselves in a slightly bigger region. + * But this feels better, although the warning might be annoying. Maybe + * we can optimize that in the future (e.g., with such a device on the + * cmdline place/size the device memory region differently. + */ + balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); +} + +static void hv_balloon_free_mr(HvBalloon *balloon) +{ + if (!balloon->mr) { + return; + } + + object_unparent(OBJECT(balloon->mr)); + g_clear_pointer(&balloon->mr, g_free); +} + +static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp) +{ + ERRP_GUARD(); + HvBalloon *balloon = HV_BALLOON(vdev); + int ret; + + balloon->state = S_WAIT_RESET; + + ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat, + balloon); + if (ret < 0) { + /* This also protects against having multiple hv-balloon instances */ + error_setg(errp, "Only one balloon device is supported"); + return; + } + + if (balloon->hostmem) { + if (host_memory_backend_is_mapped(balloon->hostmem)) { + Object *obj = OBJECT(balloon->hostmem); + + error_setg(errp, "'%s' property specifies a busy memdev: %s", + HV_BALLOON_MEMDEV_PROP, + object_get_canonical_path_component(obj)); + goto out_balloon_handler; + } + + hv_balloon_ensure_mr(balloon); + + /* This is rather unlikely to happen, but let's still check for it. 
*/ + if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr), + HV_BALLOON_PAGE_SIZE)) { + error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64, + HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE); + goto out_balloon_handler; + } + + host_memory_backend_set_mapped(balloon->hostmem, true); + vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem), + DEVICE(balloon)); + } else if (balloon->addr) { + error_setg(errp, "'%s' property must not be set without a memdev", + HV_BALLOON_MEMDEV_PROP); + goto out_balloon_handler; + } + + timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL, + hv_balloon_post_init_timer, balloon); + + qemu_register_reset(hv_balloon_system_reset, balloon); + + return; + +out_balloon_handler: + qemu_remove_balloon_handler(balloon); +} + +/* + * VMBus device reset has to be implemented in case the guest decides to + * disconnect and reconnect to the VMBus without rebooting the whole system. + * + * However, the hot-added memory can't be removed here as Windows keeps on using + * it until the system is restarted, even after disconnecting from the VMBus. + */ +static void hv_balloon_vmdev_reset(VMBusDevice *vdev) +{ + HvBalloon *balloon = HV_BALLOON(vdev); + + if (balloon->state == S_POST_RESET_CLOSED) { + return; + } + + if (balloon->our_range) { + hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range)); + } + + hvb_page_range_tree_destroy(&balloon->removed_guest); + hvb_page_range_tree_destroy(&balloon->removed_both); + hvb_page_range_tree_init(&balloon->removed_guest); + hvb_page_range_tree_init(&balloon->removed_both); + + balloon->trans_id = 0; + balloon->removed_guest_ctr = 0; + balloon->removed_both_ctr = 0; + + HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED); + hv_balloon_event_loop(balloon); +} + +/* + * Clean up things that were (possibly) allocated pre-realization, for example + * from memory_device_pre_plug(), so we don't leak them if the device don't + * actually get realized in the end. 
+ */ +static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon) +{ + hv_balloon_free_mr(balloon); + balloon->addr = 0; + + balloon->memslot_count = 0; +} + +static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev) +{ + HvBalloon *balloon = HV_BALLOON(vdev); + + qemu_unregister_reset(hv_balloon_system_reset, balloon); + + hv_balloon_system_reset_unrealize_common(balloon); + + qemu_remove_balloon_handler(balloon); + + if (balloon->hostmem) { + vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem), + DEVICE(balloon)); + host_memory_backend_set_mapped(balloon->hostmem, false); + } + + hvb_page_range_tree_destroy(&balloon->removed_guest); + hvb_page_range_tree_destroy(&balloon->removed_both); + + hv_balloon_unrealize_finalize_common(balloon); +} + +static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, + &error_abort); +} + +static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp); +} + +static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + HvBalloon *balloon = HV_BALLOON(md); + + if (!balloon->hostmem) { + return NULL; + } + + hv_balloon_ensure_mr(balloon); + + return balloon->mr; +} + +static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + HvBalloonDeviceInfo *hi = g_new0(HvBalloonDeviceInfo, 1); + const HvBalloon *balloon = HV_BALLOON(md); + DeviceState *dev = DEVICE(md); + + if (dev->id) { + hi->id = g_strdup(dev->id); + } + + if (balloon->hostmem) { + hi->memdev = object_get_canonical_path(OBJECT(balloon->hostmem)); + hi->memaddr = balloon->addr; + hi->has_memaddr = true; + hi->max_size = memory_region_size(balloon->mr); + /* TODO: expose current provided size or something else? */ + } else { + hi->max_size = 0; + } + + info->u.hv_balloon.data = hi; + info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON; +} + +static void hv_balloon_decide_memslots(MemoryDeviceState *md, + unsigned int limit) +{ + HvBalloon *balloon = HV_BALLOON(md); + MemoryRegion *hostmem_mr; + uint64_t region_size, memslot_size, memslots; + + /* We're called exactly once, before realizing the device. */ + assert(!balloon->memslot_count); + + /* We should not be called if we don't have a memory backend */ + assert(balloon->hostmem); + + hostmem_mr = host_memory_backend_get_memory(balloon->hostmem); + region_size = memory_region_size(hostmem_mr); + + assert(region_size > 0); + memslot_size = QEMU_ALIGN_UP(region_size / limit, + HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN); + memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size; + + if (memslots > 1) { + balloon->memslot_size = memslot_size; + } else { + balloon->memslot_size = region_size; + } + + assert(memslots <= UINT_MAX); + balloon->memslot_count = memslots; +} + +static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md) +{ + const HvBalloon *balloon = HV_BALLOON(md); + + /* We're called after setting the suggested limit. 
*/ + assert(balloon->memslot_count > 0); + + return balloon->memslot_count; +} + +static void hv_balloon_init(Object *obj) +{ +} + +static void hv_balloon_finalize(Object *obj) +{ + HvBalloon *balloon = HV_BALLOON(obj); + + hv_balloon_unrealize_finalize_common(balloon); +} + +static Property hv_balloon_properties[] = { + DEFINE_PROP_BOOL("status-report", HvBalloon, + status_report.enabled, false), + + /* MEMORY_DEVICE props */ + DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void hv_balloon_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass); + + device_class_set_props(dc, hv_balloon_properties); + qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid); + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + + vdc->vmdev_realize = hv_balloon_vmdev_realize; + vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize; + vdc->vmdev_reset = hv_balloon_vmdev_reset; + vdc->open_channel = hv_balloon_vmdev_open_channel; + vdc->close_channel = hv_balloon_vmdev_close_channel; + vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify; + + mdc->get_addr = hv_balloon_md_get_addr; + mdc->set_addr = hv_balloon_md_set_addr; + mdc->get_plugged_size = memory_device_get_region_size; + mdc->get_memory_region = hv_balloon_md_get_memory_region; + mdc->decide_memslots = hv_balloon_decide_memslots; + mdc->get_memslots = hv_balloon_get_memslots; + mdc->fill_device_info = hv_balloon_md_fill_device_info; +} diff --git a/hw/hyperv/meson.build b/hw/hyperv/meson.build index b43f119ea5..d3d2668c71 100644 --- a/hw/hyperv/meson.build +++ b/hw/hyperv/meson.build @@ -2,3 +2,4 @@ specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c')) specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c')) specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c')) specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c')) +specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c')) diff --git a/hw/hyperv/trace-events b/hw/hyperv/trace-events index b4c35ca8e3..7963c215b1 100644 --- a/hw/hyperv/trace-events +++ b/hw/hyperv/trace-events @@ -16,3 +16,21 @@ vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d" vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d" vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d" vmbus_close_channel(uint32_t chan_id) "channel #%d" + +# hv-balloon +hv_balloon_state_change(const char *tostr) "-> %s" +hv_balloon_incoming_version(uint16_t major, uint16_t minor) "incoming proto version %u.%u" +hv_balloon_incoming_caps(uint32_t caps) "incoming caps 0x%x" +hv_balloon_outgoing_unballoon(uint32_t trans_id, uint64_t count, uint64_t start, uint64_t rempages) "posting unballoon %"PRIu32" for %"PRIu64" @ 0x%"PRIx64", remaining %"PRIu64 +hv_balloon_incoming_unballoon(uint32_t trans_id) "incoming unballoon response %"PRIu32 +hv_balloon_outgoing_hot_add(uint32_t trans_id, uint64_t count, uint64_t start) "posting hot add %"PRIu32" for %"PRIu64" @ 0x%"PRIx64 +hv_balloon_incoming_hot_add(uint32_t trans_id, uint32_t result, uint32_t count) "incoming hot add response %"PRIu32", result %"PRIu32", count 
%"PRIu32 +hv_balloon_outgoing_balloon(uint32_t trans_id, uint64_t count, uint64_t rempages) "posting balloon %"PRIu32" for %"PRIu64", remaining %"PRIu64 +hv_balloon_incoming_balloon(uint32_t trans_id, uint32_t range_count, uint32_t more_pages) "incoming balloon response %"PRIu32", ranges %"PRIu32", more %"PRIu32 +hv_balloon_our_range_add(uint64_t count, uint64_t start) "adding our range %"PRIu64" @ 0x%"PRIx64 +hv_balloon_remove_response(uint64_t count, uint64_t start, unsigned int both) "processing remove response range %"PRIu64" @ 0x%"PRIx64", both %u" +hv_balloon_remove_response_hole(uint64_t counthole, uint64_t starthole, uint64_t countrange, uint64_t startrange, uint64_t starthpr, unsigned int both) "response range hole %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64", before our start 0x%"PRIx64", both %u" +hv_balloon_remove_response_common(uint64_t countcommon, uint64_t startcommon, uint64_t countrange, uint64_t startrange, uint64_t counthpr, uint64_t starthpr, uint64_t removed, unsigned int both) "response common range %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64" with our %"PRIu64" @ 0x%"PRIx64", removed %"PRIu64", both %u" +hv_balloon_remove_response_remainder(uint64_t count, uint64_t start, unsigned int both) "remove response remaining range %"PRIu64" @ 0x%"PRIx64", both %u" +hv_balloon_map_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "mapping memslot %u / %u @ 0x%"PRIx64 +hv_balloon_unmap_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "unmapping memslot %u / %u @ 0x%"PRIx64 diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 94772c726b..55850791df 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -45,6 +45,7 @@ config PC select ACPI_VMGENID select VIRTIO_PMEM_SUPPORTED select VIRTIO_MEM_SUPPORTED + select HV_BALLOON_SUPPORTED config PC_PCI bool diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index a731738411..b2b4be9983 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -490,6 +490,12 @@ int xen_evtchn_set_callback_param(uint64_t param) break; } + /* If the guest has set a per-vCPU callback vector, prefer that. */ + if (gsi && kvm_xen_has_vcpu_callback_vector()) { + in_kernel = kvm_xen_has_cap(EVTCHN_SEND); + gsi = 0; + } + if (!ret) { /* If vector delivery was turned *off* then tell the kernel */ if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) == @@ -1129,6 +1135,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset) return -ESRCH; } + QEMU_IOTHREAD_LOCK_GUARD(); return xen_evtchn_soft_reset(); } diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 21c30e3659..839ec920a1 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -541,7 +541,5 @@ int xen_gnttab_reset(void) s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); - memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1); - return 0; } diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 660d0b72f9..8e716a7009 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -1357,10 +1357,12 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token) } else { deliver_watch(s, path, token); /* - * If the message was queued because there was already ring activity, - * no need to wake the guest. But if not, we need to send the evtchn. 
+ * Attempt to queue the message into the actual ring, and send + * the event channel notification if any bytes are copied. */ - xen_be_evtchn_notify(s->eh, s->be_port); + if (s->rsp_pending && put_rsp(s) > 0) { + xen_be_evtchn_notify(s->eh, s->be_port); + } } } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 6031234a73..1aef21aa2c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -27,6 +27,7 @@ #include "hw/i386/pc.h" #include "hw/char/serial.h" #include "hw/char/parallel.h" +#include "hw/hyperv/hv-balloon.h" #include "hw/i386/fw_cfg.h" #include "hw/i386/vmport.h" #include "sysemu/cpus.h" @@ -57,6 +58,7 @@ #include "hw/i386/kvm/xen_evtchn.h" #include "hw/i386/kvm/xen_gnttab.h" #include "hw/i386/kvm/xen_xenstore.h" +#include "hw/mem/memory-device.h" #include "e820_memory_layout.h" #include "trace.h" #include CONFIG_DEVICES @@ -1422,6 +1424,21 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev, error_propagate(errp, local_err); } +static void pc_hv_balloon_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* The vmbus handler has no hotplug handler; we should never end up here. */ + g_assert(!dev->hotplugged); + memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, + errp); +} + +static void pc_hv_balloon_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); +} + static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -1452,6 +1469,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, return; } pcms->iommu = dev; + } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) { + pc_hv_balloon_pre_plug(hotplug_dev, dev, errp); } } @@ -1464,6 +1483,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, x86_cpu_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) { + pc_hv_balloon_plug(hotplug_dev, dev, errp); } } @@ -1505,6 +1526,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, object_dynamic_cast(OBJECT(dev), TYPE_CPU) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON) || object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); } diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index ae38f48f16..e0704b8dc3 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -20,6 +20,22 @@ #include "exec/address-spaces.h" #include "trace.h" +static bool memory_device_is_empty(const MemoryDeviceState *md) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + Error *local_err = NULL; + MemoryRegion *mr; + + /* dropping const here is fine as we don't touch the memory region */ + mr = mdc->get_memory_region((MemoryDeviceState *)md, &local_err); + if (local_err) { + /* Not empty, we'll report errors later when obtaining the MR again.
*/ + error_free(local_err); + return false; + } + return !mr; +} + static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) { const MemoryDeviceState *md_a = MEMORY_DEVICE(a); @@ -220,12 +236,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, return 0; } - if (!QEMU_IS_ALIGNED(size, align)) { - error_setg(errp, "backend memory size must be multiple of 0x%" - PRIx64, align); - return 0; - } - if (hint) { if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) { error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64 @@ -249,6 +259,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, uint64_t next_addr; Range tmp; + if (memory_device_is_empty(md)) { + continue; + } + range_init_nofail(&tmp, mdc->get_addr(md), memory_device_get_region_size(md, &error_abort)); @@ -292,6 +306,7 @@ MemoryDeviceInfoList *qmp_memory_device_list(void) const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data); MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + /* Let's query information even for empty memory devices. */ mdc->fill_device_info(md, info); QAPI_LIST_APPEND(tail, info); @@ -311,7 +326,7 @@ static int memory_device_plugged_size(Object *obj, void *opaque) const MemoryDeviceState *md = MEMORY_DEVICE(obj); const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj); - if (dev->realized) { + if (dev->realized && !memory_device_is_empty(md)) { *size += mdc->get_plugged_size(md, &error_abort); } } @@ -337,6 +352,11 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, uint64_t addr, align = 0; MemoryRegion *mr; + /* We support empty memory devices even without device memory. */ + if (memory_device_is_empty(md)) { + return; + } + if (!ms->device_memory) { error_setg(errp, "the configuration is not prepared for memory devices" " (e.g., for memory hotplug), consider specifying the" @@ -380,10 +400,17 @@ out: void memory_device_plug(MemoryDeviceState *md, MachineState *ms) { const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); - const unsigned int memslots = memory_device_get_memslots(md); - const uint64_t addr = mdc->get_addr(md); + unsigned int memslots; + uint64_t addr; MemoryRegion *mr; + if (memory_device_is_empty(md)) { + return; + } + + memslots = memory_device_get_memslots(md); + addr = mdc->get_addr(md); + /* * We expect that a previous call to memory_device_pre_plug() succeeded, so * it can't fail at this point. @@ -408,6 +435,10 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) const unsigned int memslots = memory_device_get_memslots(md); MemoryRegion *mr; + if (memory_device_is_empty(md)) { + return; + } + /* * We expect that a previous call to memory_device_pre_plug() succeeded, so * it can't fail at this point.
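The memory_device_is_empty() helper above relies on the get_memory_region() callback contract: a device that currently owns no memory region returns NULL without setting an error, whereas a misconfigured device sets an error (as the virtio-pmem change that follows does for a missing memdev). A minimal sketch of that contract for a hypothetical device type, illustrative only and not part of this series (FooDevice, FOO_DEVICE and its memdev property are assumptions):

/* Hypothetical example of the MemoryDeviceClass::get_memory_region() contract. */
static MemoryRegion *foo_device_get_memory_region(MemoryDeviceState *md,
                                                  Error **errp)
{
    FooDevice *foo = FOO_DEVICE(md);    /* assumed device type */

    if (!foo->memdev) {
        /* Completely empty device: no memory region and no error. */
        return NULL;
    }
    /* Non-empty device: hand back the region of the backing memdev. */
    return &foo->memdev->mr;
}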
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index cc24812d2e..c3512c2dae 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -147,7 +147,10 @@ static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem, static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem, Error **errp) { - assert(pmem->memdev); + if (!pmem->memdev) { + error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP); + return NULL; + } return &pmem->memdev->mr; } diff --git a/include/hw/hyperv/dynmem-proto.h b/include/hw/hyperv/dynmem-proto.h new file mode 100644 index 0000000000..d0f9090ac4 --- /dev/null +++ b/include/hw/hyperv/dynmem-proto.h @@ -0,0 +1,423 @@ +#ifndef HW_HYPERV_DYNMEM_PROTO_H +#define HW_HYPERV_DYNMEM_PROTO_H + +/* + * Hyper-V Dynamic Memory Protocol definitions + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * Based on drivers/hv/hv_balloon.c from Linux kernel: + * Copyright (c) 2012, Microsoft Corporation. + * + * Author: K. Y. Srinivasan <kys@microsoft.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +/* + * Protocol versions. The low word is the minor version, the high word the major + * version. + * + * History: + * Initial version 1.0 + * Changed to 0.1 on 2009/03/25 + * Changes to 0.2 on 2009/05/14 + * Changes to 0.3 on 2009/12/03 + * Changed to 1.0 on 2011/04/05 + * Changed to 2.0 on 2019/12/10 + */ + +#define DYNMEM_MAKE_VERSION(Major, Minor) ((uint32_t)(((Major) << 16) | (Minor))) +#define DYNMEM_MAJOR_VERSION(Version) ((uint32_t)(Version) >> 16) +#define DYNMEM_MINOR_VERSION(Version) ((uint32_t)(Version) & 0xff) + +enum { + DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), + DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), + DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0), + + DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, + DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, + DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3, + + DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 +}; + + + +/* + * Message Types + */ + +enum dm_message_type { + /* + * Version 0.3 + */ + DM_ERROR = 0, + DM_VERSION_REQUEST = 1, + DM_VERSION_RESPONSE = 2, + DM_CAPABILITIES_REPORT = 3, + DM_CAPABILITIES_RESPONSE = 4, + DM_STATUS_REPORT = 5, + DM_BALLOON_REQUEST = 6, + DM_BALLOON_RESPONSE = 7, + DM_UNBALLOON_REQUEST = 8, + DM_UNBALLOON_RESPONSE = 9, + DM_MEM_HOT_ADD_REQUEST = 10, + DM_MEM_HOT_ADD_RESPONSE = 11, + DM_VERSION_03_MAX = 11, + /* + * Version 1.0. + */ + DM_INFO_MESSAGE = 12, + DM_VERSION_1_MAX = 12, + + /* + * Version 2.0 + */ + DM_MEM_HOT_REMOVE_REQUEST = 13, + DM_MEM_HOT_REMOVE_RESPONSE = 14 +}; + + +/* + * Structures defining the dynamic memory management + * protocol. + */ + +union dm_version { + struct { + uint16_t minor_version; + uint16_t major_version; + }; + uint32_t version; +} QEMU_PACKED; + + +union dm_caps { + struct { + uint64_t balloon:1; + uint64_t hot_add:1; + /* + * To support guests that may have alignment + * limitations on hot-add, the guest can specify + * its alignment requirements; a value of n + * represents an alignment of 2^n in mega bytes. + */ + uint64_t hot_add_alignment:4; + uint64_t hot_remove:1; + uint64_t reservedz:57; + } cap_bits; + uint64_t caps; +} QEMU_PACKED; + +union dm_mem_page_range { + struct { + /* + * The PFN number of the first page in the range. + * 40 bits is the architectural limit of a PFN + * number for AMD64. 
+ */ + uint64_t start_page:40; + /* + * The number of pages in the range. + */ + uint64_t page_cnt:24; + } finfo; + uint64_t page_range; +} QEMU_PACKED; + + + +/* + * The header for all dynamic memory messages: + * + * type: Type of the message. + * size: Size of the message in bytes; including the header. + * trans_id: The guest is responsible for manufacturing this ID. + */ + +struct dm_header { + uint16_t type; + uint16_t size; + uint32_t trans_id; +} QEMU_PACKED; + +/* + * A generic message format for dynamic memory. + * Specific message formats are defined later in the file. + */ + +struct dm_message { + struct dm_header hdr; + uint8_t data[]; /* enclosed message */ +} QEMU_PACKED; + + +/* + * Specific message types supporting the dynamic memory protocol. + */ + +/* + * Version negotiation message. Sent from the guest to the host. + * The guest is free to try different versions until the host + * accepts the version. + * + * dm_version: The protocol version requested. + * is_last_attempt: If TRUE, this is the last version guest will request. + * reservedz: Reserved field, set to zero. + */ + +struct dm_version_request { + struct dm_header hdr; + union dm_version version; + uint32_t is_last_attempt:1; + uint32_t reservedz:31; +} QEMU_PACKED; + +/* + * Version response message; Host to Guest and indicates + * if the host has accepted the version sent by the guest. + * + * is_accepted: If TRUE, host has accepted the version and the guest + * should proceed to the next stage of the protocol. FALSE indicates that + * guest should re-try with a different version. + * + * reservedz: Reserved field, set to zero. + */ + +struct dm_version_response { + struct dm_header hdr; + uint64_t is_accepted:1; + uint64_t reservedz:63; +} QEMU_PACKED; + +/* + * Message reporting capabilities. This is sent from the guest to the + * host. + */ + +struct dm_capabilities { + struct dm_header hdr; + union dm_caps caps; + uint64_t min_page_cnt; + uint64_t max_page_number; +} QEMU_PACKED; + +/* + * Response to the capabilities message. This is sent from the host to the + * guest. This message notifies if the host has accepted the guest's + * capabilities. If the host has not accepted, the guest must shutdown + * the service. + * + * is_accepted: Indicates if the host has accepted guest's capabilities. + * reservedz: Must be 0. + */ + +struct dm_capabilities_resp_msg { + struct dm_header hdr; + uint64_t is_accepted:1; + uint64_t hot_remove:1; + uint64_t suppress_pressure_reports:1; + uint64_t reservedz:61; +} QEMU_PACKED; + +/* + * This message is used to report memory pressure from the guest. + * This message is not part of any transaction and there is no + * response to this message. + * + * num_avail: Available memory in pages. + * num_committed: Committed memory in pages. + * page_file_size: The accumulated size of all page files + * in the system in pages. + * zero_free: The number of zero and free pages. + * page_file_writes: The writes to the page file in pages. + * io_diff: An indicator of file cache efficiency or page file activity, + * calculated as File Cache Page Fault Count - Page Read Count. + * This value is in pages. + * + * Some of these metrics are Windows specific and fortunately + * the algorithm on the host side that computes the guest memory + * pressure only uses num_committed value.
+ */ + +struct dm_status { + struct dm_header hdr; + uint64_t num_avail; + uint64_t num_committed; + uint64_t page_file_size; + uint64_t zero_free; + uint32_t page_file_writes; + uint32_t io_diff; +} QEMU_PACKED; + + +/* + * Message to ask the guest to allocate memory - balloon up message. + * This message is sent from the host to the guest. The guest may not be + * able to allocate as much memory as requested. + * + * num_pages: number of pages to allocate. + */ + +struct dm_balloon { + struct dm_header hdr; + uint32_t num_pages; + uint32_t reservedz; +} QEMU_PACKED; + + +/* + * Balloon response message; this message is sent from the guest + * to the host in response to the balloon message. + * + * reservedz: Reserved; must be set to zero. + * more_pages: If FALSE, this is the last message of the transaction. + * if TRUE there will be at least one more message from the guest. + * + * range_count: The number of ranges in the range array. + * + * range_array: An array of page ranges returned to the host. + * + */ + +struct dm_balloon_response { + struct dm_header hdr; + uint32_t reservedz; + uint32_t more_pages:1; + uint32_t range_count:31; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +/* + * Un-balloon message; this message is sent from the host + * to the guest to give guest more memory. + * + * more_pages: If FALSE, this is the last message of the transaction. + * if TRUE there will be at least one more message from the guest. + * + * reservedz: Reserved; must be set to zero. + * + * range_count: The number of ranges in the range array. + * + * range_array: An array of page ranges returned to the host. + * + */ + +struct dm_unballoon_request { + struct dm_header hdr; + uint32_t more_pages:1; + uint32_t reservedz:31; + uint32_t range_count; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +/* + * Un-balloon response message; this message is sent from the guest + * to the host in response to an unballoon request. + * + */ + +struct dm_unballoon_response { + struct dm_header hdr; +} QEMU_PACKED; + + +/* + * Hot add request message. Message sent from the host to the guest. + * + * mem_range: Memory range to hot add. + * + */ + +struct dm_hot_add { + struct dm_header hdr; + union dm_mem_page_range range; +} QEMU_PACKED; + +/* + * Hot add response message. + * This message is sent by the guest to report the status of a hot add request. + * If page_count is less than the requested page count, then the host should + * assume all further hot add requests will fail, since this indicates that + * the guest has hit an upper physical memory barrier. + * + * Hot adds may also fail due to low resources; in this case, the guest must + * not complete this message until the hot add can succeed, and the host must + * not send a new hot add request until the response is sent. + * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS + * times it fails the request. + * + * + * page_count: number of pages that were successfully hot added. + * + * result: result of the operation 1: success, 0: failure.
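For illustration only (not part of this patch): the reply described above maps onto the dm_hot_add_response structure defined just below; a guest-side sketch of filling it, assuming the definitions from this header are in scope and fill_hot_add_response is a hypothetical helper, might look like this:

/* Illustrative sketch: compose a DM_MEM_HOT_ADD_RESPONSE message. */
static void fill_hot_add_response(struct dm_hot_add_response *resp,
                                  uint32_t trans_id, uint32_t added_pages,
                                  uint32_t result)
{
    resp->hdr.type = DM_MEM_HOT_ADD_RESPONSE;
    resp->hdr.size = sizeof(*resp);
    resp->hdr.trans_id = trans_id;   /* guest-manufactured ID, see dm_header */
    resp->page_count = added_pages;  /* pages that were actually hot added */
    resp->result = result;           /* 1: success, 0: failure */
}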
+ * + */ + +struct dm_hot_add_response { + struct dm_header hdr; + uint32_t page_count; + uint32_t result; +} QEMU_PACKED; + +struct dm_hot_remove { + struct dm_header hdr; + uint32_t virtual_node; + uint32_t page_count; + uint32_t qos_flags; + uint32_t reservedZ; +} QEMU_PACKED; + +struct dm_hot_remove_response { + struct dm_header hdr; + uint32_t result; + uint32_t range_count; + uint64_t more_pages:1; + uint64_t reservedz:63; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +#define DM_REMOVE_QOS_LARGE (1 << 0) +#define DM_REMOVE_QOS_LOCAL (1 << 1) +#define DM_REMOVE_QOS_MASK (0x3) + +/* + * Types of information sent from host to the guest. + */ + +enum dm_info_type { + INFO_TYPE_MAX_PAGE_CNT = 0, + MAX_INFO_TYPE +}; + + +/* + * Header for the information message. + */ + +struct dm_info_header { + enum dm_info_type type; + uint32_t data_size; + uint8_t data[]; +} QEMU_PACKED; + +/* + * This message is sent from the host to the guest to pass + * some relevant information (win8 addition). + * + * reserved: not used. + * info_size: size of the information blob. + * info: information blob. + */ + +struct dm_info_msg { + struct dm_header hdr; + uint32_t reserved; + uint32_t info_size; + uint8_t info[]; +}; + +#endif diff --git a/include/hw/hyperv/hv-balloon.h b/include/hw/hyperv/hv-balloon.h new file mode 100644 index 0000000000..c1efe70fc2 --- /dev/null +++ b/include/hw/hyperv/hv-balloon.h @@ -0,0 +1,18 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HV_BALLOON_H +#define HW_HV_BALLOON_H + +#include "qom/object.h" + +#define TYPE_HV_BALLOON "hv-balloon" +OBJECT_DECLARE_SIMPLE_TYPE(HvBalloon, HV_BALLOON) + +#endif diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h index 3354d6c166..a1d62cc551 100644 --- a/include/hw/mem/memory-device.h +++ b/include/hw/mem/memory-device.h @@ -38,6 +38,10 @@ typedef struct MemoryDeviceState MemoryDeviceState; * address in guest physical memory can either be specified explicitly * or get assigned automatically. * + * Some memory devices might not own a memory region in certain device + * configurations. Such devices can logically get (un)plugged, however, + * empty memory devices are mostly ignored by the memory device code. + * * Conceptually, memory devices only span one memory region. If multiple * successive memory regions are used, a covering memory region has to * be provided. Scattered memory regions are not supported for single @@ -91,7 +95,8 @@ struct MemoryDeviceClass { uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp); /* - * Return the memory region of the memory device. + * Return the memory region of the memory device. If the device is + * completely empty, returns NULL without an error.
* * Called when (un)plugging the memory device, to (un)map the * memory region in guest physical memory, but also to detect the diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h index 7008d43d04..d702854853 100644 --- a/include/sysemu/dump.h +++ b/include/sysemu/dump.h @@ -137,7 +137,7 @@ typedef struct QEMU_PACKED KdumpSubHeader64 { } KdumpSubHeader64; typedef struct DataCache { - int fd; /* fd of the file where to write the cached data */ + DumpState *state; /* dump state related to this data */ uint8_t *buf; /* buffer for cached data */ size_t buf_size; /* size of the buf */ size_t data_size; /* size of cached data in buf */ @@ -157,6 +157,7 @@ typedef struct DumpState { MemoryMappingList list; bool resume; bool detached; + bool kdump_raw; hwaddr memory_offset; int fd; diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h index 595abfbe40..961c702c4e 100644 --- a/include/sysemu/kvm_xen.h +++ b/include/sysemu/kvm_xen.h @@ -22,6 +22,7 @@ int kvm_xen_soft_reset(void); uint32_t kvm_xen_get_caps(void); void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); +bool kvm_xen_has_vcpu_callback_vector(void); void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type); void kvm_xen_set_callback_asserted(void); int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port); diff --git a/linux-user/loongarch64/cpu_loop.c b/linux-user/loongarch64/cpu_loop.c index 894fdd111a..73d7b6796a 100644 --- a/linux-user/loongarch64/cpu_loop.c +++ b/linux-user/loongarch64/cpu_loop.c @@ -72,6 +72,19 @@ void cpu_loop(CPULoongArchState *env) case EXCCODE_BCE: force_sig_fault(TARGET_SIGSYS, TARGET_SI_KERNEL, env->pc); break; + + /* + * Begin with LSX and LASX disabled, then enable on the first trap. + * In this way we can tell if the unit is in use. This is used to + * choose the layout of any signal frame. 
+ */ + case EXCCODE_SXD: + env->CSR_EUEN |= R_CSR_EUEN_SXE_MASK; + break; + case EXCCODE_ASXD: + env->CSR_EUEN |= R_CSR_EUEN_ASXE_MASK; + break; + case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c index afcee641a6..39ea82c814 100644 --- a/linux-user/loongarch64/signal.c +++ b/linux-user/loongarch64/signal.c @@ -18,10 +18,10 @@ #define SC_USED_FP (1 << 0) struct target_sigcontext { - uint64_t sc_pc; - uint64_t sc_regs[32]; - uint32_t sc_flags; - uint64_t sc_extcontext[0] QEMU_ALIGNED(16); + abi_ulong sc_pc; + abi_ulong sc_regs[32]; + abi_uint sc_flags; + abi_ulong sc_extcontext[0] QEMU_ALIGNED(16); }; QEMU_BUILD_BUG_ON(sizeof(struct target_sigcontext) != sizeof_sigcontext); @@ -33,19 +33,35 @@ QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_regs) #define FPU_CTX_MAGIC 0x46505501 #define FPU_CTX_ALIGN 8 struct target_fpu_context { - uint64_t regs[32]; - uint64_t fcc; - uint32_t fcsr; + abi_ulong regs[32]; + abi_ulong fcc; + abi_uint fcsr; } QEMU_ALIGNED(FPU_CTX_ALIGN); QEMU_BUILD_BUG_ON(offsetof(struct target_fpu_context, regs) != offsetof_fpucontext_fr); +#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct target_lsx_context { + abi_ulong regs[2 * 32]; + abi_ulong fcc; + abi_uint fcsr; +} QEMU_ALIGNED(LSX_CTX_ALIGN); + +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct target_lasx_context { + abi_ulong regs[4 * 32]; + abi_ulong fcc; + abi_uint fcsr; +} QEMU_ALIGNED(LASX_CTX_ALIGN); + #define CONTEXT_INFO_ALIGN 16 struct target_sctx_info { - uint32_t magic; - uint32_t size; - uint64_t padding; + abi_uint magic; + abi_uint size; + abi_ulong padding; } QEMU_ALIGNED(CONTEXT_INFO_ALIGN); QEMU_BUILD_BUG_ON(sizeof(struct target_sctx_info) != sizeof_sctx_info); @@ -81,9 +97,11 @@ struct ctx_layout { }; struct extctx_layout { - unsigned int size; + unsigned long size; unsigned int flags; struct ctx_layout fpu; + struct ctx_layout lsx; + struct ctx_layout lasx; struct ctx_layout end; }; @@ -105,7 +123,8 @@ static abi_ptr extframe_alloc(struct extctx_layout *extctx, return sp; } -static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp) +static abi_ptr setup_extcontext(CPULoongArchState *env, + struct extctx_layout *extctx, abi_ptr sp) { memset(extctx, 0, sizeof(struct extctx_layout)); @@ -114,8 +133,17 @@ static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp) /* For qemu, there is no lazy fp context switch, so fp always present. 
*/ extctx->flags = SC_USED_FP; - sp = extframe_alloc(extctx, &extctx->fpu, - sizeof(struct target_rt_sigframe), FPU_CTX_ALIGN, sp); + + if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) { + sp = extframe_alloc(extctx, &extctx->lasx, + sizeof(struct target_lasx_context), LASX_CTX_ALIGN, sp); + } else if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE)) { + sp = extframe_alloc(extctx, &extctx->lsx, + sizeof(struct target_lsx_context), LSX_CTX_ALIGN, sp); + } else { + sp = extframe_alloc(extctx, &extctx->fpu, + sizeof(struct target_fpu_context), FPU_CTX_ALIGN, sp); + } return sp; } @@ -125,7 +153,6 @@ static void setup_sigframe(CPULoongArchState *env, struct extctx_layout *extctx) { struct target_sctx_info *info; - struct target_fpu_context *fpu_ctx; int i; __put_user(extctx->flags, &sc->sc_flags); @@ -136,25 +163,63 @@ static void setup_sigframe(CPULoongArchState *env, } /* - * Set fpu context + * Set extension context */ - info = extctx->fpu.haddr; - __put_user(FPU_CTX_MAGIC, &info->magic); - __put_user(extctx->fpu.size, &info->size); - fpu_ctx = (struct target_fpu_context *)(info + 1); - for (i = 0; i < 32; ++i) { - __put_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]); + if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) { + struct target_lasx_context *lasx_ctx; + info = extctx->lasx.haddr; + + __put_user(LASX_CTX_MAGIC, &info->magic); + __put_user(extctx->lasx.size, &info->size); + + lasx_ctx = (struct target_lasx_context *)(info + 1); + + for (i = 0; i < 32; ++i) { + __put_user(env->fpr[i].vreg.UD(0), &lasx_ctx->regs[4 * i]); + __put_user(env->fpr[i].vreg.UD(1), &lasx_ctx->regs[4 * i + 1]); + __put_user(env->fpr[i].vreg.UD(2), &lasx_ctx->regs[4 * i + 2]); + __put_user(env->fpr[i].vreg.UD(3), &lasx_ctx->regs[4 * i + 3]); + } + __put_user(read_fcc(env), &lasx_ctx->fcc); + __put_user(env->fcsr0, &lasx_ctx->fcsr); + } else if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE)) { + struct target_lsx_context *lsx_ctx; + info = extctx->lsx.haddr; + + __put_user(LSX_CTX_MAGIC, &info->magic); + __put_user(extctx->lsx.size, &info->size); + + lsx_ctx = (struct target_lsx_context *)(info + 1); + + for (i = 0; i < 32; ++i) { + __put_user(env->fpr[i].vreg.UD(0), &lsx_ctx->regs[2 * i]); + __put_user(env->fpr[i].vreg.UD(1), &lsx_ctx->regs[2 * i + 1]); + } + __put_user(read_fcc(env), &lsx_ctx->fcc); + __put_user(env->fcsr0, &lsx_ctx->fcsr); + } else { + struct target_fpu_context *fpu_ctx; + info = extctx->fpu.haddr; + + __put_user(FPU_CTX_MAGIC, &info->magic); + __put_user(extctx->fpu.size, &info->size); + + fpu_ctx = (struct target_fpu_context *)(info + 1); + + for (i = 0; i < 32; ++i) { + __put_user(env->fpr[i].vreg.UD(0), &fpu_ctx->regs[i]); + } + __put_user(read_fcc(env), &fpu_ctx->fcc); + __put_user(env->fcsr0, &fpu_ctx->fcsr); } - __put_user(read_fcc(env), &fpu_ctx->fcc); - __put_user(env->fcsr0, &fpu_ctx->fcsr); /* * Set end context */ info = extctx->end.haddr; __put_user(0, &info->magic); - __put_user(extctx->end.size, &info->size); + __put_user(0, &info->size); } static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame) @@ -162,7 +227,7 @@ static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame) memset(extctx, 0, sizeof(*extctx)); while (1) { - uint32_t magic, size; + abi_uint magic, size; if (get_user_u32(magic, frame) || get_user_u32(size, frame + 4)) { return false; @@ -184,6 +249,24 @@ static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame) extctx->fpu.size = size; extctx->size += size; break; + case LSX_CTX_MAGIC: + if (size < (sizeof(struct target_sctx_info) + + 
sizeof(struct target_lsx_context))) { + return false; + } + extctx->lsx.gaddr = frame; + extctx->lsx.size = size; + extctx->size += size; + break; + case LASX_CTX_MAGIC: + if (size < (sizeof(struct target_sctx_info) + + sizeof(struct target_lasx_context))) { + return false; + } + extctx->lasx.gaddr = frame; + extctx->lasx.size = size; + extctx->size += size; + break; default: return false; } @@ -197,19 +280,45 @@ static void restore_sigframe(CPULoongArchState *env, struct extctx_layout *extctx) { int i; + abi_ulong fcc; __get_user(env->pc, &sc->sc_pc); for (i = 1; i < 32; ++i) { __get_user(env->gpr[i], &sc->sc_regs[i]); } - if (extctx->fpu.haddr) { + if (extctx->lasx.haddr) { + struct target_lasx_context *lasx_ctx = + extctx->lasx.haddr + sizeof(struct target_sctx_info); + + for (i = 0; i < 32; ++i) { + __get_user(env->fpr[i].vreg.UD(0), &lasx_ctx->regs[4 * i]); + __get_user(env->fpr[i].vreg.UD(1), &lasx_ctx->regs[4 * i + 1]); + __get_user(env->fpr[i].vreg.UD(2), &lasx_ctx->regs[4 * i + 2]); + __get_user(env->fpr[i].vreg.UD(3), &lasx_ctx->regs[4 * i + 3]); + } + __get_user(fcc, &lasx_ctx->fcc); + write_fcc(env, fcc); + __get_user(env->fcsr0, &lasx_ctx->fcsr); + restore_fp_status(env); + } else if (extctx->lsx.haddr) { + struct target_lsx_context *lsx_ctx = + extctx->lsx.haddr + sizeof(struct target_sctx_info); + + for (i = 0; i < 32; ++i) { + __get_user(env->fpr[i].vreg.UD(0), &lsx_ctx->regs[2 * i]); + __get_user(env->fpr[i].vreg.UD(1), &lsx_ctx->regs[2 * i + 1]); + } + __get_user(fcc, &lsx_ctx->fcc); + write_fcc(env, fcc); + __get_user(env->fcsr0, &lsx_ctx->fcsr); + restore_fp_status(env); + } else if (extctx->fpu.haddr) { struct target_fpu_context *fpu_ctx = extctx->fpu.haddr + sizeof(struct target_sctx_info); - uint64_t fcc; for (i = 0; i < 32; ++i) { - __get_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]); + __get_user(env->fpr[i].vreg.UD(0), &fpu_ctx->regs[i]); } __get_user(fcc, &fpu_ctx->fcc); write_fcc(env, fcc); @@ -229,7 +338,7 @@ static abi_ptr get_sigframe(struct target_sigaction *ka, sp = target_sigsp(get_sp_from_cpustate(env), ka); sp = ROUND_DOWN(sp, 16); - sp = setup_extcontext(extctx, sp); + sp = setup_extcontext(env, extctx, sp); sp -= sizeof(struct target_rt_sigframe); assert(QEMU_IS_ALIGNED(sp, 16)); @@ -255,8 +364,17 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, force_sigsegv(sig); return; } - extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr); - extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + + if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE)) { + extctx.lasx.haddr = (void *)frame + (extctx.lasx.gaddr - frame_addr); + extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + } else if (FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE)) { + extctx.lsx.haddr = (void *)frame + (extctx.lsx.gaddr - frame_addr); + extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + } else { + extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr); + extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr); + } tswap_siginfo(&frame->rs_info, info); @@ -299,7 +417,12 @@ long do_rt_sigreturn(CPULoongArchState *env) if (!frame) { goto badframe; } - if (extctx.fpu.gaddr) { + + if (extctx.lasx.gaddr) { + extctx.lasx.haddr = (void *)frame + (extctx.lasx.gaddr - frame_addr); + } else if (extctx.lsx.gaddr) { + extctx.lsx.haddr = (void *)frame + (extctx.lsx.gaddr - frame_addr); + } else if (extctx.fpu.gaddr) { extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr); } diff --git 
a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c index b36bb2574b..3c1bde00dd 100644 --- a/linux-user/sparc/cpu_loop.c +++ b/linux-user/sparc/cpu_loop.c @@ -197,10 +197,8 @@ static uint32_t do_getpsr(CPUSPARCState *env) /* Avoid ifdefs below for the abi32 and abi64 paths. */ #ifdef TARGET_ABI32 #define TARGET_TT_SYSCALL (TT_TRAP + 0x10) /* t_linux */ -#define syscall_cc psr #else #define TARGET_TT_SYSCALL (TT_TRAP + 0x6d) /* tl0_linux64 */ -#define syscall_cc xcc #endif /* Avoid ifdefs below for the v9 and pre-v9 hw traps. */ @@ -224,11 +222,6 @@ void cpu_loop (CPUSPARCState *env) cpu_exec_end(cs); process_queued_cpu_work(cs); - /* Compute PSR before exposing state. */ - if (env->cc_op != CC_OP_FLAGS) { - cpu_get_psr(env); - } - switch (trapnr) { case TARGET_TT_SYSCALL: ret = do_syscall (env, env->gregs[1], @@ -240,10 +233,10 @@ void cpu_loop (CPUSPARCState *env) break; } if ((abi_ulong)ret >= (abi_ulong)(-515)) { - env->syscall_cc |= PSR_CARRY; + set_syscall_C(env, 1); ret = -ret; } else { - env->syscall_cc &= ~PSR_CARRY; + set_syscall_C(env, 0); } env->regwptr[0] = ret; /* next instruction */ diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c index 2be9000b9e..dfcae707e0 100644 --- a/linux-user/sparc/signal.c +++ b/linux-user/sparc/signal.c @@ -164,7 +164,7 @@ static void restore_pt_regs(struct target_pt_regs *regs, CPUSPARCState *env) */ uint32_t psr; __get_user(psr, ®s->psr); - env->psr = (psr & PSR_ICC) | (env->psr & ~PSR_ICC); + cpu_put_psr_icc(env, psr); #endif /* Note that pc and npc are handled in the caller. */ diff --git a/linux-user/sparc/target_cpu.h b/linux-user/sparc/target_cpu.h index 1f4bed50f4..5f62c5eb75 100644 --- a/linux-user/sparc/target_cpu.h +++ b/linux-user/sparc/target_cpu.h @@ -26,6 +26,17 @@ # define TARGET_STACK_BIAS 0 #endif +static void set_syscall_C(CPUSPARCState *env, bool val) +{ +#ifndef TARGET_SPARC64 + env->icc_C = val; +#elif defined(TARGET_ABI32) + env->icc_C = (uint64_t)val << 32; +#else + env->xcc_C = val; +#endif +} + static inline void cpu_clone_regs_child(CPUSPARCState *env, target_ulong newsp, unsigned flags) { @@ -58,11 +69,7 @@ static inline void cpu_clone_regs_child(CPUSPARCState *env, target_ulong newsp, * do the pc advance twice. */ env->regwptr[WREG_O0] = 0; -#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) - env->xcc &= ~PSR_CARRY; -#else - env->psr &= ~PSR_CARRY; -#endif + set_syscall_C(env, 0); env->pc = env->npc; env->npc = env->npc + 4; } diff --git a/meson.build b/meson.build index dcef8b1e79..51a51075db 100644 --- a/meson.build +++ b/meson.build @@ -1323,6 +1323,30 @@ if not get_option('glusterfs').auto() or have_block endif endif +hv_balloon = false +if get_option('hv_balloon').allowed() and have_system + if cc.links(''' + #include <string.h> + #include <gmodule.h> + int main(void) { + GTree *tree; + + tree = g_tree_new((GCompareFunc)strcmp); + (void)g_tree_node_first(tree); + g_tree_destroy(tree); + return 0; + } + ''', dependencies: glib) + hv_balloon = true + else + if get_option('hv_balloon').enabled() + error('could not enable hv-balloon, update your glib') + else + warning('could not find glib support for hv-balloon, disabling') + endif + endif +endif + libssh = not_found if not get_option('libssh').auto() or have_block libssh = dependency('libssh', version: '>=0.8.7', @@ -2855,7 +2879,8 @@ host_kconfig = \ (targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \ (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \ (multiprocess_allowed ? 
['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \ - (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + \ + (hv_balloon ? ['CONFIG_HV_BALLOON_POSSIBLE=y'] : []) ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ] @@ -4321,6 +4346,7 @@ if targetos == 'windows' endif summary_info += {'seccomp support': seccomp} summary_info += {'GlusterFS support': glusterfs} +summary_info += {'hv-balloon support': hv_balloon} summary_info += {'TPM support': have_tpm} summary_info += {'libssh support': libssh} summary_info += {'lzo support': lzo} diff --git a/meson_options.txt b/meson_options.txt index 3c7398f3c6..5c212fcd45 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -150,6 +150,8 @@ option('gio', type : 'feature', value : 'auto', description: 'use libgio for D-Bus support') option('glusterfs', type : 'feature', value : 'auto', description: 'Glusterfs block device driver') +option('hv_balloon', type : 'feature', value : 'auto', + description: 'hv-balloon driver (requires Glib 2.68+ GTree API)') option('libdw', type : 'feature', value : 'auto', description: 'debuginfo support') option('libiscsi', type : 'feature', value : 'auto', diff --git a/migration/ram.c b/migration/ram.c index a0f3b86663..8c7886ab79 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3030,71 +3030,71 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which * guarantees that we'll at least released it in a regular basis. */ - qemu_mutex_lock(&rs->bitmap_mutex); - WITH_RCU_READ_LOCK_GUARD() { - if (ram_list.version != rs->last_version) { - ram_state_reset(rs); - } + WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) { + WITH_RCU_READ_LOCK_GUARD() { + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); + } - /* Read version before ram_list.blocks */ - smp_rmb(); + /* Read version before ram_list.blocks */ + smp_rmb(); - ret = rdma_registration_start(f, RAM_CONTROL_ROUND); - if (ret < 0) { - qemu_file_set_error(f, ret); - goto out; - } + ret = rdma_registration_start(f, RAM_CONTROL_ROUND); + if (ret < 0) { + qemu_file_set_error(f, ret); + goto out; + } - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = migration_rate_exceeded(f)) == 0 || - postcopy_has_request(rs)) { - int pages; + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = migration_rate_exceeded(f)) == 0 || + postcopy_has_request(rs)) { + int pages; - if (qemu_file_get_error(f)) { - break; - } + if (qemu_file_get_error(f)) { + break; + } - pages = ram_find_and_save_block(rs); - /* no more pages to sent */ - if (pages == 0) { - done = 1; - break; - } + pages = ram_find_and_save_block(rs); + /* no more pages to sent */ + if (pages == 0) { + done = 1; + break; + } - if (pages < 0) { - qemu_file_set_error(f, pages); - break; - } + if (pages < 0) { + qemu_file_set_error(f, pages); + break; + } - rs->target_page_count += pages; + rs->target_page_count += pages; - /* - * During postcopy, it is necessary to make sure one whole host - * page is sent in one chunk. - */ - if (migrate_postcopy_ram()) { - compress_flush_data(); - } + /* + * During postcopy, it is necessary to make sure one whole host + * page is sent in one chunk. + */ + if (migrate_postcopy_ram()) { + compress_flush_data(); + } - /* - * we want to check in the 1st loop, just in case it was the 1st - * time and we had to sync the dirty bitmap. 
- * qemu_clock_get_ns() is a bit expensive, so we only check each - * some iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / - 1000000; - if (t1 > MAX_WAIT) { - trace_ram_save_iterate_big_wait(t1, i); - break; + /* + * we want to check in the 1st loop, just in case it was the 1st + * time and we had to sync the dirty bitmap. + * qemu_clock_get_ns() is a bit expensive, so we only check each + * some iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / + 1000000; + if (t1 > MAX_WAIT) { + trace_ram_save_iterate_big_wait(t1, i); + break; + } } + i++; } - i++; } } - qemu_mutex_unlock(&rs->bitmap_mutex); /* * Must occur before EOS (or any QEMUFile operation) diff --git a/monitor/monitor.c b/monitor/monitor.c index 941f87815a..01ede1babd 100644 --- a/monitor/monitor.c +++ b/monitor/monitor.c @@ -315,6 +315,7 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { [QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS }, [QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS }, [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_HV_BALLOON_STATUS_REPORT] = { 1000 * SCALE_MS }, }; /* diff --git a/qapi/dump.json b/qapi/dump.json index 4ae1f722a9..5cbc237ad9 100644 --- a/qapi/dump.json +++ b/qapi/dump.json @@ -15,11 +15,23 @@ # # @elf: elf format # -# @kdump-zlib: kdump-compressed format with zlib-compressed +# @kdump-zlib: makedumpfile flattened, kdump-compressed format with zlib +# compression # -# @kdump-lzo: kdump-compressed format with lzo-compressed +# @kdump-lzo: makedumpfile flattened, kdump-compressed format with lzo +# compression # -# @kdump-snappy: kdump-compressed format with snappy-compressed +# @kdump-snappy: makedumpfile flattened, kdump-compressed format with snappy +# compression +# +# @kdump-raw-zlib: raw assembled kdump-compressed format with zlib compression +# (since 8.2) +# +# @kdump-raw-lzo: raw assembled kdump-compressed format with lzo compression +# (since 8.2) +# +# @kdump-raw-snappy: raw assembled kdump-compressed format with snappy +# compression (since 8.2) # # @win-dmp: Windows full crashdump format, can be used instead of ELF # converting (since 2.13) @@ -27,7 +39,11 @@ # Since: 2.0 ## { 'enum': 'DumpGuestMemoryFormat', - 'data': [ 'elf', 'kdump-zlib', 'kdump-lzo', 'kdump-snappy', 'win-dmp' ] } + 'data': [ + 'elf', + 'kdump-zlib', 'kdump-lzo', 'kdump-snappy', + 'kdump-raw-zlib', 'kdump-raw-lzo', 'kdump-raw-snappy', + 'win-dmp' ] } ## # @dump-guest-memory: diff --git a/qapi/machine-target.json b/qapi/machine-target.json index 4e55adbe00..c8d7d9868d 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -230,7 +230,8 @@ 'data': { 'model': 'CpuModelInfo' }, 'if': { 'any': [ 'TARGET_S390X', 'TARGET_I386', - 'TARGET_ARM' ] } } + 'TARGET_ARM', + 'TARGET_LOONGARCH64' ] } } ## # @query-cpu-model-expansion: @@ -275,7 +276,8 @@ 'returns': 'CpuModelExpansionInfo', 'if': { 'any': [ 'TARGET_S390X', 'TARGET_I386', - 'TARGET_ARM' ] } } + 'TARGET_ARM', + 'TARGET_LOONGARCH64' ] } } ## # @CpuDefinitionInfo: diff --git a/qapi/machine.json b/qapi/machine.json index 6c9d2f6dcf..b6d634b30d 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1138,6 +1138,68 @@ 'data': { 'actual': 'int' } } ## +# @HvBalloonInfo: +# +# hv-balloon guest-provided memory status information. +# +# @committed: the amount of memory in use inside the guest plus the +# amount of the memory unusable inside the guest (ballooned out, +# offline, etc.) 
+# +# @available: the amount of the memory inside the guest available for +# new allocations ("free") +# +# Since: 8.2 +## +{ 'struct': 'HvBalloonInfo', + 'data': { 'committed': 'size', 'available': 'size' } } + +## +# @query-hv-balloon-status-report: +# +# Returns the hv-balloon driver data contained in the last received "STATUS" +# message from the guest. +# +# Returns: +# - @HvBalloonInfo on success +# - If no hv-balloon device is present, guest memory status reporting +# is not enabled or no guest memory status report received yet, +# GenericError +# +# Since: 8.2 +# +# Example: +# +# -> { "execute": "query-hv-balloon-status-report" } +# <- { "return": { +# "committed": 816640000, +# "available": 3333054464 +# } +# } +## +{ 'command': 'query-hv-balloon-status-report', 'returns': 'HvBalloonInfo' } + +## +# @HV_BALLOON_STATUS_REPORT: +# +# Emitted when the hv-balloon driver receives a "STATUS" message from +# the guest. +# +# Note: this event is rate-limited. +# +# Since: 8.2 +# +# Example: +# +# <- { "event": "HV_BALLOON_STATUS_REPORT", +# "data": { "committed": 816640000, "available": 3333054464 }, +# "timestamp": { "seconds": 1600295492, "microseconds": 661044 } } +# +## +{ 'event': 'HV_BALLOON_STATUS_REPORT', + 'data': 'HvBalloonInfo' } + +## # @MemoryInfo: # # Actual memory information in bytes. @@ -1290,6 +1352,29 @@ } ## +# @HvBalloonDeviceInfo: +# +# hv-balloon provided memory state information +# +# @id: device's ID +# +# @memaddr: physical address in memory, where device is mapped +# +# @max-size: the maximum size of memory that the device can provide +# +# @memdev: memory backend linked with device +# +# Since: 8.2 +## +{ 'struct': 'HvBalloonDeviceInfo', + 'data': { '*id': 'str', + '*memaddr': 'size', + 'max-size': 'size', + '*memdev': 'str' + } +} + +## # @MemoryDeviceInfoKind: # # @nvdimm: since 2.12 @@ -1300,10 +1385,13 @@ # # @sgx-epc: since 6.2. # +# @hv-balloon: since 8.2. 
+# # Since: 2.1 ## { 'enum': 'MemoryDeviceInfoKind', - 'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc' ] } + 'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc', + 'hv-balloon' ] } ## # @PCDIMMDeviceInfoWrapper: @@ -1338,6 +1426,14 @@ 'data': { 'data': 'SgxEPCDeviceInfo' } } ## +# @HvBalloonDeviceInfoWrapper: +# +# Since: 8.2 +## +{ 'struct': 'HvBalloonDeviceInfoWrapper', + 'data': { 'data': 'HvBalloonDeviceInfo' } } + +## # @MemoryDeviceInfo: # # Union containing information about a memory device @@ -1351,7 +1447,8 @@ 'nvdimm': 'PCDIMMDeviceInfoWrapper', 'virtio-pmem': 'VirtioPMEMDeviceInfoWrapper', 'virtio-mem': 'VirtioMEMDeviceInfoWrapper', - 'sgx-epc': 'SgxEPCDeviceInfoWrapper' + 'sgx-epc': 'SgxEPCDeviceInfoWrapper', + 'hv-balloon': 'HvBalloonDeviceInfoWrapper' } } diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 7ca4b77eae..e9d6d39279 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -123,6 +123,7 @@ meson_options_help() { printf "%s\n" ' gtk-clipboard clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)' printf "%s\n" ' guest-agent Build QEMU Guest Agent' printf "%s\n" ' guest-agent-msi Build MSI package for the QEMU Guest Agent' + printf "%s\n" ' hv-balloon hv-balloon driver (requires Glib 2.68+ GTree API)' printf "%s\n" ' hvf HVF acceleration support' printf "%s\n" ' iconv Font glyph conversion support' printf "%s\n" ' jack JACK sound support' @@ -333,6 +334,8 @@ _meson_option_parse() { --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;; --enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;; --disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;; + --enable-hv-balloon) printf "%s" -Dhv_balloon=enabled ;; + --disable-hv-balloon) printf "%s" -Dhv_balloon=disabled ;; --enable-hvf) printf "%s" -Dhvf=enabled ;; --disable-hvf) printf "%s" -Dhvf=disabled ;; --iasl=*) quote_sh "-Diasl=$2" ;; diff --git a/system/dirtylimit.c b/system/dirtylimit.c index fa959d7743..495c7a7082 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -411,12 +411,20 @@ void dirtylimit_set_all(uint64_t quota, void dirtylimit_vcpu_execute(CPUState *cpu) { - if (dirtylimit_in_service() && - dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && - cpu->throttle_us_per_full) { - trace_dirtylimit_vcpu_execute(cpu->cpu_index, - cpu->throttle_us_per_full); - usleep(cpu->throttle_us_per_full); + if (cpu->throttle_us_per_full) { + dirtylimit_state_lock(); + + if (dirtylimit_in_service() && + dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { + dirtylimit_state_unlock(); + trace_dirtylimit_vcpu_execute(cpu->cpu_index, + cpu->throttle_us_per_full); + + g_usleep(cpu->throttle_us_per_full); + return; + } + + dirtylimit_state_unlock(); } } @@ -644,10 +652,6 @@ static struct DirtyLimitInfoList *dirtylimit_query_all(void) struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) { - if (!dirtylimit_in_service()) { - return NULL; - } - return dirtylimit_query_all(); } diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 76348f9d5d..75b2c557b9 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -267,7 +267,6 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, fi.submap |= 1 << XENFEAT_writable_page_tables | 1 << XENFEAT_writable_descriptor_tables | 1 << XENFEAT_auto_translated_physmap | - 1 << XENFEAT_supervisor_mode_kernel | 1 << XENFEAT_hvm_callback_vector | 1 << XENFEAT_hvm_safe_pvclock 
| 1 << XENFEAT_hvm_pirqs; @@ -307,7 +306,7 @@ static int kvm_xen_set_vcpu_callback_vector(CPUState *cs) trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector); - return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva); + return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva); } static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data) @@ -425,6 +424,13 @@ void kvm_xen_set_callback_asserted(void) } } +bool kvm_xen_has_vcpu_callback_vector(void) +{ + CPUState *cs = qemu_get_cpu(0); + + return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector; +} + void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) { CPUState *cs = qemu_get_cpu(vcpu_id); @@ -441,7 +447,8 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) * deliver it as an MSI. */ MSIMessage msg = { - .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id, + .address = APIC_DEFAULT_ADDRESS | + (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT), .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT), }; kvm_irqchip_send_msi(kvm_state, msg); @@ -850,8 +857,7 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu, int ret = -ENOSYS; switch (cmd) { case HVMOP_set_evtchn_upcall_vector: - ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, - exit->u.hcall.params[0]); + ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg); break; case HVMOP_pagetable_dying: diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h index 1265dc7cb5..cfe195db4e 100644 --- a/target/loongarch/cpu-param.h +++ b/target/loongarch/cpu-param.h @@ -12,6 +12,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 48 #define TARGET_VIRT_ADDR_SPACE_BITS 48 -#define TARGET_PAGE_BITS 14 +#define TARGET_PAGE_BITS 12 #endif diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index ef1bf89dac..a60d07acd5 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -443,6 +443,7 @@ static void loongarch_la464_initfn(Object *obj) env->cpucfg[20] = data; env->CSR_ASID = FIELD_DP64(0, CSR_ASID, ASIDBITS, 0xa); + loongarch_cpu_post_init(obj); } static void loongarch_la132_initfn(Object *obj) @@ -474,6 +475,12 @@ static void loongarch_la132_initfn(Object *obj) env->cpucfg[1] = data; } +static void loongarch_max_initfn(Object *obj) +{ + /* '-cpu max' for TCG: we use cpu la464. 
*/ + loongarch_la464_initfn(obj); +} + static void loongarch_cpu_list_entry(gpointer data, gpointer user_data) { const char *typename = object_class_get_name(OBJECT_CLASS(data)); @@ -616,6 +623,72 @@ static const MemoryRegionOps loongarch_qemu_ops = { }; #endif +static bool loongarch_get_lsx(Object *obj, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + bool ret; + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + ret = true; + } else { + ret = false; + } + return ret; +} + +static void loongarch_set_lsx(Object *obj, bool value, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + if (value) { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LSX, 1); + } else { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LSX, 0); + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LASX, 0); + } +} + +static bool loongarch_get_lasx(Object *obj, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + bool ret; + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + ret = true; + } else { + ret = false; + } + return ret; +} + +static void loongarch_set_lasx(Object *obj, bool value, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + if (value) { + if (!FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LSX, 1); + } + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LASX, 1); + } else { + cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, LASX, 0); + } +} + +void loongarch_cpu_post_init(Object *obj) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + object_property_add_bool(obj, "lsx", loongarch_get_lsx, + loongarch_set_lsx); + } + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); + } +} + static void loongarch_cpu_init(Object *obj) { #ifndef CONFIG_USER_ONLY @@ -829,6 +902,7 @@ static const TypeInfo loongarch_cpu_type_infos[] = { }, DEFINE_LOONGARCH_CPU_TYPE(64, "la464", loongarch_la464_initfn), DEFINE_LOONGARCH_CPU_TYPE(32, "la132", loongarch_la132_initfn), + DEFINE_LOONGARCH_CPU_TYPE(64, "max", loongarch_max_initfn), }; DEFINE_TYPES(loongarch_cpu_type_infos) diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index 8b54cf109c..9d0f79f814 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -486,4 +486,6 @@ void loongarch_cpu_list(void); #define LOONGARCH_CPU_TYPE_NAME(model) model LOONGARCH_CPU_TYPE_SUFFIX #define CPU_RESOLVING_TYPE TYPE_LOONGARCH_CPU +void loongarch_cpu_post_init(Object *obj); + #endif /* LOONGARCH_CPU_H */ diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc index 98f856bb29..92b1d22e28 100644 --- a/target/loongarch/insn_trans/trans_vec.c.inc +++ b/target/loongarch/insn_trans/trans_vec.c.inc @@ -4,8 +4,6 @@ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited */ -#ifndef CONFIG_USER_ONLY - static bool check_vec(DisasContext *ctx, uint32_t oprsz) { if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) { @@ -21,15 +19,6 @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz) return true; } -#else - -static bool check_vec(DisasContext *ctx, uint32_t oprsz) -{ - return true; -} - -#endif - static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, gen_helper_gvec_4_ptr *fn) { diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c 
index 6c25957881..645672ff59 100644 --- a/target/loongarch/loongarch-qmp-cmds.c +++ b/target/loongarch/loongarch-qmp-cmds.c @@ -7,8 +7,13 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "qapi/qapi-commands-machine-target.h" #include "cpu.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qom/qom-qobject.h" static void loongarch_cpu_add_definition(gpointer data, gpointer user_data) { @@ -35,3 +40,62 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) return cpu_list; } + +static const char *cpu_model_advertised_features[] = { + "lsx", "lasx", NULL +}; + +CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + CpuModelExpansionInfo *expansion_info; + QDict *qdict_out; + ObjectClass *oc; + Object *obj; + const char *name; + int i; + + if (type != CPU_MODEL_EXPANSION_TYPE_STATIC) { + error_setg(errp, "The requested expansion type is not supported"); + return NULL; + } + + oc = cpu_class_by_name(TYPE_LOONGARCH_CPU, model->name); + if (!oc) { + error_setg(errp, "The CPU type '%s' is not a recognized LoongArch CPU type", + model->name); + return NULL; + } + + obj = object_new(object_class_get_name(oc)); + + expansion_info = g_new0(CpuModelExpansionInfo, 1); + expansion_info->model = g_malloc0(sizeof(*expansion_info->model)); + expansion_info->model->name = g_strdup(model->name); + + qdict_out = qdict_new(); + + i = 0; + while ((name = cpu_model_advertised_features[i++]) != NULL) { + ObjectProperty *prop = object_property_find(obj, name); + if (prop) { + QObject *value; + + assert(prop->get); + value = object_property_get_qobject(obj, name, &error_abort); + + qdict_put_obj(qdict_out, name, value); + } + } + + if (!qdict_size(qdict_out)) { + qobject_unref(qdict_out); + } else { + expansion_info->model->props = QOBJECT(qdict_out); + } + + object_unref(obj); + + return expansion_info; +} diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c index c8b8b0497f..449043c68b 100644 --- a/target/loongarch/tlb_helper.c +++ b/target/loongarch/tlb_helper.c @@ -60,6 +60,9 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, tlb_rplv = 0; } + /* Remove sw bit between bit12 -- bit PS*/ + tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); + /* Check access rights */ if (!tlb_v) { return TLBRET_INVALID; @@ -82,10 +85,6 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, return TLBRET_DIRTY; } - /* - * tlb_entry contains ppn[47:12] while 16KiB ppn is [47:15] - * need adjust. 
- */ *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | (address & MAKE_64BIT_MASK(0, tlb_ps)); *prot = PAGE_READ; @@ -774,7 +773,7 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, /* Move Global bit */ tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | - (tmp0 & (~(1 << R_TLBENTRY_G_SHIFT))); + (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); ps = ptbase + ptwidth - 1; if (odd) { tmp0 += MAKE_64BIT_MASK(ps, 1); diff --git a/target/sparc/cc_helper.c b/target/sparc/cc_helper.c deleted file mode 100644 index 7ad5b9b29e..0000000000 --- a/target/sparc/cc_helper.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Helpers for lazy condition code handling - * - * Copyright (c) 2003-2005 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/helper-proto.h" - -static uint32_t compute_all_flags(CPUSPARCState *env) -{ - return env->psr & PSR_ICC; -} - -static uint32_t compute_C_flags(CPUSPARCState *env) -{ - return env->psr & PSR_CARRY; -} - -static inline uint32_t get_NZ_icc(int32_t dst) -{ - uint32_t ret = 0; - - if (dst == 0) { - ret = PSR_ZERO; - } else if (dst < 0) { - ret = PSR_NEG; - } - return ret; -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_flags_xcc(CPUSPARCState *env) -{ - return env->xcc & PSR_ICC; -} - -static uint32_t compute_C_flags_xcc(CPUSPARCState *env) -{ - return env->xcc & PSR_CARRY; -} - -static inline uint32_t get_NZ_xcc(target_long dst) -{ - uint32_t ret = 0; - - if (!dst) { - ret = PSR_ZERO; - } else if (dst < 0) { - ret = PSR_NEG; - } - return ret; -} -#endif - -static inline uint32_t get_V_div_icc(target_ulong src2) -{ - uint32_t ret = 0; - - if (src2 != 0) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_div(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_V_div_icc(CC_SRC2); - return ret; -} - -static uint32_t compute_C_div(CPUSPARCState *env) -{ - return 0; -} - -static inline uint32_t get_C_add_icc(uint32_t dst, uint32_t src1) -{ - uint32_t ret = 0; - - if (dst < src1) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_addx_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((src1 & src2) | (~dst & (src1 | src2))) & (1U << 31)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_add_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1U << 31)) { - ret = PSR_OVF; - } - return ret; -} - -#ifdef TARGET_SPARC64 -static inline uint32_t get_C_add_xcc(target_ulong dst, target_ulong src1) -{ - uint32_t ret = 0; - - if (dst < src1) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_addx_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((src1 
& src2) | (~dst & (src1 | src2))) & (1ULL << 63)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_add_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 63)) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_add_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - ret |= get_C_add_xcc(CC_DST, CC_SRC); - ret |= get_V_add_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_add_xcc(CPUSPARCState *env) -{ - return get_C_add_xcc(CC_DST, CC_SRC); -} -#endif - -static uint32_t compute_all_add(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_add_icc(CC_DST, CC_SRC); - ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_add(CPUSPARCState *env) -{ - return get_C_add_icc(CC_DST, CC_SRC); -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_addx_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - ret |= get_C_addx_xcc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_add_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_addx_xcc(CPUSPARCState *env) -{ - return get_C_addx_xcc(CC_DST, CC_SRC, CC_SRC2); -} -#endif - -static uint32_t compute_all_addx(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_addx_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_addx(CPUSPARCState *env) -{ - return get_C_addx_icc(CC_DST, CC_SRC, CC_SRC2); -} - -static inline uint32_t get_V_tag_icc(target_ulong src1, target_ulong src2) -{ - uint32_t ret = 0; - - if ((src1 | src2) & 0x3) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_tadd(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_add_icc(CC_DST, CC_SRC); - ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_tag_icc(CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_all_taddtv(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_add_icc(CC_DST, CC_SRC); - return ret; -} - -static inline uint32_t get_C_sub_icc(uint32_t src1, uint32_t src2) -{ - uint32_t ret = 0; - - if (src1 < src2) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_subx_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((~src1 & src2) | (dst & (~src1 | src2))) & (1U << 31)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_sub_icc(uint32_t dst, uint32_t src1, - uint32_t src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2) & (src1 ^ dst)) & (1U << 31)) { - ret = PSR_OVF; - } - return ret; -} - - -#ifdef TARGET_SPARC64 -static inline uint32_t get_C_sub_xcc(target_ulong src1, target_ulong src2) -{ - uint32_t ret = 0; - - if (src1 < src2) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_C_subx_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((~src1 & src2) | (dst & (~src1 | src2))) & (1ULL << 63)) { - ret = PSR_CARRY; - } - return ret; -} - -static inline uint32_t get_V_sub_xcc(target_ulong dst, target_ulong src1, - target_ulong src2) -{ - uint32_t ret = 0; - - if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 63)) { - ret = PSR_OVF; - } - return ret; -} - -static uint32_t compute_all_sub_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - 
ret |= get_C_sub_xcc(CC_SRC, CC_SRC2); - ret |= get_V_sub_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_sub_xcc(CPUSPARCState *env) -{ - return get_C_sub_xcc(CC_SRC, CC_SRC2); -} -#endif - -static uint32_t compute_all_sub(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_sub_icc(CC_SRC, CC_SRC2); - ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_sub(CPUSPARCState *env) -{ - return get_C_sub_icc(CC_SRC, CC_SRC2); -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_subx_xcc(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_xcc(CC_DST); - ret |= get_C_subx_xcc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_sub_xcc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_subx_xcc(CPUSPARCState *env) -{ - return get_C_subx_xcc(CC_DST, CC_SRC, CC_SRC2); -} -#endif - -static uint32_t compute_all_subx(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_subx_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_C_subx(CPUSPARCState *env) -{ - return get_C_subx_icc(CC_DST, CC_SRC, CC_SRC2); -} - -static uint32_t compute_all_tsub(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_sub_icc(CC_SRC, CC_SRC2); - ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2); - ret |= get_V_tag_icc(CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_all_tsubtv(CPUSPARCState *env) -{ - uint32_t ret; - - ret = get_NZ_icc(CC_DST); - ret |= get_C_sub_icc(CC_SRC, CC_SRC2); - return ret; -} - -static uint32_t compute_all_logic(CPUSPARCState *env) -{ - return get_NZ_icc(CC_DST); -} - -static uint32_t compute_C_logic(CPUSPARCState *env) -{ - return 0; -} - -#ifdef TARGET_SPARC64 -static uint32_t compute_all_logic_xcc(CPUSPARCState *env) -{ - return get_NZ_xcc(CC_DST); -} -#endif - -typedef struct CCTable { - uint32_t (*compute_all)(CPUSPARCState *env); /* return all the flags */ - uint32_t (*compute_c)(CPUSPARCState *env); /* return the C flag */ -} CCTable; - -static const CCTable icc_table[CC_OP_NB] = { - /* CC_OP_DYNAMIC should never happen */ - [CC_OP_FLAGS] = { compute_all_flags, compute_C_flags }, - [CC_OP_DIV] = { compute_all_div, compute_C_div }, - [CC_OP_ADD] = { compute_all_add, compute_C_add }, - [CC_OP_ADDX] = { compute_all_addx, compute_C_addx }, - [CC_OP_TADD] = { compute_all_tadd, compute_C_add }, - [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_add }, - [CC_OP_SUB] = { compute_all_sub, compute_C_sub }, - [CC_OP_SUBX] = { compute_all_subx, compute_C_subx }, - [CC_OP_TSUB] = { compute_all_tsub, compute_C_sub }, - [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_sub }, - [CC_OP_LOGIC] = { compute_all_logic, compute_C_logic }, -}; - -#ifdef TARGET_SPARC64 -static const CCTable xcc_table[CC_OP_NB] = { - /* CC_OP_DYNAMIC should never happen */ - [CC_OP_FLAGS] = { compute_all_flags_xcc, compute_C_flags_xcc }, - [CC_OP_DIV] = { compute_all_logic_xcc, compute_C_logic }, - [CC_OP_ADD] = { compute_all_add_xcc, compute_C_add_xcc }, - [CC_OP_ADDX] = { compute_all_addx_xcc, compute_C_addx_xcc }, - [CC_OP_TADD] = { compute_all_add_xcc, compute_C_add_xcc }, - [CC_OP_TADDTV] = { compute_all_add_xcc, compute_C_add_xcc }, - [CC_OP_SUB] = { compute_all_sub_xcc, compute_C_sub_xcc }, - [CC_OP_SUBX] = { compute_all_subx_xcc, compute_C_subx_xcc }, - [CC_OP_TSUB] = { compute_all_sub_xcc, compute_C_sub_xcc }, - [CC_OP_TSUBTV] = { compute_all_sub_xcc, compute_C_sub_xcc }, 
- [CC_OP_LOGIC] = { compute_all_logic_xcc, compute_C_logic }, -}; -#endif - -void helper_compute_psr(CPUSPARCState *env) -{ - uint32_t new_psr; - - new_psr = icc_table[CC_OP].compute_all(env); - env->psr = new_psr; -#ifdef TARGET_SPARC64 - new_psr = xcc_table[CC_OP].compute_all(env); - env->xcc = new_psr; -#endif - CC_OP = CC_OP_FLAGS; -} - -uint32_t helper_compute_C_icc(CPUSPARCState *env) -{ - return icc_table[CC_OP].compute_c(env) >> PSR_CARRY_SHIFT; -} diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index bb1a155510..befa7fc4eb 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -46,7 +46,6 @@ static void sparc_cpu_reset_hold(Object *obj) env->wim = 1; #endif env->regwptr = env->regbase + (env->cwp * 16); - CC_OP = CC_OP_FLAGS; #if defined(CONFIG_USER_ONLY) #ifdef TARGET_SPARC64 env->cleanwin = env->nwindows - 2; diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 758a4e8aaa..3e361a5b75 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -137,32 +137,6 @@ enum { #define PSR_CWP 0x1f #endif -#define CC_SRC (env->cc_src) -#define CC_SRC2 (env->cc_src2) -#define CC_DST (env->cc_dst) -#define CC_OP (env->cc_op) - -/* Even though lazy evaluation of CPU condition codes tends to be less - * important on RISC systems where condition codes are only updated - * when explicitly requested, SPARC uses it to update 32-bit and 64-bit - * condition codes. - */ -enum { - CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ - CC_OP_FLAGS, /* all cc are back in status register */ - CC_OP_DIV, /* modify N, Z and V, C = 0*/ - CC_OP_ADD, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_ADDX, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TADD, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TADDTV, /* modify all flags except V, CC_DST = res, CC_SRC = src1 */ - CC_OP_SUB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_SUBX, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TSUB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ - CC_OP_TSUBTV, /* modify all flags except V, CC_DST = res, CC_SRC = src1 */ - CC_OP_LOGIC, /* modify N and Z, C = V = 0, CC_DST = res */ - CC_OP_NB, -}; - /* Trap base register */ #define TBR_BASE_MASK 0xfffff000 @@ -458,15 +432,35 @@ struct CPUArchState { target_ulong npc; /* next program counter */ target_ulong y; /* multiply/divide register */ - /* emulator internal flags handling */ - target_ulong cc_src, cc_src2; - target_ulong cc_dst; - uint32_t cc_op; + /* + * Bit 31 is for icc, bit 63 for xcc. + * Other bits are garbage. + */ + target_long cc_N; + target_long cc_V; + + /* + * Z is represented as == 0; any non-zero value is !Z. + * For sparc64, the high 32-bits of icc.Z are garbage. + */ + target_ulong icc_Z; +#ifdef TARGET_SPARC64 + target_ulong xcc_Z; +#endif + + /* + * For sparc32, icc.C is boolean. + * For sparc64, xcc.C is boolean; + * icc.C is bit 32 with other bits garbage. 
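As an aside on the flag layout documented in the surrounding cpu.h hunk, the four PSR.icc bits can be rebuilt from the split fields as sketched below. This is a minimal standalone sketch, not code from this series: icc_from_split_flags() is a hypothetical name, the PSR_* bit positions are taken from target/sparc/cpu.h, and the sparc64 form of icc.C (bit 32) is assumed.

    #include <stdint.h>

    #define PSR_NEG   (1u << 23)   /* PSR.icc bit positions, per target/sparc/cpu.h */
    #define PSR_ZERO  (1u << 22)
    #define PSR_OVF   (1u << 21)
    #define PSR_CARRY (1u << 20)

    /* Hypothetical helper: rebuild PSR.icc from the split fields (sparc64 case). */
    static uint32_t icc_from_split_flags(int64_t cc_N, int64_t cc_V,
                                         uint64_t icc_Z, uint64_t icc_C)
    {
        uint32_t icc = 0;

        if (cc_N & (1ull << 31)) {      /* icc.N lives in bit 31 of cc_N */
            icc |= PSR_NEG;
        }
        if ((uint32_t)icc_Z == 0) {     /* Z is represented as "value == 0" */
            icc |= PSR_ZERO;
        }
        if (cc_V & (1ull << 31)) {      /* icc.V lives in bit 31 of cc_V */
            icc |= PSR_OVF;
        }
        if (icc_C & (1ull << 32)) {     /* on sparc64, icc.C is bit 32 of icc_C */
            icc |= PSR_CARRY;
        }
        return icc;
    }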
+ */ + target_ulong icc_C; +#ifdef TARGET_SPARC64 + target_ulong xcc_C; +#endif target_ulong cond; /* conditional branch result (XXX: save it in a temporary register when possible) */ - uint32_t psr; /* processor state register */ target_ulong fsr; /* FPU state register */ CPU_DoubleU fpr[TARGET_DPREGS]; /* floating point registers */ uint32_t cwp; /* index of current register window (extracted @@ -522,7 +516,6 @@ struct CPUArchState { #define MAXTL_MAX 8 #define MAXTL_MASK (MAXTL_MAX - 1) trap_state ts[MAXTL_MAX]; - uint32_t xcc; /* Extended integer condition codes */ uint32_t asi; uint32_t pstate; uint32_t tl; @@ -619,6 +612,7 @@ void sparc_restore_state_to_opc(CPUState *cs, /* win_helper.c */ target_ulong cpu_get_psr(CPUSPARCState *env1); void cpu_put_psr(CPUSPARCState *env1, target_ulong val); +void cpu_put_psr_icc(CPUSPARCState *env1, target_ulong val); void cpu_put_psr_raw(CPUSPARCState *env1, target_ulong val); #ifdef TARGET_SPARC64 void cpu_change_pstate(CPUSPARCState *env1, uint32_t new_pstate); diff --git a/target/sparc/helper.c b/target/sparc/helper.c index 2bcdc81d54..bd10b60e4b 100644 --- a/target/sparc/helper.c +++ b/target/sparc/helper.c @@ -81,109 +81,58 @@ void helper_tick_set_limit(void *opaque, uint64_t limit) } #endif -static target_ulong do_udiv(CPUSPARCState *env, target_ulong a, - target_ulong b, int cc, uintptr_t ra) +uint64_t helper_udiv(CPUSPARCState *env, target_ulong a, target_ulong b) { - int overflow = 0; - uint64_t x0; - uint32_t x1; + uint64_t a64 = (uint32_t)a | ((uint64_t)env->y << 32); + uint32_t b32 = b; + uint32_t r; - x0 = (a & 0xffffffff) | ((int64_t) (env->y) << 32); - x1 = (b & 0xffffffff); - - if (x1 == 0) { - cpu_raise_exception_ra(env, TT_DIV_ZERO, ra); - } - - x0 = x0 / x1; - if (x0 > UINT32_MAX) { - x0 = UINT32_MAX; - overflow = 1; + if (b32 == 0) { + cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); } - if (cc) { - env->cc_src2 = overflow; + a64 /= b32; + r = a64; + if (unlikely(a64 > UINT32_MAX)) { + return -1; /* r = UINT32_MAX, v = 1 */ } - return x0; + return r; } -target_ulong helper_udiv(CPUSPARCState *env, target_ulong a, target_ulong b) +uint64_t helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b) { - return do_udiv(env, a, b, 0, GETPC()); -} + int64_t a64 = (uint32_t)a | ((uint64_t)env->y << 32); + int32_t b32 = b; + int32_t r; -target_ulong helper_udiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b) -{ - return do_udiv(env, a, b, 1, GETPC()); -} - -static target_ulong do_sdiv(CPUSPARCState *env, target_ulong a, - target_ulong b, int cc, uintptr_t ra) -{ - int overflow = 0; - int64_t x0; - int32_t x1; - - x0 = (a & 0xffffffff) | ((int64_t) (env->y) << 32); - x1 = (b & 0xffffffff); - - if (x1 == 0) { - cpu_raise_exception_ra(env, TT_DIV_ZERO, ra); - } else if (x1 == -1 && x0 == INT64_MIN) { - x0 = INT32_MAX; - overflow = 1; - } else { - x0 = x0 / x1; - if ((int32_t) x0 != x0) { - x0 = x0 < 0 ? INT32_MIN : INT32_MAX; - overflow = 1; - } - } - - if (cc) { - env->cc_src2 = overflow; + if (b32 == 0) { + cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); } - return x0; -} - -target_ulong helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b) -{ - return do_sdiv(env, a, b, 0, GETPC()); -} - -target_ulong helper_sdiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b) -{ - return do_sdiv(env, a, b, 1, GETPC()); -} -#ifdef TARGET_SPARC64 -int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b) -{ - if (b == 0) { - /* Raise divide by zero trap. 
*/ - cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); - } else if (b == -1) { - /* Avoid overflow trap with i386 divide insn. */ - return -a; - } else { - return a / b; + if (unlikely(a64 == INT64_MIN)) { + /* + * Special case INT64_MIN / -1 is required to avoid trap on x86 host. + * However, with a dividend of INT64_MIN, there is no 32-bit divisor + * which can yield a 32-bit result: + * INT64_MIN / INT32_MIN = 0x1_0000_0000 + * INT64_MIN / INT32_MAX = -0x1_0000_0002 + * Therefore we know we must overflow and saturate. + */ + return (uint32_t)(b32 < 0 ? INT32_MAX : INT32_MIN) | (-1ull << 32); } -} -uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b) -{ - if (b == 0) { - /* Raise divide by zero trap. */ - cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); + a64 /= b; + r = a64; + if (unlikely(r != a64)) { + return (uint32_t)(a64 < 0 ? INT32_MIN : INT32_MAX) | (-1ull << 32); } - return a / b; + return (uint32_t)r; } -#endif target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, target_ulong src2) { - target_ulong dst; + target_ulong dst, v; /* Tag overflow occurs if either input has bits 0 or 1 set. */ if ((src1 | src2) & 3) { @@ -193,13 +142,23 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, dst = src1 + src2; /* Tag overflow occurs if the addition overflows. */ - if (~(src1 ^ src2) & (src1 ^ dst) & (1u << 31)) { + v = ~(src1 ^ src2) & (src1 ^ dst); + if (v & (1u << 31)) { goto tag_overflow; } /* Only modify the CC after any exceptions have been generated. */ - env->cc_src = src1; - env->cc_src2 = src2; + env->cc_V = v; + env->cc_N = dst; + env->icc_Z = dst; +#ifdef TARGET_SPARC64 + env->xcc_Z = dst; + env->icc_C = dst ^ src1 ^ src2; + env->xcc_C = dst < src1; +#else + env->icc_C = dst < src1; +#endif + return dst; tag_overflow: @@ -209,7 +168,7 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, target_ulong src2) { - target_ulong dst; + target_ulong dst, v; /* Tag overflow occurs if either input has bits 0 or 1 set. */ if ((src1 | src2) & 3) { @@ -219,13 +178,23 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, dst = src1 - src2; /* Tag overflow occurs if the subtraction overflows. */ - if ((src1 ^ src2) & (src1 ^ dst) & (1u << 31)) { + v = (src1 ^ src2) & (src1 ^ dst); + if (v & (1u << 31)) { goto tag_overflow; } /* Only modify the CC after any exceptions have been generated. 
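The tagged-add overflow test introduced in helper_taddcctv above, v = ~(src1 ^ src2) & (src1 ^ dst) checked at bit 31, is the usual signed-overflow identity. A minimal standalone check (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Two large positive tagged words: the sum overflows into the sign bit. */
        uint32_t src1 = 0x40000000, src2 = 0x40000000;
        uint32_t dst = src1 + src2;                     /* 0x80000000 */
        uint32_t v = ~(src1 ^ src2) & (src1 ^ dst);
        assert(v & (1u << 31));                         /* overflow detected */

        /* A small sum does not overflow, so bit 31 of v stays clear. */
        src1 = 1; src2 = 2; dst = src1 + src2;
        v = ~(src1 ^ src2) & (src1 ^ dst);
        assert((v & (1u << 31)) == 0);
        return 0;
    }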
*/ - env->cc_src = src1; - env->cc_src2 = src2; + env->cc_V = v; + env->cc_N = dst; + env->icc_Z = dst; +#ifdef TARGET_SPARC64 + env->xcc_Z = dst; + env->icc_C = dst ^ src1 ^ src2; + env->xcc_C = src1 < src2; +#else + env->icc_C = src1 < src2; +#endif + return dst; tag_overflow: diff --git a/target/sparc/helper.h b/target/sparc/helper.h index dd1721a340..55eff66283 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -27,16 +27,10 @@ DEF_HELPER_FLAGS_2(tick_set_limit, TCG_CALL_NO_RWG, void, ptr, i64) DEF_HELPER_1(debug, void, env) DEF_HELPER_1(save, void, env) DEF_HELPER_1(restore, void, env) -DEF_HELPER_3(udiv, tl, env, tl, tl) -DEF_HELPER_3(udiv_cc, tl, env, tl, tl) -DEF_HELPER_3(sdiv, tl, env, tl, tl) -DEF_HELPER_3(sdiv_cc, tl, env, tl, tl) +DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_WG, i64, env, tl, tl) +DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_WG, i64, env, tl, tl) DEF_HELPER_3(taddcctv, tl, env, tl, tl) DEF_HELPER_3(tsubcctv, tl, env, tl, tl) -#ifdef TARGET_SPARC64 -DEF_HELPER_FLAGS_3(sdivx, TCG_CALL_NO_WG, s64, env, s64, s64) -DEF_HELPER_FLAGS_3(udivx, TCG_CALL_NO_WG, i64, env, i64, i64) -#endif #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32) DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32) @@ -150,5 +144,3 @@ VIS_CMPHELPER(cmpne) #undef F_HELPER_0_1 #undef VIS_HELPER #undef VIS_CMPHELPER -DEF_HELPER_1(compute_psr, void, env) -DEF_HELPER_FLAGS_1(compute_C_icc, TCG_CALL_NO_WG_SE, i32, env) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 0552f1447d..2d26404cb2 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -182,9 +182,10 @@ UMUL 10 ..... 0.1010 ..... . ............. @r_r_ri_cc SMUL 10 ..... 0.1011 ..... . ............. @r_r_ri_cc MULScc 10 ..... 100100 ..... . ............. @r_r_ri_cc1 -UDIVX 10 ..... 001101 ..... . ............. @r_r_ri_cc0 -SDIVX 10 ..... 101101 ..... . ............. @r_r_ri_cc0 -UDIV 10 ..... 0.1110 ..... . ............. @r_r_ri_cc +UDIVX 10 ..... 001101 ..... . ............. @r_r_ri +SDIVX 10 ..... 101101 ..... . ............. @r_r_ri +UDIV 10 ..... 001110 ..... . ............. @r_r_ri +UDIVcc 10 ..... 011110 ..... . ............. @r_r_ri_cc1 SDIV 10 ..... 0.1111 ..... . ............. @r_r_ri_cc TADDcc 10 ..... 100000 ..... . ............. @r_r_ri_cc1 diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 82e8418e46..1563613582 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -103,11 +103,6 @@ void sparc_cpu_do_interrupt(CPUState *cs) CPUSPARCState *env = &cpu->env; int cwp, intno = cs->exception_index; - /* Compute PSR before exposing state. */ - if (env->cc_op != CC_OP_FLAGS) { - cpu_get_psr(env); - } - if (qemu_loglevel_mask(CPU_LOG_INT)) { static int count; const char *name; diff --git a/target/sparc/int64_helper.c b/target/sparc/int64_helper.c index 793e57c536..1b4155f5f3 100644 --- a/target/sparc/int64_helper.c +++ b/target/sparc/int64_helper.c @@ -135,11 +135,6 @@ void sparc_cpu_do_interrupt(CPUState *cs) int intno = cs->exception_index; trap_state *tsptr; - /* Compute PSR before exposing state. 
*/ - if (env->cc_op != CC_OP_FLAGS) { - cpu_get_psr(env); - } - #ifdef DEBUG_PCALL if (qemu_loglevel_mask(CPU_LOG_INT)) { static int count; diff --git a/target/sparc/machine.c b/target/sparc/machine.c index 274e1217df..44dfc07014 100644 --- a/target/sparc/machine.c +++ b/target/sparc/machine.c @@ -83,6 +83,42 @@ static const VMStateInfo vmstate_psr = { .put = put_psr, }; +#ifdef TARGET_SPARC64 +static int get_xcc(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + SPARCCPU *cpu = opaque; + CPUSPARCState *env = &cpu->env; + uint32_t val = qemu_get_be32(f); + + /* Do not clobber icc.[NV] */ + env->cc_N = deposit64(env->cc_N, 32, 32, -(val & PSR_NEG)); + env->cc_V = deposit64(env->cc_V, 32, 32, -(val & PSR_OVF)); + env->xcc_Z = ~val & PSR_ZERO; + env->xcc_C = (val >> PSR_CARRY_SHIFT) & 1; + + return 0; +} + +static int put_xcc(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + SPARCCPU *cpu = opaque; + CPUSPARCState *env = &cpu->env; + uint32_t val = cpu_get_ccr(env); + + /* Extract just xcc out of ccr and shift into legacy position. */ + qemu_put_be32(f, (val & 0xf0) << (20 - 4)); + return 0; +} + +static const VMStateInfo vmstate_xcc = { + .name = "xcc", + .get = get_xcc, + .put = put_xcc, +}; +#endif + static int cpu_pre_save(void *opaque) { SPARCCPU *cpu = opaque; @@ -155,7 +191,14 @@ const VMStateDescription vmstate_sparc_cpu = { VMSTATE_UINT32(env.mmu_version, SPARCCPU), VMSTATE_STRUCT_ARRAY(env.ts, SPARCCPU, MAXTL_MAX, 0, vmstate_trap_state, trap_state), - VMSTATE_UINT32(env.xcc, SPARCCPU), + { + .name = "xcc", + .version_id = 0, + .size = sizeof(uint32_t), + .info = &vmstate_xcc, + .flags = VMS_SINGLE, + .offset = 0, + }, VMSTATE_UINT32(env.asi, SPARCCPU), VMSTATE_UINT32(env.pstate, SPARCCPU), VMSTATE_UINT32(env.tl, SPARCCPU), diff --git a/target/sparc/meson.build b/target/sparc/meson.build index c316773db6..46289c8669 100644 --- a/target/sparc/meson.build +++ b/target/sparc/meson.build @@ -3,7 +3,6 @@ gen = decodetree.process('insns.decode') sparc_ss = ss.source_set() sparc_ss.add(gen) sparc_ss.add(files( - 'cc_helper.c', 'cpu.c', 'fop_helper.c', 'gdbstub.c', diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 986a88c4e1..6fc333a6b8 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -51,12 +51,10 @@ # define gen_helper_restored(E) qemu_build_not_reached() # define gen_helper_retry(E) qemu_build_not_reached() # define gen_helper_saved(E) qemu_build_not_reached() -# define gen_helper_sdivx(D, E, A, B) qemu_build_not_reached() # define gen_helper_set_softint(E, S) qemu_build_not_reached() # define gen_helper_tick_get_count(D, E, T, C) qemu_build_not_reached() # define gen_helper_tick_set_count(P, S) qemu_build_not_reached() # define gen_helper_tick_set_limit(P, S) qemu_build_not_reached() -# define gen_helper_udivx(D, E, A, B) qemu_build_not_reached() # define gen_helper_wrccr(E, S) qemu_build_not_reached() # define gen_helper_wrcwp(E, S) qemu_build_not_reached() # define gen_helper_wrgl(E, S) qemu_build_not_reached() @@ -105,21 +103,35 @@ /* global register indexes */ static TCGv_ptr cpu_regwptr; -static TCGv cpu_cc_src, cpu_cc_src2, cpu_cc_dst; -static TCGv_i32 cpu_cc_op; -static TCGv_i32 cpu_psr; static TCGv cpu_fsr, cpu_pc, cpu_npc; static TCGv cpu_regs[32]; static TCGv cpu_y; static TCGv cpu_tbr; static TCGv cpu_cond; +static TCGv cpu_cc_N; +static TCGv cpu_cc_V; +static TCGv cpu_icc_Z; +static TCGv cpu_icc_C; #ifdef TARGET_SPARC64 -static TCGv_i32 cpu_xcc, cpu_fprs; +static 
TCGv cpu_xcc_Z; +static TCGv cpu_xcc_C; +static TCGv_i32 cpu_fprs; static TCGv cpu_gsr; #else # define cpu_fprs ({ qemu_build_not_reached(); (TCGv)NULL; }) # define cpu_gsr ({ qemu_build_not_reached(); (TCGv)NULL; }) #endif + +#ifdef TARGET_SPARC64 +#define cpu_cc_Z cpu_xcc_Z +#define cpu_cc_C cpu_xcc_C +#else +#define cpu_cc_Z cpu_icc_Z +#define cpu_cc_C cpu_icc_C +#define cpu_xcc_Z ({ qemu_build_not_reached(); NULL; }) +#define cpu_xcc_C ({ qemu_build_not_reached(); NULL; }) +#endif + /* Floating point registers */ static TCGv_i64 cpu_fpr[TARGET_DPREGS]; @@ -132,6 +144,12 @@ static TCGv_i64 cpu_fpr[TARGET_DPREGS]; # define env64_field_offsetof(X) ({ qemu_build_not_reached(); 0; }) #endif +typedef struct DisasCompare { + TCGCond cond; + TCGv c1; + int c2; +} DisasCompare; + typedef struct DisasDelayException { struct DisasDelayException *next; TCGLabel *lab; @@ -145,8 +163,13 @@ typedef struct DisasContext { DisasContextBase base; target_ulong pc; /* current Program Counter: integer or DYNAMIC_PC */ target_ulong npc; /* next PC: integer or DYNAMIC_PC or JUMP_PC */ - target_ulong jump_pc[2]; /* used when JUMP_PC pc value is used */ + + /* Used when JUMP_PC value is used. */ + DisasCompare jump; + target_ulong jump_pc[2]; + int mem_idx; + bool cpu_cond_live; bool fpu_enabled; bool address_mask_32bit; #ifndef CONFIG_USER_ONLY @@ -156,7 +179,6 @@ typedef struct DisasContext { #endif #endif - uint32_t cc_op; /* current CC operation */ sparc_def_t *def; #ifdef TARGET_SPARC64 int fprs_dirty; @@ -165,12 +187,6 @@ typedef struct DisasContext { DisasDelayException *delay_excp_list; } DisasContext; -typedef struct { - TCGCond cond; - bool is_bool; - TCGv c1, c2; -} DisasCompare; - // This function uses non-native bit order #define GET_FIELD(X, FROM, TO) \ ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1)) @@ -366,293 +382,162 @@ static void gen_goto_tb(DisasContext *s, int tb_num, } } -// XXX suboptimal -static void gen_mov_reg_N(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_NEG_SHIFT, 1); -} - -static void gen_mov_reg_Z(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_ZERO_SHIFT, 1); -} - -static void gen_mov_reg_V(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_OVF_SHIFT, 1); -} - -static void gen_mov_reg_C(TCGv reg, TCGv_i32 src) -{ - tcg_gen_extu_i32_tl(reg, src); - tcg_gen_extract_tl(reg, reg, PSR_CARRY_SHIFT, 1); -} - -static void gen_op_add_cc(TCGv dst, TCGv src1, TCGv src2) -{ - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); -} - -static TCGv_i32 gen_add32_carry32(void) -{ - TCGv_i32 carry_32, cc_src1_32, cc_src2_32; - - /* Carry is computed from a previous add: (dst < src) */ -#if TARGET_LONG_BITS == 64 - cc_src1_32 = tcg_temp_new_i32(); - cc_src2_32 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(cc_src1_32, cpu_cc_dst); - tcg_gen_extrl_i64_i32(cc_src2_32, cpu_cc_src); -#else - cc_src1_32 = cpu_cc_dst; - cc_src2_32 = cpu_cc_src; -#endif - - carry_32 = tcg_temp_new_i32(); - tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32); - - return carry_32; -} - -static TCGv_i32 gen_sub32_carry32(void) -{ - TCGv_i32 carry_32, cc_src1_32, cc_src2_32; - - /* Carry is computed from a previous borrow: (src1 < src2) */ -#if TARGET_LONG_BITS == 64 - cc_src1_32 = tcg_temp_new_i32(); - cc_src2_32 = tcg_temp_new_i32(); - 
tcg_gen_extrl_i64_i32(cc_src1_32, cpu_cc_src); - tcg_gen_extrl_i64_i32(cc_src2_32, cpu_cc_src2); -#else - cc_src1_32 = cpu_cc_src; - cc_src2_32 = cpu_cc_src2; -#endif - - carry_32 = tcg_temp_new_i32(); - tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32); - - return carry_32; -} - -static void gen_op_addc_int(TCGv dst, TCGv src1, TCGv src2, - TCGv_i32 carry_32, bool update_cc) +static TCGv gen_carry32(void) { - tcg_gen_add_tl(dst, src1, src2); - -#ifdef TARGET_SPARC64 - TCGv carry = tcg_temp_new(); - tcg_gen_extu_i32_tl(carry, carry_32); - tcg_gen_add_tl(dst, dst, carry); -#else - tcg_gen_add_i32(dst, dst, carry_32); -#endif - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); + if (TARGET_LONG_BITS == 64) { + TCGv t = tcg_temp_new(); + tcg_gen_extract_tl(t, cpu_icc_C, 32, 1); + return t; } + return cpu_icc_C; } -static void gen_op_addc_int_add(TCGv dst, TCGv src1, TCGv src2, bool update_cc) +static void gen_op_addcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) { - TCGv discard; + TCGv z = tcg_constant_tl(0); - if (TARGET_LONG_BITS == 64) { - gen_op_addc_int(dst, src1, src2, gen_add32_carry32(), update_cc); - return; + if (cin) { + tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); + tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + } else { + tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } - - /* - * We can re-use the host's hardware carry generation by using - * an ADD2 opcode. We discard the low part of the output. - * Ideally we'd combine this operation with the add that - * generated the carry in the first place. - */ - discard = tcg_temp_new(); - tcg_gen_add2_tl(discard, dst, cpu_cc_src, src1, cpu_cc_src2, src2); - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); + tcg_gen_xor_tl(cpu_cc_Z, src1, src2); + tcg_gen_xor_tl(cpu_cc_V, cpu_cc_N, src2); + tcg_gen_andc_tl(cpu_cc_V, cpu_cc_V, cpu_cc_Z); + if (TARGET_LONG_BITS == 64) { + /* + * Carry-in to bit 32 is result ^ src1 ^ src2. + * We already have the src xor term in Z, from computation of V. + */ + tcg_gen_xor_tl(cpu_icc_C, cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); } + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(dst, cpu_cc_N); } -static void gen_op_addc_add(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_addcc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_addc_int_add(dst, src1, src2, false); + gen_op_addcc_int(dst, src1, src2, NULL); } -static void gen_op_addccc_add(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_taddcc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_addc_int_add(dst, src1, src2, true); -} - -static void gen_op_addc_sub(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_addc_int(dst, src1, src2, gen_sub32_carry32(), false); -} + TCGv t = tcg_temp_new(); -static void gen_op_addccc_sub(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_addc_int(dst, src1, src2, gen_sub32_carry32(), true); -} + /* Save the tag bits around modification of dst. 
*/ + tcg_gen_or_tl(t, src1, src2); -static void gen_op_addc_int_generic(TCGv dst, TCGv src1, TCGv src2, - bool update_cc) -{ - TCGv_i32 carry_32 = tcg_temp_new_i32(); - gen_helper_compute_C_icc(carry_32, tcg_env); - gen_op_addc_int(dst, src1, src2, carry_32, update_cc); -} + gen_op_addcc(dst, src1, src2); -static void gen_op_addc_generic(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_addc_int_generic(dst, src1, src2, false); + /* Incorprate tag bits into icc.V */ + tcg_gen_andi_tl(t, t, 3); + tcg_gen_neg_tl(t, t); + tcg_gen_ext32u_tl(t, t); + tcg_gen_or_tl(cpu_cc_V, cpu_cc_V, t); } -static void gen_op_addccc_generic(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_addc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_addc_int_generic(dst, src1, src2, true); + tcg_gen_add_tl(dst, src1, src2); + tcg_gen_add_tl(dst, dst, gen_carry32()); } -static void gen_op_sub_cc(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_addccc(TCGv dst, TCGv src1, TCGv src2) { - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); + gen_op_addcc_int(dst, src1, src2, gen_carry32()); } -static void gen_op_subc_int(TCGv dst, TCGv src1, TCGv src2, - TCGv_i32 carry_32, bool update_cc) +static void gen_op_subcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) { - TCGv carry; - -#if TARGET_LONG_BITS == 64 - carry = tcg_temp_new(); - tcg_gen_extu_i32_i64(carry, carry_32); -#else - carry = carry_32; -#endif + TCGv z = tcg_constant_tl(0); - tcg_gen_sub_tl(dst, src1, src2); - tcg_gen_sub_tl(dst, dst, carry); - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); + if (cin) { + tcg_gen_sub2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); + tcg_gen_sub2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + } else { + tcg_gen_sub2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } + tcg_gen_neg_tl(cpu_cc_C, cpu_cc_C); + tcg_gen_xor_tl(cpu_cc_Z, src1, src2); + tcg_gen_xor_tl(cpu_cc_V, cpu_cc_N, src1); + tcg_gen_and_tl(cpu_cc_V, cpu_cc_V, cpu_cc_Z); +#ifdef TARGET_SPARC64 + tcg_gen_xor_tl(cpu_icc_C, cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); +#endif + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_mov_tl(dst, cpu_cc_N); } -static void gen_op_subc_add(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_subc_int(dst, src1, src2, gen_add32_carry32(), false); -} - -static void gen_op_subccc_add(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_subc_int(dst, src1, src2, gen_add32_carry32(), true); -} - -static void gen_op_subc_int_sub(TCGv dst, TCGv src1, TCGv src2, bool update_cc) +static void gen_op_subcc(TCGv dst, TCGv src1, TCGv src2) { - TCGv discard; - - if (TARGET_LONG_BITS == 64) { - gen_op_subc_int(dst, src1, src2, gen_sub32_carry32(), update_cc); - return; - } - - /* - * We can re-use the host's hardware carry generation by using - * a SUB2 opcode. We discard the low part of the output. 
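The flag identities that gen_op_addcc_int relies on earlier in this file (icc carry out of bit 31 equals bit 32 of dst ^ src1 ^ src2; overflow is (dst ^ src2) & ~(src1 ^ src2)) are easy to sanity-check with plain integer arithmetic. A minimal standalone check, not QEMU code:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t src1 = 0xffffffffull;   /* -1 as a 32-bit operand */
        uint64_t src2 = 1;
        uint64_t dst = src1 + src2;      /* full-width sum, as add2 produces */

        /* Carry into bit 32 == carry out of the 32-bit (icc) addition. */
        assert(((dst ^ src1 ^ src2) >> 32) & 1);

        /* -1 + 1 does not overflow 32-bit signed arithmetic, so icc.V is clear. */
        uint64_t v = (dst ^ src2) & ~(src1 ^ src2);
        assert(((v >> 31) & 1) == 0);
        return 0;
    }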
- */ - discard = tcg_temp_new(); - tcg_gen_sub2_tl(discard, dst, cpu_cc_src, src1, cpu_cc_src2, src2); - - if (update_cc) { - tcg_debug_assert(dst == cpu_cc_dst); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - } + gen_op_subcc_int(dst, src1, src2, NULL); } -static void gen_op_subc_sub(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_tsubcc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_subc_int_sub(dst, src1, src2, false); -} + TCGv t = tcg_temp_new(); -static void gen_op_subccc_sub(TCGv dst, TCGv src1, TCGv src2) -{ - gen_op_subc_int_sub(dst, src1, src2, true); -} + /* Save the tag bits around modification of dst. */ + tcg_gen_or_tl(t, src1, src2); -static void gen_op_subc_int_generic(TCGv dst, TCGv src1, TCGv src2, - bool update_cc) -{ - TCGv_i32 carry_32 = tcg_temp_new_i32(); + gen_op_subcc(dst, src1, src2); - gen_helper_compute_C_icc(carry_32, tcg_env); - gen_op_subc_int(dst, src1, src2, carry_32, update_cc); + /* Incorprate tag bits into icc.V */ + tcg_gen_andi_tl(t, t, 3); + tcg_gen_neg_tl(t, t); + tcg_gen_ext32u_tl(t, t); + tcg_gen_or_tl(cpu_cc_V, cpu_cc_V, t); } -static void gen_op_subc_generic(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_subc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_subc_int_generic(dst, src1, src2, false); + tcg_gen_sub_tl(dst, src1, src2); + tcg_gen_sub_tl(dst, dst, gen_carry32()); } -static void gen_op_subccc_generic(TCGv dst, TCGv src1, TCGv src2) +static void gen_op_subccc(TCGv dst, TCGv src1, TCGv src2) { - gen_op_subc_int_generic(dst, src1, src2, true); + gen_op_subcc_int(dst, src1, src2, gen_carry32()); } static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2) { - TCGv r_temp, zero, t0; + TCGv zero = tcg_constant_tl(0); + TCGv t_src1 = tcg_temp_new(); + TCGv t_src2 = tcg_temp_new(); + TCGv t0 = tcg_temp_new(); - r_temp = tcg_temp_new(); - t0 = tcg_temp_new(); + tcg_gen_ext32u_tl(t_src1, src1); + tcg_gen_ext32u_tl(t_src2, src2); - /* old op: - if (!(env->y & 1)) - T1 = 0; - */ - zero = tcg_constant_tl(0); - tcg_gen_andi_tl(cpu_cc_src, src1, 0xffffffff); - tcg_gen_andi_tl(r_temp, cpu_y, 0x1); - tcg_gen_andi_tl(cpu_cc_src2, src2, 0xffffffff); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_cc_src2, r_temp, zero, - zero, cpu_cc_src2); + /* + * if (!(env->y & 1)) + * src2 = 0; + */ + tcg_gen_andi_tl(t0, cpu_y, 0x1); + tcg_gen_movcond_tl(TCG_COND_EQ, t_src2, t0, zero, zero, t_src2); - // b2 = T0 & 1; - // env->y = (b2 << 31) | (env->y >> 1); + /* + * b2 = src1 & 1; + * y = (b2 << 31) | (y >> 1); + */ tcg_gen_extract_tl(t0, cpu_y, 1, 31); - tcg_gen_deposit_tl(cpu_y, t0, cpu_cc_src, 31, 1); + tcg_gen_deposit_tl(cpu_y, t0, src1, 31, 1); // b1 = N ^ V; - gen_mov_reg_N(t0, cpu_psr); - gen_mov_reg_V(r_temp, cpu_psr); - tcg_gen_xor_tl(t0, t0, r_temp); + tcg_gen_xor_tl(t0, cpu_cc_N, cpu_cc_V); - // T0 = (b1 << 31) | (T0 >> 1); - // src1 = T0; - tcg_gen_shli_tl(t0, t0, 31); - tcg_gen_shri_tl(cpu_cc_src, cpu_cc_src, 1); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2); + /* + * src1 = (b1 << 31) | (src1 >> 1) + */ + tcg_gen_andi_tl(t0, t0, 1u << 31); + tcg_gen_shri_tl(t_src1, t_src1, 1); + tcg_gen_or_tl(t_src1, t_src1, t0); - tcg_gen_mov_tl(dst, cpu_cc_dst); + gen_op_addcc(dst, t_src1, t_src2); } static void gen_op_multiply(TCGv dst, TCGv src1, TCGv src2, int sign_ext) @@ -692,34 +577,66 @@ static void gen_op_smul(TCGv dst, TCGv src1, TCGv src2) gen_op_multiply(dst, src1, src2, 1); } -static void gen_op_udivx(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_udivx(dst, tcg_env, src1, src2); -} - 
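With the udivx/sdivx wrappers gone, 32-bit division goes through the reworked helper_udiv/helper_sdiv (see the target/sparc/helper.c hunk earlier): they return the quotient in the low 32 bits of an i64 and signal overflow through the upper bits, which gen_op_udivcc/gen_op_sdivcc below split into cc_N and cc_V. A minimal model of that convention, with udiv_packed() as a hypothetical stand-in for the helper (divide-by-zero is trapped separately and not modelled here):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t udiv_packed(uint32_t y, uint32_t a, uint32_t b)
    {
        uint64_t dividend = ((uint64_t)y << 32) | a;
        uint64_t q = dividend / b;
        return q > UINT32_MAX ? (uint64_t)-1 : q;   /* -1 => result 0xffffffff, V set */
    }

    int main(void)
    {
        uint64_t packed = udiv_packed(1, 0, 1);     /* 0x1_0000_0000 / 1 overflows */
        uint32_t result = (uint32_t)packed;         /* what gen_op_udivcc puts in cc_N */
        uint32_t v = packed >> 32;                  /* non-zero upper half => icc.V */
        assert(result == UINT32_MAX && v != 0);

        packed = udiv_packed(0, 100, 7);            /* small quotient, no overflow */
        assert((uint32_t)packed == 14 && (packed >> 32) == 0);
        return 0;
    }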
-static void gen_op_sdivx(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_sdivx(dst, tcg_env, src1, src2); -} - -static void gen_op_udiv(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_udiv(dst, tcg_env, src1, src2); -} - static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2) { +#ifdef TARGET_SPARC64 gen_helper_sdiv(dst, tcg_env, src1, src2); + tcg_gen_ext32s_tl(dst, dst); +#else + TCGv_i64 t64 = tcg_temp_new_i64(); + gen_helper_sdiv(t64, tcg_env, src1, src2); + tcg_gen_trunc_i64_tl(dst, t64); +#endif } static void gen_op_udivcc(TCGv dst, TCGv src1, TCGv src2) { - gen_helper_udiv_cc(dst, tcg_env, src1, src2); + TCGv_i64 t64; + +#ifdef TARGET_SPARC64 + t64 = cpu_cc_V; +#else + t64 = tcg_temp_new_i64(); +#endif + + gen_helper_udiv(t64, tcg_env, src1, src2); + +#ifdef TARGET_SPARC64 + tcg_gen_ext32u_tl(cpu_cc_N, t64); + tcg_gen_shri_tl(cpu_cc_V, t64, 32); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_icc_C, 0); +#else + tcg_gen_extr_i64_tl(cpu_cc_N, cpu_cc_V, t64); +#endif + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_cc_C, 0); + tcg_gen_mov_tl(dst, cpu_cc_N); } static void gen_op_sdivcc(TCGv dst, TCGv src1, TCGv src2) { - gen_helper_sdiv_cc(dst, tcg_env, src1, src2); + TCGv_i64 t64; + +#ifdef TARGET_SPARC64 + t64 = cpu_cc_V; +#else + t64 = tcg_temp_new_i64(); +#endif + + gen_helper_sdiv(t64, tcg_env, src1, src2); + +#ifdef TARGET_SPARC64 + tcg_gen_ext32s_tl(cpu_cc_N, t64); + tcg_gen_shri_tl(cpu_cc_V, t64, 32); + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_icc_C, 0); +#else + tcg_gen_extr_i64_tl(cpu_cc_N, cpu_cc_V, t64); +#endif + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_cc_C, 0); + tcg_gen_mov_tl(dst, cpu_cc_N); } static void gen_op_taddcctv(TCGv dst, TCGv src1, TCGv src2) @@ -825,114 +742,12 @@ static void gen_op_eval_ba(TCGv dst) tcg_gen_movi_tl(dst, 1); } -// Z -static void gen_op_eval_be(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_Z(dst, src); -} - -// Z | (N ^ V) -static void gen_op_eval_ble(TCGv dst, TCGv_i32 src) -{ - TCGv t0 = tcg_temp_new(); - gen_mov_reg_N(t0, src); - gen_mov_reg_V(dst, src); - tcg_gen_xor_tl(dst, dst, t0); - gen_mov_reg_Z(t0, src); - tcg_gen_or_tl(dst, dst, t0); -} - -// N ^ V -static void gen_op_eval_bl(TCGv dst, TCGv_i32 src) -{ - TCGv t0 = tcg_temp_new(); - gen_mov_reg_V(t0, src); - gen_mov_reg_N(dst, src); - tcg_gen_xor_tl(dst, dst, t0); -} - -// C | Z -static void gen_op_eval_bleu(TCGv dst, TCGv_i32 src) -{ - TCGv t0 = tcg_temp_new(); - gen_mov_reg_Z(t0, src); - gen_mov_reg_C(dst, src); - tcg_gen_or_tl(dst, dst, t0); -} - -// C -static void gen_op_eval_bcs(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_C(dst, src); -} - -// V -static void gen_op_eval_bvs(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_V(dst, src); -} - // 0 static void gen_op_eval_bn(TCGv dst) { tcg_gen_movi_tl(dst, 0); } -// N -static void gen_op_eval_bneg(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_N(dst, src); -} - -// !Z -static void gen_op_eval_bne(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_Z(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !(Z | (N ^ V)) -static void gen_op_eval_bg(TCGv dst, TCGv_i32 src) -{ - gen_op_eval_ble(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !(N ^ V) -static void gen_op_eval_bge(TCGv dst, TCGv_i32 src) -{ - gen_op_eval_bl(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !(C | Z) -static void gen_op_eval_bgu(TCGv dst, TCGv_i32 src) -{ - gen_op_eval_bleu(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !C -static void gen_op_eval_bcc(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_C(dst, src); - 
tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !N -static void gen_op_eval_bpos(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_N(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - -// !V -static void gen_op_eval_bvc(TCGv dst, TCGv_i32 src) -{ - gen_mov_reg_V(dst, src); - tcg_gen_xori_tl(dst, dst, 0x1); -} - /* FPSR bit field FCC1 | FCC0: 0 = @@ -1074,26 +889,26 @@ static void gen_op_eval_fbo(TCGv dst, TCGv src, unsigned int fcc_offset) tcg_gen_xori_tl(dst, dst, 0x1); } -static void gen_branch2(DisasContext *dc, target_ulong pc1, - target_ulong pc2, TCGv r_cond) +static void finishing_insn(DisasContext *dc) { - TCGLabel *l1 = gen_new_label(); - - tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1); - - gen_goto_tb(dc, 0, pc1, pc1 + 4); - - gen_set_label(l1); - gen_goto_tb(dc, 1, pc2, pc2 + 4); + /* + * From here, there is no future path through an unwinding exception. + * If the current insn cannot raise an exception, the computation of + * cpu_cond may be able to be elided. + */ + if (dc->cpu_cond_live) { + tcg_gen_discard_tl(cpu_cond); + dc->cpu_cond_live = false; + } } static void gen_generic_branch(DisasContext *dc) { TCGv npc0 = tcg_constant_tl(dc->jump_pc[0]); TCGv npc1 = tcg_constant_tl(dc->jump_pc[1]); - TCGv zero = tcg_constant_tl(0); + TCGv c2 = tcg_constant_tl(dc->jump.c2); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_npc, cpu_cond, zero, npc0, npc1); + tcg_gen_movcond_tl(dc->jump.cond, cpu_npc, dc->jump.c1, c2, npc0, npc1); } /* call this function before using the condition register as it may @@ -1125,14 +940,6 @@ static void save_npc(DisasContext *dc) } } -static void update_psr(DisasContext *dc) -{ - if (dc->cc_op != CC_OP_FLAGS) { - dc->cc_op = CC_OP_FLAGS; - gen_helper_compute_psr(tcg_env); - } -} - static void save_state(DisasContext *dc) { tcg_gen_movi_tl(cpu_pc, dc->pc); @@ -1141,6 +948,7 @@ static void save_state(DisasContext *dc) static void gen_exception(DisasContext *dc, int which) { + finishing_insn(dc); save_state(dc); gen_helper_raise_exception(tcg_env, tcg_constant_i32(which)); dc->base.is_jmp = DISAS_NORETURN; @@ -1182,6 +990,8 @@ static void gen_check_align(DisasContext *dc, TCGv addr, int mask) static void gen_mov_pc_npc(DisasContext *dc) { + finishing_insn(dc); + if (dc->npc & 3) { switch (dc->npc) { case JUMP_PC: @@ -1202,178 +1012,103 @@ static void gen_mov_pc_npc(DisasContext *dc) } } -static void gen_op_next_insn(void) -{ - tcg_gen_mov_tl(cpu_pc, cpu_npc); - tcg_gen_addi_tl(cpu_npc, cpu_npc, 4); -} - static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond, DisasContext *dc) { - static int subcc_cond[16] = { - TCG_COND_NEVER, - TCG_COND_EQ, - TCG_COND_LE, - TCG_COND_LT, - TCG_COND_LEU, - TCG_COND_LTU, - -1, /* neg */ - -1, /* overflow */ - TCG_COND_ALWAYS, - TCG_COND_NE, - TCG_COND_GT, - TCG_COND_GE, - TCG_COND_GTU, - TCG_COND_GEU, - -1, /* pos */ - -1, /* no overflow */ - }; + TCGv t1; - static int logic_cond[16] = { - TCG_COND_NEVER, - TCG_COND_EQ, /* eq: Z */ - TCG_COND_LE, /* le: Z | (N ^ V) -> Z | N */ - TCG_COND_LT, /* lt: N ^ V -> N */ - TCG_COND_EQ, /* leu: C | Z -> Z */ - TCG_COND_NEVER, /* ltu: C -> 0 */ - TCG_COND_LT, /* neg: N */ - TCG_COND_NEVER, /* vs: V -> 0 */ - TCG_COND_ALWAYS, - TCG_COND_NE, /* ne: !Z */ - TCG_COND_GT, /* gt: !(Z | (N ^ V)) -> !(Z | N) */ - TCG_COND_GE, /* ge: !(N ^ V) -> !N */ - TCG_COND_NE, /* gtu: !(C | Z) -> !Z */ - TCG_COND_ALWAYS, /* geu: !C -> 1 */ - TCG_COND_GE, /* pos: !N */ - TCG_COND_ALWAYS, /* vc: !V -> 1 */ - }; - - TCGv_i32 r_src; - TCGv r_dst; + cmp->c1 = t1 = tcg_temp_new(); + cmp->c2 = 0; -#ifdef TARGET_SPARC64 - if 
(xcc) { - r_src = cpu_xcc; - } else { - r_src = cpu_psr; - } -#else - r_src = cpu_psr; -#endif + switch (cond & 7) { + case 0x0: /* never */ + cmp->cond = TCG_COND_NEVER; + cmp->c1 = tcg_constant_tl(0); + break; - switch (dc->cc_op) { - case CC_OP_LOGIC: - cmp->cond = logic_cond[cond]; - do_compare_dst_0: - cmp->is_bool = false; - cmp->c2 = tcg_constant_tl(0); -#ifdef TARGET_SPARC64 - if (!xcc) { - cmp->c1 = tcg_temp_new(); - tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst); - break; + case 0x1: /* eq: Z */ + cmp->cond = TCG_COND_EQ; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_Z); + } else { + tcg_gen_ext32u_tl(t1, cpu_icc_Z); } -#endif - cmp->c1 = cpu_cc_dst; break; - case CC_OP_SUB: - switch (cond) { - case 6: /* neg */ - case 14: /* pos */ - cmp->cond = (cond == 6 ? TCG_COND_LT : TCG_COND_GE); - goto do_compare_dst_0; - - case 7: /* overflow */ - case 15: /* !overflow */ - goto do_dynamic; + case 0x2: /* le: Z | (N ^ V) */ + /* + * Simplify: + * cc_Z || (N ^ V) < 0 NE + * cc_Z && !((N ^ V) < 0) EQ + * cc_Z & ~((N ^ V) >> TLB) EQ + */ + cmp->cond = TCG_COND_EQ; + tcg_gen_xor_tl(t1, cpu_cc_N, cpu_cc_V); + tcg_gen_sextract_tl(t1, t1, xcc ? 63 : 31, 1); + tcg_gen_andc_tl(t1, xcc ? cpu_cc_Z : cpu_icc_Z, t1); + if (TARGET_LONG_BITS == 64 && !xcc) { + tcg_gen_ext32u_tl(t1, t1); + } + break; - default: - cmp->cond = subcc_cond[cond]; - cmp->is_bool = false; -#ifdef TARGET_SPARC64 - if (!xcc) { - /* Note that sign-extension works for unsigned compares as - long as both operands are sign-extended. */ - cmp->c1 = tcg_temp_new(); - cmp->c2 = tcg_temp_new(); - tcg_gen_ext32s_tl(cmp->c1, cpu_cc_src); - tcg_gen_ext32s_tl(cmp->c2, cpu_cc_src2); - break; - } -#endif - cmp->c1 = cpu_cc_src; - cmp->c2 = cpu_cc_src2; - break; + case 0x3: /* lt: N ^ V */ + cmp->cond = TCG_COND_LT; + tcg_gen_xor_tl(t1, cpu_cc_N, cpu_cc_V); + if (TARGET_LONG_BITS == 64 && !xcc) { + tcg_gen_ext32s_tl(t1, t1); } break; - default: - do_dynamic: - gen_helper_compute_psr(tcg_env); - dc->cc_op = CC_OP_FLAGS; - /* FALLTHRU */ + case 0x4: /* leu: Z | C */ + /* + * Simplify: + * cc_Z == 0 || cc_C != 0 NE + * cc_Z != 0 && cc_C == 0 EQ + * cc_Z & (cc_C ? 0 : -1) EQ + * cc_Z & (cc_C - 1) EQ + */ + cmp->cond = TCG_COND_EQ; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_subi_tl(t1, cpu_cc_C, 1); + tcg_gen_and_tl(t1, t1, cpu_cc_Z); + } else { + tcg_gen_extract_tl(t1, cpu_icc_C, 32, 1); + tcg_gen_subi_tl(t1, t1, 1); + tcg_gen_and_tl(t1, t1, cpu_icc_Z); + tcg_gen_ext32u_tl(t1, t1); + } + break; - case CC_OP_FLAGS: - /* We're going to generate a boolean result. 
*/ + case 0x5: /* ltu: C */ cmp->cond = TCG_COND_NE; - cmp->is_bool = true; - cmp->c1 = r_dst = tcg_temp_new(); - cmp->c2 = tcg_constant_tl(0); + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_C); + } else { + tcg_gen_extract_tl(t1, cpu_icc_C, 32, 1); + } + break; - switch (cond) { - case 0x0: - gen_op_eval_bn(r_dst); - break; - case 0x1: - gen_op_eval_be(r_dst, r_src); - break; - case 0x2: - gen_op_eval_ble(r_dst, r_src); - break; - case 0x3: - gen_op_eval_bl(r_dst, r_src); - break; - case 0x4: - gen_op_eval_bleu(r_dst, r_src); - break; - case 0x5: - gen_op_eval_bcs(r_dst, r_src); - break; - case 0x6: - gen_op_eval_bneg(r_dst, r_src); - break; - case 0x7: - gen_op_eval_bvs(r_dst, r_src); - break; - case 0x8: - gen_op_eval_ba(r_dst); - break; - case 0x9: - gen_op_eval_bne(r_dst, r_src); - break; - case 0xa: - gen_op_eval_bg(r_dst, r_src); - break; - case 0xb: - gen_op_eval_bge(r_dst, r_src); - break; - case 0xc: - gen_op_eval_bgu(r_dst, r_src); - break; - case 0xd: - gen_op_eval_bcc(r_dst, r_src); - break; - case 0xe: - gen_op_eval_bpos(r_dst, r_src); - break; - case 0xf: - gen_op_eval_bvc(r_dst, r_src); - break; + case 0x6: /* neg: N */ + cmp->cond = TCG_COND_LT; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_N); + } else { + tcg_gen_ext32s_tl(t1, cpu_cc_N); + } + break; + + case 0x7: /* vs: V */ + cmp->cond = TCG_COND_LT; + if (TARGET_LONG_BITS == 32 || xcc) { + tcg_gen_mov_tl(t1, cpu_cc_V); + } else { + tcg_gen_ext32s_tl(t1, cpu_cc_V); } break; } + if (cond & 8) { + cmp->cond = tcg_invert_cond(cmp->cond); + } } static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond) @@ -1383,9 +1118,8 @@ static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond) /* For now we still generate a straight boolean result. */ cmp->cond = TCG_COND_NE; - cmp->is_bool = true; cmp->c1 = r_dst = tcg_temp_new(); - cmp->c2 = tcg_constant_tl(0); + cmp->c2 = 0; switch (cc) { default: @@ -1455,24 +1189,29 @@ static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond) } } -// Inverted logic -static const TCGCond gen_tcg_cond_reg[8] = { - TCG_COND_NEVER, /* reserved */ - TCG_COND_NE, - TCG_COND_GT, - TCG_COND_GE, - TCG_COND_NEVER, /* reserved */ - TCG_COND_EQ, - TCG_COND_LE, - TCG_COND_LT, -}; - -static void gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src) +static bool gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src) { - cmp->cond = tcg_invert_cond(gen_tcg_cond_reg[cond]); - cmp->is_bool = false; - cmp->c1 = r_src; - cmp->c2 = tcg_constant_tl(0); + static const TCGCond cond_reg[4] = { + TCG_COND_NEVER, /* reserved */ + TCG_COND_EQ, + TCG_COND_LE, + TCG_COND_LT, + }; + TCGCond tcond; + + if ((cond & 3) == 0) { + return false; + } + tcond = cond_reg[cond & 3]; + if (cond & 4) { + tcond = tcg_invert_cond(tcond); + } + + cmp->cond = tcond; + cmp->c1 = tcg_temp_new(); + cmp->c2 = 0; + tcg_gen_mov_tl(cmp->c1, r_src); + return true; } static void gen_op_clear_ieee_excp_and_FTT(void) @@ -2472,18 +2211,14 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 TCGv_i32 c32, zero, dst, s1, s2; + TCGv_i64 c64 = tcg_temp_new_i64(); /* We have two choices here: extend the 32 bit data and use movcond_i64, or fold the comparison down to 32 bits and use movcond_i32. Choose the later. 
*/ c32 = tcg_temp_new_i32(); - if (cmp->is_bool) { - tcg_gen_extrl_i64_i32(c32, cmp->c1); - } else { - TCGv_i64 c64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(cmp->cond, c64, cmp->c1, cmp->c2); - tcg_gen_extrl_i64_i32(c32, c64); - } + tcg_gen_setcondi_i64(cmp->cond, c64, cmp->c1, cmp->c2); + tcg_gen_extrl_i64_i32(c32, c64); s1 = gen_load_fpr_F(dc, rs); s2 = gen_load_fpr_F(dc, rd); @@ -2502,7 +2237,7 @@ static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 TCGv_i64 dst = gen_dest_fpr_D(dc, rd); - tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, cmp->c2, + tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, tcg_constant_tl(cmp->c2), gen_load_fpr_D(dc, rs), gen_load_fpr_D(dc, rd)); gen_store_fpr_D(dc, rd, dst); @@ -2516,10 +2251,11 @@ static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs) #ifdef TARGET_SPARC64 int qd = QFPREG(rd); int qs = QFPREG(rs); + TCGv c2 = tcg_constant_tl(cmp->c2); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, cmp->c2, + tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, c2, cpu_fpr[qs / 2], cpu_fpr[qd / 2]); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, cmp->c2, + tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, c2, cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]); gen_update_fprs_dirty(dc, qd); @@ -2599,18 +2335,34 @@ static int extract_qfpreg(DisasContext *dc, int x) /* Default case for non jump instructions. */ static bool advance_pc(DisasContext *dc) { + TCGLabel *l1; + + finishing_insn(dc); + if (dc->npc & 3) { switch (dc->npc) { case DYNAMIC_PC: case DYNAMIC_PC_LOOKUP: dc->pc = dc->npc; - gen_op_next_insn(); + tcg_gen_mov_tl(cpu_pc, cpu_npc); + tcg_gen_addi_tl(cpu_npc, cpu_npc, 4); break; + case JUMP_PC: /* we can do a static jump */ - gen_branch2(dc, dc->jump_pc[0], dc->jump_pc[1], cpu_cond); + l1 = gen_new_label(); + tcg_gen_brcondi_tl(dc->jump.cond, dc->jump.c1, dc->jump.c2, l1); + + /* jump not taken */ + gen_goto_tb(dc, 1, dc->jump_pc[1], dc->jump_pc[1] + 4); + + /* jump taken */ + gen_set_label(l1); + gen_goto_tb(dc, 0, dc->jump_pc[0], dc->jump_pc[0] + 4); + dc->base.is_jmp = DISAS_NORETURN; break; + default: g_assert_not_reached(); } @@ -2625,41 +2377,47 @@ static bool advance_pc(DisasContext *dc) * Major opcodes 00 and 01 -- branches, call, and sethi */ -static bool advance_jump_uncond_never(DisasContext *dc, bool annul) +static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, + bool annul, int disp) { - if (annul) { - dc->pc = dc->npc + 4; - dc->npc = dc->pc + 4; - } else { - dc->pc = dc->npc; - dc->npc = dc->pc + 4; + target_ulong dest = address_mask_i(dc, dc->pc + disp * 4); + target_ulong npc; + + finishing_insn(dc); + + if (cmp->cond == TCG_COND_ALWAYS) { + if (annul) { + dc->pc = dest; + dc->npc = dest + 4; + } else { + gen_mov_pc_npc(dc); + dc->npc = dest; + } + return true; } - return true; -} -static bool advance_jump_uncond_always(DisasContext *dc, bool annul, - target_ulong dest) -{ - if (annul) { - dc->pc = dest; - dc->npc = dest + 4; - } else { - dc->pc = dc->npc; - dc->npc = dest; - tcg_gen_mov_tl(cpu_pc, cpu_npc); + if (cmp->cond == TCG_COND_NEVER) { + npc = dc->npc; + if (npc & 3) { + gen_mov_pc_npc(dc); + if (annul) { + tcg_gen_addi_tl(cpu_pc, cpu_pc, 4); + } + tcg_gen_addi_tl(cpu_npc, cpu_pc, 4); + } else { + dc->pc = npc + (annul ? 
4 : 0); + dc->npc = dc->pc + 4; + } + return true; } - return true; -} -static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, - bool annul, target_ulong dest) -{ - target_ulong npc = dc->npc; + flush_cond(dc); + npc = dc->npc; if (annul) { TCGLabel *l1 = gen_new_label(); - tcg_gen_brcond_tl(tcg_invert_cond(cmp->cond), cmp->c1, cmp->c2, l1); + tcg_gen_brcondi_tl(tcg_invert_cond(cmp->cond), cmp->c1, cmp->c2, l1); gen_goto_tb(dc, 0, npc, dest); gen_set_label(l1); gen_goto_tb(dc, 1, npc + 4, npc + 8); @@ -2673,7 +2431,7 @@ static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, tcg_gen_mov_tl(cpu_pc, cpu_npc); tcg_gen_addi_tl(cpu_npc, cpu_npc, 4); tcg_gen_movcond_tl(cmp->cond, cpu_npc, - cmp->c1, cmp->c2, + cmp->c1, tcg_constant_tl(cmp->c2), tcg_constant_tl(dest), cpu_npc); dc->pc = npc; break; @@ -2682,14 +2440,18 @@ static bool advance_jump_cond(DisasContext *dc, DisasCompare *cmp, } } else { dc->pc = npc; + dc->npc = JUMP_PC; + dc->jump = *cmp; dc->jump_pc[0] = dest; dc->jump_pc[1] = npc + 4; - dc->npc = JUMP_PC; - if (cmp->is_bool) { - tcg_gen_mov_tl(cpu_cond, cmp->c1); + + /* The condition for cpu_cond is always NE -- normalize. */ + if (cmp->cond == TCG_COND_NE) { + tcg_gen_xori_tl(cpu_cond, cmp->c1, cmp->c2); } else { - tcg_gen_setcond_tl(cmp->cond, cpu_cond, cmp->c1, cmp->c2); + tcg_gen_setcondi_tl(cmp->cond, cpu_cond, cmp->c1, cmp->c2); } + dc->cpu_cond_live = true; } } return true; @@ -2717,20 +2479,10 @@ static bool gen_trap_float128(DisasContext *dc) static bool do_bpcc(DisasContext *dc, arg_bcc *a) { - target_long target = address_mask_i(dc, dc->pc + a->i * 4); DisasCompare cmp; - switch (a->cond) { - case 0x0: - return advance_jump_uncond_never(dc, a->a); - case 0x8: - return advance_jump_uncond_always(dc, a->a, target); - default: - flush_cond(dc); - - gen_compare(&cmp, a->cc, a->cond, dc); - return advance_jump_cond(dc, &cmp, a->a, target); - } + gen_compare(&cmp, a->cc, a->cond, dc); + return advance_jump_cond(dc, &cmp, a->a, a->i); } TRANS(Bicc, ALL, do_bpcc, a) @@ -2738,23 +2490,13 @@ TRANS(BPcc, 64, do_bpcc, a) static bool do_fbpfcc(DisasContext *dc, arg_bcc *a) { - target_long target = address_mask_i(dc, dc->pc + a->i * 4); DisasCompare cmp; if (gen_trap_ifnofpu(dc)) { return true; } - switch (a->cond) { - case 0x0: - return advance_jump_uncond_never(dc, a->a); - case 0x8: - return advance_jump_uncond_always(dc, a->a, target); - default: - flush_cond(dc); - - gen_fcompare(&cmp, a->cc, a->cond); - return advance_jump_cond(dc, &cmp, a->a, target); - } + gen_fcompare(&cmp, a->cc, a->cond); + return advance_jump_cond(dc, &cmp, a->a, a->i); } TRANS(FBPfcc, 64, do_fbpfcc, a) @@ -2762,19 +2504,15 @@ TRANS(FBfcc, ALL, do_fbpfcc, a) static bool trans_BPr(DisasContext *dc, arg_BPr *a) { - target_long target = address_mask_i(dc, dc->pc + a->i * 4); DisasCompare cmp; if (!avail_64(dc)) { return false; } - if (gen_tcg_cond_reg[a->cond] == TCG_COND_NEVER) { + if (!gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1))) { return false; } - - flush_cond(dc); - gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1)); - return advance_jump_cond(dc, &cmp, a->a, target); + return advance_jump_cond(dc, &cmp, a->a, a->i); } static bool trans_CALL(DisasContext *dc, arg_CALL *a) @@ -2848,6 +2586,8 @@ static bool do_tcc(DisasContext *dc, int cond, int cc, tcg_gen_addi_i32(trap, trap, TT_TRAP); } + finishing_insn(dc); + /* Trap always. 
*/ if (cond == 8) { save_state(dc); @@ -2860,7 +2600,7 @@ static bool do_tcc(DisasContext *dc, int cond, int cc, flush_cond(dc); lab = delay_exceptionv(dc, trap); gen_compare(&cmp, cc, cond, dc); - tcg_gen_brcond_tl(cmp.cond, cmp.c1, cmp.c2, lab); + tcg_gen_brcondi_tl(cmp.cond, cmp.c1, cmp.c2, lab); return advance_pc(dc); } @@ -2957,7 +2697,6 @@ TRANS(RDASR17, ASR17, do_rd_special, true, a->rd, do_rd_leon3_config) static TCGv do_rdccr(DisasContext *dc, TCGv dst) { - update_psr(dc); gen_helper_rdccr(dst, tcg_env); return dst; } @@ -3070,7 +2809,6 @@ TRANS(RDSTRAND_STATUS, HYPV, do_rd_special, true, a->rd, do_rdstrand_status) static TCGv do_rdpsr(DisasContext *dc, TCGv dst) { - update_psr(dc); gen_helper_rdpsr(dst, tcg_env); return dst; } @@ -3466,6 +3204,7 @@ TRANS(WRSTICK_CMPR, 64, do_wr_special, a, supervisor(dc), do_wrstick_cmpr) static void do_wrpowerdown(DisasContext *dc, TCGv src) { + finishing_insn(dc); save_state(dc); gen_helper_power_down(tcg_env); } @@ -3475,8 +3214,6 @@ TRANS(WRPOWERDOWN, POWERDOWN, do_wr_special, a, supervisor(dc), do_wrpowerdown) static void do_wrpsr(DisasContext *dc, TCGv src) { gen_helper_wrpsr(tcg_env, src); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS); - dc->cc_op = CC_OP_FLAGS; dc->base.is_jmp = DISAS_EXIT; } @@ -3740,9 +3477,10 @@ static bool trans_NOP(DisasContext *dc, arg_NOP *a) TRANS(NOP_v7, 32, trans_NOP, a) TRANS(NOP_v9, 64, trans_NOP, a) -static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, +static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, void (*func)(TCGv, TCGv, TCGv), - void (*funci)(TCGv, TCGv, target_long)) + void (*funci)(TCGv, TCGv, target_long), + bool logic_cc) { TCGv dst, src1; @@ -3751,8 +3489,8 @@ static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, return false; } - if (a->cc) { - dst = cpu_cc_dst; + if (logic_cc) { + dst = cpu_cc_N; } else { dst = gen_dest_gpr(dc, a->rd); } @@ -3767,43 +3505,48 @@ static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, } else { func(dst, src1, cpu_regs[a->rs2_or_imm]); } - gen_store_gpr(dc, a->rd, dst); - if (a->cc) { - tcg_gen_movi_i32(cpu_cc_op, cc_op); - dc->cc_op = cc_op; + if (logic_cc) { + if (TARGET_LONG_BITS == 64) { + tcg_gen_mov_tl(cpu_icc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_icc_C, 0); + } + tcg_gen_mov_tl(cpu_cc_Z, cpu_cc_N); + tcg_gen_movi_tl(cpu_cc_C, 0); + tcg_gen_movi_tl(cpu_cc_V, 0); } + + gen_store_gpr(dc, a->rd, dst); return advance_pc(dc); } -static bool do_arith(DisasContext *dc, arg_r_r_ri_cc *a, int cc_op, +static bool do_arith(DisasContext *dc, arg_r_r_ri_cc *a, void (*func)(TCGv, TCGv, TCGv), void (*funci)(TCGv, TCGv, target_long), void (*func_cc)(TCGv, TCGv, TCGv)) { if (a->cc) { - assert(cc_op >= 0); - return do_arith_int(dc, a, cc_op, func_cc, NULL); + return do_arith_int(dc, a, func_cc, NULL, false); } - return do_arith_int(dc, a, cc_op, func, funci); + return do_arith_int(dc, a, func, funci, false); } static bool do_logic(DisasContext *dc, arg_r_r_ri_cc *a, void (*func)(TCGv, TCGv, TCGv), void (*funci)(TCGv, TCGv, target_long)) { - return do_arith_int(dc, a, CC_OP_LOGIC, func, funci); + return do_arith_int(dc, a, func, funci, a->cc); } -TRANS(ADD, ALL, do_arith, a, CC_OP_ADD, - tcg_gen_add_tl, tcg_gen_addi_tl, gen_op_add_cc) -TRANS(SUB, ALL, do_arith, a, CC_OP_SUB, - tcg_gen_sub_tl, tcg_gen_subi_tl, gen_op_sub_cc) +TRANS(ADD, ALL, do_arith, a, tcg_gen_add_tl, tcg_gen_addi_tl, gen_op_addcc) +TRANS(SUB, ALL, do_arith, a, tcg_gen_sub_tl, tcg_gen_subi_tl, gen_op_subcc) +TRANS(ADDC, ALL, do_arith, a, gen_op_addc, 
NULL, gen_op_addccc) +TRANS(SUBC, ALL, do_arith, a, gen_op_subc, NULL, gen_op_subccc) -TRANS(TADDcc, ALL, do_arith, a, CC_OP_TADD, NULL, NULL, gen_op_add_cc) -TRANS(TSUBcc, ALL, do_arith, a, CC_OP_TSUB, NULL, NULL, gen_op_sub_cc) -TRANS(TADDccTV, ALL, do_arith, a, CC_OP_TADDTV, NULL, NULL, gen_op_taddcctv) -TRANS(TSUBccTV, ALL, do_arith, a, CC_OP_TSUBTV, NULL, NULL, gen_op_tsubcctv) +TRANS(TADDcc, ALL, do_arith, a, NULL, NULL, gen_op_taddcc) +TRANS(TSUBcc, ALL, do_arith, a, NULL, NULL, gen_op_tsubcc) +TRANS(TADDccTV, ALL, do_arith, a, NULL, NULL, gen_op_taddcctv) +TRANS(TSUBccTV, ALL, do_arith, a, NULL, NULL, gen_op_tsubcctv) TRANS(AND, ALL, do_logic, a, tcg_gen_and_tl, tcg_gen_andi_tl) TRANS(XOR, ALL, do_logic, a, tcg_gen_xor_tl, tcg_gen_xori_tl) @@ -3811,17 +3554,16 @@ TRANS(ANDN, ALL, do_logic, a, tcg_gen_andc_tl, NULL) TRANS(ORN, ALL, do_logic, a, tcg_gen_orc_tl, NULL) TRANS(XORN, ALL, do_logic, a, tcg_gen_eqv_tl, NULL) -TRANS(MULX, 64, do_arith, a, -1, tcg_gen_mul_tl, tcg_gen_muli_tl, NULL) +TRANS(MULX, 64, do_arith, a, tcg_gen_mul_tl, tcg_gen_muli_tl, NULL) TRANS(UMUL, MUL, do_logic, a, gen_op_umul, NULL) TRANS(SMUL, MUL, do_logic, a, gen_op_smul, NULL) +TRANS(MULScc, ALL, do_arith, a, NULL, NULL, gen_op_mulscc) -TRANS(UDIVX, 64, do_arith, a, -1, gen_op_udivx, NULL, NULL) -TRANS(SDIVX, 64, do_arith, a, -1, gen_op_sdivx, NULL, NULL) -TRANS(UDIV, DIV, do_arith, a, CC_OP_DIV, gen_op_udiv, NULL, gen_op_udivcc) -TRANS(SDIV, DIV, do_arith, a, CC_OP_DIV, gen_op_sdiv, NULL, gen_op_sdivcc) +TRANS(UDIVcc, DIV, do_arith, a, NULL, NULL, gen_op_udivcc) +TRANS(SDIV, DIV, do_arith, a, gen_op_sdiv, NULL, gen_op_sdivcc) /* TODO: Should have feature bit -- comes in with UltraSparc T2. */ -TRANS(POPC, 64, do_arith, a, -1, gen_op_popc, NULL, NULL) +TRANS(POPC, 64, do_arith, a, gen_op_popc, NULL, NULL) static bool trans_OR(DisasContext *dc, arg_r_r_ri_cc *a) { @@ -3840,58 +3582,152 @@ static bool trans_OR(DisasContext *dc, arg_r_r_ri_cc *a) return do_logic(dc, a, tcg_gen_or_tl, tcg_gen_ori_tl); } -static bool trans_ADDC(DisasContext *dc, arg_r_r_ri_cc *a) -{ - switch (dc->cc_op) { - case CC_OP_DIV: - case CC_OP_LOGIC: - /* Carry is known to be zero. Fall back to plain ADD. */ - return do_arith(dc, a, CC_OP_ADD, - tcg_gen_add_tl, tcg_gen_addi_tl, gen_op_add_cc); - case CC_OP_ADD: - case CC_OP_TADD: - case CC_OP_TADDTV: - return do_arith(dc, a, CC_OP_ADDX, - gen_op_addc_add, NULL, gen_op_addccc_add); - case CC_OP_SUB: - case CC_OP_TSUB: - case CC_OP_TSUBTV: - return do_arith(dc, a, CC_OP_ADDX, - gen_op_addc_sub, NULL, gen_op_addccc_sub); - default: - return do_arith(dc, a, CC_OP_ADDX, - gen_op_addc_generic, NULL, gen_op_addccc_generic); - } -} - -static bool trans_SUBC(DisasContext *dc, arg_r_r_ri_cc *a) -{ - switch (dc->cc_op) { - case CC_OP_DIV: - case CC_OP_LOGIC: - /* Carry is known to be zero. Fall back to plain SUB. */ - return do_arith(dc, a, CC_OP_SUB, - tcg_gen_sub_tl, tcg_gen_subi_tl, gen_op_sub_cc); - case CC_OP_ADD: - case CC_OP_TADD: - case CC_OP_TADDTV: - return do_arith(dc, a, CC_OP_SUBX, - gen_op_subc_add, NULL, gen_op_subccc_add); - case CC_OP_SUB: - case CC_OP_TSUB: - case CC_OP_TSUBTV: - return do_arith(dc, a, CC_OP_SUBX, - gen_op_subc_sub, NULL, gen_op_subccc_sub); - default: - return do_arith(dc, a, CC_OP_SUBX, - gen_op_subc_generic, NULL, gen_op_subccc_generic); +static bool trans_UDIV(DisasContext *dc, arg_r_r_ri *a) +{ + TCGv_i64 t1, t2; + TCGv dst; + + if (!avail_DIV(dc)) { + return false; + } + /* For simplicity, we under-decoded the rs2 form. 
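Later in this trans_UDIV hunk the 64-bit dividend is formed as y:rs1 (tcg_gen_concat_tl_i64) and the quotient is clamped with tcg_gen_umin_i64. The arithmetic it implements, checked standalone (a sketch, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t y = 1, rs1 = 0, rs2 = 2;
        uint64_t dividend = ((uint64_t)y << 32) | rs1;   /* concat_tl_i64(rs1, y) */
        uint64_t q = dividend / rs2;
        assert(q == 0x80000000u);                        /* fits in 32 bits */

        rs2 = 1;                                         /* quotient no longer fits */
        q = dividend / rs2;
        if (q > UINT32_MAX) {
            q = UINT32_MAX;                              /* the umin clamp */
        }
        assert(q == UINT32_MAX);
        return 0;
    }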
*/ + if (!a->imm && a->rs2_or_imm & ~0x1f) { + return false; + } + + if (unlikely(a->rs2_or_imm == 0)) { + gen_exception(dc, TT_DIV_ZERO); + return true; + } + + if (a->imm) { + t2 = tcg_constant_i64((uint32_t)a->rs2_or_imm); + } else { + TCGLabel *lab; + TCGv_i32 n2; + + finishing_insn(dc); + flush_cond(dc); + + n2 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(n2, cpu_regs[a->rs2_or_imm]); + + lab = delay_exception(dc, TT_DIV_ZERO); + tcg_gen_brcondi_i32(TCG_COND_EQ, n2, 0, lab); + + t2 = tcg_temp_new_i64(); +#ifdef TARGET_SPARC64 + tcg_gen_ext32u_i64(t2, cpu_regs[a->rs2_or_imm]); +#else + tcg_gen_extu_i32_i64(t2, cpu_regs[a->rs2_or_imm]); +#endif } + + t1 = tcg_temp_new_i64(); + tcg_gen_concat_tl_i64(t1, gen_load_gpr(dc, a->rs1), cpu_y); + + tcg_gen_divu_i64(t1, t1, t2); + tcg_gen_umin_i64(t1, t1, tcg_constant_i64(UINT32_MAX)); + + dst = gen_dest_gpr(dc, a->rd); + tcg_gen_trunc_i64_tl(dst, t1); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); } -static bool trans_MULScc(DisasContext *dc, arg_r_r_ri_cc *a) +static bool trans_UDIVX(DisasContext *dc, arg_r_r_ri *a) { - update_psr(dc); - return do_arith(dc, a, CC_OP_ADD, NULL, NULL, gen_op_mulscc); + TCGv dst, src1, src2; + + if (!avail_64(dc)) { + return false; + } + /* For simplicity, we under-decoded the rs2 form. */ + if (!a->imm && a->rs2_or_imm & ~0x1f) { + return false; + } + + if (unlikely(a->rs2_or_imm == 0)) { + gen_exception(dc, TT_DIV_ZERO); + return true; + } + + if (a->imm) { + src2 = tcg_constant_tl(a->rs2_or_imm); + } else { + TCGLabel *lab; + + finishing_insn(dc); + flush_cond(dc); + + lab = delay_exception(dc, TT_DIV_ZERO); + src2 = cpu_regs[a->rs2_or_imm]; + tcg_gen_brcondi_tl(TCG_COND_EQ, src2, 0, lab); + } + + dst = gen_dest_gpr(dc, a->rd); + src1 = gen_load_gpr(dc, a->rs1); + + tcg_gen_divu_tl(dst, src1, src2); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); +} + +static bool trans_SDIVX(DisasContext *dc, arg_r_r_ri *a) +{ + TCGv dst, src1, src2; + + if (!avail_64(dc)) { + return false; + } + /* For simplicity, we under-decoded the rs2 form. */ + if (!a->imm && a->rs2_or_imm & ~0x1f) { + return false; + } + + if (unlikely(a->rs2_or_imm == 0)) { + gen_exception(dc, TT_DIV_ZERO); + return true; + } + + dst = gen_dest_gpr(dc, a->rd); + src1 = gen_load_gpr(dc, a->rs1); + + if (a->imm) { + if (unlikely(a->rs2_or_imm == -1)) { + tcg_gen_neg_tl(dst, src1); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); + } + src2 = tcg_constant_tl(a->rs2_or_imm); + } else { + TCGLabel *lab; + TCGv t1, t2; + + finishing_insn(dc); + flush_cond(dc); + + lab = delay_exception(dc, TT_DIV_ZERO); + src2 = cpu_regs[a->rs2_or_imm]; + tcg_gen_brcondi_tl(TCG_COND_EQ, src2, 0, lab); + + /* + * Need to avoid INT64_MIN / -1, which will trap on x86 host. + * Set SRC2 to 1 as a new divisor, to produce the correct result. 
+ */ + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src1, (target_long)INT64_MIN); + tcg_gen_setcondi_tl(TCG_COND_EQ, t2, src2, -1); + tcg_gen_and_tl(t1, t1, t2); + tcg_gen_movcond_tl(TCG_COND_NE, t1, t1, tcg_constant_tl(0), + tcg_constant_tl(1), src2); + src2 = t1; + } + + tcg_gen_div_tl(dst, src1, src2); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); } static bool gen_edge(DisasContext *dc, arg_r_r_r *a, @@ -3906,11 +3742,7 @@ static bool gen_edge(DisasContext *dc, arg_r_r_r *a, s2 = gen_load_gpr(dc, a->rs2); if (cc) { - tcg_gen_mov_tl(cpu_cc_src, s1); - tcg_gen_mov_tl(cpu_cc_src2, s2); - tcg_gen_sub_tl(cpu_cc_dst, s1, s2); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB); - dc->cc_op = CC_OP_SUB; + gen_op_subcc(cpu_cc_N, s1, s2); } /* @@ -4152,8 +3984,9 @@ static TCGv gen_rs2_or_imm(DisasContext *dc, bool imm, int rs2_or_imm) static bool do_mov_cond(DisasContext *dc, DisasCompare *cmp, int rd, TCGv src2) { TCGv dst = gen_load_gpr(dc, rd); + TCGv c2 = tcg_constant_tl(cmp->c2); - tcg_gen_movcond_tl(cmp->cond, dst, cmp->c1, cmp->c2, src2, dst); + tcg_gen_movcond_tl(cmp->cond, dst, cmp->c1, c2, src2, dst); gen_store_gpr(dc, rd, dst); return advance_pc(dc); } @@ -4190,7 +4023,9 @@ static bool trans_MOVR(DisasContext *dc, arg_MOVR *a) if (src2 == NULL) { return false; } - gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1)); + if (!gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1))) { + return false; + } return do_mov_cond(dc, &cmp, a->rd, src2); } @@ -5177,6 +5012,9 @@ static bool do_fmovr(DisasContext *dc, arg_FMOVRs *a, bool is_128, { DisasCompare cmp; + if (!gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1))) { + return false; + } if (gen_trap_ifnofpu(dc)) { return true; } @@ -5185,7 +5023,6 @@ static bool do_fmovr(DisasContext *dc, arg_FMOVRs *a, bool is_128, } gen_op_clear_ieee_excp_and_FTT(); - gen_compare_reg(&cmp, a->cond, gen_load_gpr(dc, a->rs1)); func(dc, &cmp, a->rd, a->rs2); return advance_pc(dc); } @@ -5322,7 +5159,6 @@ static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->pc = dc->base.pc_first; dc->npc = (target_ulong)dc->base.tb->cs_base; - dc->cc_op = CC_OP_DYNAMIC; dc->mem_idx = dc->base.tb->flags & TB_FLAG_MMU_MASK; dc->def = &env->def; dc->fpu_enabled = tb_fpu_enabled(dc->base.tb->flags); @@ -5398,6 +5234,8 @@ static void sparc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) DisasDelayException *e, *e_next; bool may_lookup; + finishing_insn(dc); + switch (dc->base.is_jmp) { case DISAS_NEXT: case DISAS_TOO_MANY: @@ -5511,23 +5349,17 @@ void sparc_tcg_init(void) "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", }; - static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = { -#ifdef TARGET_SPARC64 - { &cpu_xcc, offsetof(CPUSPARCState, xcc), "xcc" }, - { &cpu_fprs, offsetof(CPUSPARCState, fprs), "fprs" }, -#endif - { &cpu_cc_op, offsetof(CPUSPARCState, cc_op), "cc_op" }, - { &cpu_psr, offsetof(CPUSPARCState, psr), "psr" }, - }; - static const struct { TCGv *ptr; int off; const char *name; } rtl[] = { #ifdef TARGET_SPARC64 { &cpu_gsr, offsetof(CPUSPARCState, gsr), "gsr" }, + { &cpu_xcc_Z, offsetof(CPUSPARCState, xcc_Z), "xcc_Z" }, + { &cpu_xcc_C, offsetof(CPUSPARCState, xcc_C), "xcc_C" }, #endif + { &cpu_cc_N, offsetof(CPUSPARCState, cc_N), "cc_N" }, + { &cpu_cc_V, offsetof(CPUSPARCState, cc_V), "cc_V" }, + { &cpu_icc_Z, offsetof(CPUSPARCState, icc_Z), "icc_Z" }, + { &cpu_icc_C, offsetof(CPUSPARCState, icc_C), "icc_C" }, { &cpu_cond, offsetof(CPUSPARCState, cond), 
"cond" }, - { &cpu_cc_src, offsetof(CPUSPARCState, cc_src), "cc_src" }, - { &cpu_cc_src2, offsetof(CPUSPARCState, cc_src2), "cc_src2" }, - { &cpu_cc_dst, offsetof(CPUSPARCState, cc_dst), "cc_dst" }, { &cpu_fsr, offsetof(CPUSPARCState, fsr), "fsr" }, { &cpu_pc, offsetof(CPUSPARCState, pc), "pc" }, { &cpu_npc, offsetof(CPUSPARCState, npc), "npc" }, @@ -5541,10 +5373,6 @@ void sparc_tcg_init(void) offsetof(CPUSPARCState, regwptr), "regwptr"); - for (i = 0; i < ARRAY_SIZE(r32); ++i) { - *r32[i].ptr = tcg_global_mem_new_i32(tcg_env, r32[i].off, r32[i].name); - } - for (i = 0; i < ARRAY_SIZE(rtl); ++i) { *rtl[i].ptr = tcg_global_mem_new(tcg_env, rtl[i].off, rtl[i].name); } @@ -5567,6 +5395,11 @@ void sparc_tcg_init(void) offsetof(CPUSPARCState, fpr[i]), fregnames[i]); } + +#ifdef TARGET_SPARC64 + cpu_fprs = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUSPARCState, fprs), "fprs"); +#endif } void sparc_restore_state_to_opc(CPUState *cs, diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c index 3a7c0ff943..16d1c70fe7 100644 --- a/target/sparc/win_helper.c +++ b/target/sparc/win_helper.c @@ -53,23 +53,47 @@ void cpu_set_cwp(CPUSPARCState *env, int new_cwp) target_ulong cpu_get_psr(CPUSPARCState *env) { - helper_compute_psr(env); + target_ulong icc = 0; + + icc |= ((int32_t)env->cc_N < 0) << PSR_NEG_SHIFT; + icc |= ((int32_t)env->cc_V < 0) << PSR_OVF_SHIFT; + icc |= ((int32_t)env->icc_Z == 0) << PSR_ZERO_SHIFT; + if (TARGET_LONG_BITS == 64) { + icc |= extract64(env->icc_C, 32, 1) << PSR_CARRY_SHIFT; + } else { + icc |= env->icc_C << PSR_CARRY_SHIFT; + } #if !defined(TARGET_SPARC64) - return env->version | (env->psr & PSR_ICC) | + return env->version | icc | (env->psref ? PSR_EF : 0) | (env->psrpil << 8) | (env->psrs ? PSR_S : 0) | (env->psrps ? PSR_PS : 0) | (env->psret ? PSR_ET : 0) | env->cwp; #else - return env->psr & PSR_ICC; + return icc; #endif } +void cpu_put_psr_icc(CPUSPARCState *env, target_ulong val) +{ + if (TARGET_LONG_BITS == 64) { + /* Do not clobber xcc.[NV] */ + env->cc_N = deposit64(env->cc_N, 0, 32, -(val & PSR_NEG)); + env->cc_V = deposit64(env->cc_V, 0, 32, -(val & PSR_OVF)); + env->icc_C = -(val & PSR_CARRY); + } else { + env->cc_N = -(val & PSR_NEG); + env->cc_V = -(val & PSR_OVF); + env->icc_C = (val >> PSR_CARRY_SHIFT) & 1; + } + env->icc_Z = ~val & PSR_ZERO; +} + void cpu_put_psr_raw(CPUSPARCState *env, target_ulong val) { - env->psr = val & PSR_ICC; + cpu_put_psr_icc(env, val); #if !defined(TARGET_SPARC64) env->psref = (val & PSR_EF) ? 1 : 0; env->psrpil = (val & PSR_PIL) >> 8; @@ -77,7 +101,6 @@ void cpu_put_psr_raw(CPUSPARCState *env, target_ulong val) env->psrps = (val & PSR_PS) ? 1 : 0; env->psret = (val & PSR_ET) ? 
1 : 0; #endif - env->cc_op = CC_OP_FLAGS; #if !defined(TARGET_SPARC64) cpu_set_cwp(env, val & PSR_CWP); #endif @@ -244,18 +267,29 @@ void helper_restored(CPUSPARCState *env) target_ulong cpu_get_ccr(CPUSPARCState *env) { - target_ulong psr; + target_ulong ccr = 0; + + ccr |= (env->icc_C >> 32) & 1; + ccr |= ((int32_t)env->cc_V < 0) << 1; + ccr |= ((int32_t)env->icc_Z == 0) << 2; + ccr |= ((int32_t)env->cc_N < 0) << 3; - psr = cpu_get_psr(env); + ccr |= env->xcc_C << 4; + ccr |= (env->cc_V < 0) << 5; + ccr |= (env->xcc_Z == 0) << 6; + ccr |= (env->cc_N < 0) << 7; - return ((env->xcc >> 20) << 4) | ((psr & PSR_ICC) >> 20); + return ccr; } void cpu_put_ccr(CPUSPARCState *env, target_ulong val) { - env->xcc = (val >> 4) << 20; - env->psr = (val & 0xf) << 20; - CC_OP = CC_OP_FLAGS; + env->cc_N = deposit64(-(val & 0x08), 32, 32, -(val & 0x80)); + env->cc_V = deposit64(-(val & 0x02), 32, 32, -(val & 0x20)); + env->icc_C = (uint64_t)val << 32; + env->xcc_C = (val >> 4) & 1; + env->icc_Z = ~val & 0x04; + env->xcc_Z = ~val & 0x40; } target_ulong cpu_get_cwp64(CPUSPARCState *env) diff --git a/tests/migration/guestperf/comparison.py b/tests/migration/guestperf/comparison.py index c03b3f6d7e..42cc0372d1 100644 --- a/tests/migration/guestperf/comparison.py +++ b/tests/migration/guestperf/comparison.py @@ -135,4 +135,27 @@ COMPARISONS = [ Scenario("compr-multifd-channels-64", multifd=True, multifd_channels=64), ]), + + # Looking at effect of dirty-limit with + # varying x_vcpu_dirty_limit_period + Comparison("compr-dirty-limit-period", scenarios = [ + Scenario("compr-dirty-limit-period-500", + dirty_limit=True, x_vcpu_dirty_limit_period=500), + Scenario("compr-dirty-limit-period-800", + dirty_limit=True, x_vcpu_dirty_limit_period=800), + Scenario("compr-dirty-limit-period-1000", + dirty_limit=True, x_vcpu_dirty_limit_period=1000), + ]), + + + # Looking at effect of dirty-limit with + # varying vcpu_dirty_limit + Comparison("compr-dirty-limit", scenarios = [ + Scenario("compr-dirty-limit-10MB", + dirty_limit=True, vcpu_dirty_limit=10), + Scenario("compr-dirty-limit-20MB", + dirty_limit=True, vcpu_dirty_limit=20), + Scenario("compr-dirty-limit-50MB", + dirty_limit=True, vcpu_dirty_limit=50), + ]), ] diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index da96ca034a..608d7270f6 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -102,6 +102,8 @@ class Engine(object): info.get("expected-downtime", 0), info.get("setup-time", 0), info.get("cpu-throttle-percentage", 0), + info.get("dirty-limit-throttle-time-per-round", 0), + info.get("dirty-limit-ring-full-time", 0), ) def _migrate(self, hardware, scenario, src, dst, connect_uri): @@ -203,6 +205,21 @@ class Engine(object): resp = dst.cmd("migrate-set-parameters", multifd_channels=scenario._multifd_channels) + if scenario._dirty_limit: + if not hardware._dirty_ring_size: + raise Exception("dirty ring size must be configured when " + "testing dirty limit migration") + + resp = src.cmd("migrate-set-capabilities", + capabilities = [ + { "capability": "dirty-limit", + "state": True } + ]) + resp = src.cmd("migrate-set-parameters", + x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period) + resp = src.cmd("migrate-set-parameters", + vcpu_dirty_limit=scenario._vcpu_dirty_limit) + resp = src.cmd("migrate", uri=connect_uri) post_copy = False @@ -325,7 +342,6 @@ class Engine(object): cmdline = "'" + cmdline + "'" argv = [ - "-accel", "kvm", "-cpu", "host", "-kernel", self._kernel, 
"-initrd", self._initrd, @@ -333,6 +349,11 @@ class Engine(object): "-m", str((hardware._mem * 1024) + 512), "-smp", str(hardware._cpus), ] + if hardware._dirty_ring_size: + argv.extend(["-accel", "kvm,dirty-ring-size=%s" % + hardware._dirty_ring_size]) + else: + argv.extend(["-accel", "kvm"]) argv.extend(self._get_qemu_serial_args()) diff --git a/tests/migration/guestperf/hardware.py b/tests/migration/guestperf/hardware.py index 3145785ffd..f779cc050b 100644 --- a/tests/migration/guestperf/hardware.py +++ b/tests/migration/guestperf/hardware.py @@ -23,7 +23,8 @@ class Hardware(object): src_cpu_bind=None, src_mem_bind=None, dst_cpu_bind=None, dst_mem_bind=None, prealloc_pages = False, - huge_pages=False, locked_pages=False): + huge_pages=False, locked_pages=False, + dirty_ring_size=0): self._cpus = cpus self._mem = mem # GiB self._src_mem_bind = src_mem_bind # List of NUMA nodes @@ -33,6 +34,7 @@ class Hardware(object): self._prealloc_pages = prealloc_pages self._huge_pages = huge_pages self._locked_pages = locked_pages + self._dirty_ring_size = dirty_ring_size def serialize(self): @@ -46,6 +48,7 @@ class Hardware(object): "prealloc_pages": self._prealloc_pages, "huge_pages": self._huge_pages, "locked_pages": self._locked_pages, + "dirty_ring_size": self._dirty_ring_size, } @classmethod @@ -59,4 +62,5 @@ class Hardware(object): data["dst_mem_bind"], data["prealloc_pages"], data["huge_pages"], - data["locked_pages"]) + data["locked_pages"], + data["dirty_ring_size"]) diff --git a/tests/migration/guestperf/progress.py b/tests/migration/guestperf/progress.py index ab1ee57273..d490584217 100644 --- a/tests/migration/guestperf/progress.py +++ b/tests/migration/guestperf/progress.py @@ -81,7 +81,9 @@ class Progress(object): downtime, downtime_expected, setup_time, - throttle_pcent): + throttle_pcent, + dirty_limit_throttle_time_per_round, + dirty_limit_ring_full_time): self._status = status self._ram = ram @@ -91,6 +93,10 @@ class Progress(object): self._downtime_expected = downtime_expected self._setup_time = setup_time self._throttle_pcent = throttle_pcent + self._dirty_limit_throttle_time_per_round = \ + dirty_limit_throttle_time_per_round + self._dirty_limit_ring_full_time = \ + dirty_limit_ring_full_time def serialize(self): return { @@ -102,6 +108,10 @@ class Progress(object): "downtime_expected": self._downtime_expected, "setup_time": self._setup_time, "throttle_pcent": self._throttle_pcent, + "dirty_limit_throttle_time_per_round": + self._dirty_limit_throttle_time_per_round, + "dirty_limit_ring_full_time": + self._dirty_limit_ring_full_time, } @classmethod @@ -114,4 +124,6 @@ class Progress(object): data["downtime"], data["downtime_expected"], data["setup_time"], - data["throttle_pcent"]) + data["throttle_pcent"], + data["dirty_limit_throttle_time_per_round"], + data["dirty_limit_ring_full_time"]) diff --git a/tests/migration/guestperf/scenario.py b/tests/migration/guestperf/scenario.py index de70d9b2f5..154c4f5d5f 100644 --- a/tests/migration/guestperf/scenario.py +++ b/tests/migration/guestperf/scenario.py @@ -30,7 +30,9 @@ class Scenario(object): auto_converge=False, auto_converge_step=10, compression_mt=False, compression_mt_threads=1, compression_xbzrle=False, compression_xbzrle_cache=10, - multifd=False, multifd_channels=2): + multifd=False, multifd_channels=2, + dirty_limit=False, x_vcpu_dirty_limit_period=500, + vcpu_dirty_limit=1): self._name = name @@ -60,6 +62,10 @@ class Scenario(object): self._multifd = multifd self._multifd_channels = multifd_channels + self._dirty_limit = 
dirty_limit + self._x_vcpu_dirty_limit_period = x_vcpu_dirty_limit_period + self._vcpu_dirty_limit = vcpu_dirty_limit + def serialize(self): return { "name": self._name, @@ -79,6 +85,9 @@ class Scenario(object): "compression_xbzrle_cache": self._compression_xbzrle_cache, "multifd": self._multifd, "multifd_channels": self._multifd_channels, + "dirty_limit": self._dirty_limit, + "x_vcpu_dirty_limit_period": self._x_vcpu_dirty_limit_period, + "vcpu_dirty_limit": self._vcpu_dirty_limit, } @classmethod diff --git a/tests/migration/guestperf/shell.py b/tests/migration/guestperf/shell.py index 8a809e3dda..c85d89efec 100644 --- a/tests/migration/guestperf/shell.py +++ b/tests/migration/guestperf/shell.py @@ -60,6 +60,8 @@ class BaseShell(object): parser.add_argument("--prealloc-pages", dest="prealloc_pages", default=False) parser.add_argument("--huge-pages", dest="huge_pages", default=False) parser.add_argument("--locked-pages", dest="locked_pages", default=False) + parser.add_argument("--dirty-ring-size", dest="dirty_ring_size", + default=0, type=int) self._parser = parser @@ -89,7 +91,9 @@ class BaseShell(object): locked_pages=args.locked_pages, huge_pages=args.huge_pages, - prealloc_pages=args.prealloc_pages) + prealloc_pages=args.prealloc_pages, + + dirty_ring_size=args.dirty_ring_size) class Shell(BaseShell): @@ -127,6 +131,17 @@ class Shell(BaseShell): parser.add_argument("--multifd-channels", dest="multifd_channels", default=2, type=int) + parser.add_argument("--dirty-limit", dest="dirty_limit", default=False, + action="store_true") + + parser.add_argument("--x-vcpu-dirty-limit-period", + dest="x_vcpu_dirty_limit_period", + default=500, type=int) + + parser.add_argument("--vcpu-dirty-limit", + dest="vcpu_dirty_limit", + default=1, type=int) + def get_scenario(self, args): return Scenario(name="perfreport", downtime=args.downtime, @@ -150,7 +165,12 @@ class Shell(BaseShell): compression_xbzrle_cache=args.compression_xbzrle_cache, multifd=args.multifd, - multifd_channels=args.multifd_channels) + multifd_channels=args.multifd_channels, + + dirty_limit=args.dirty_limit, + x_vcpu_dirty_limit_period=\ + args.x_vcpu_dirty_limit_period, + vcpu_dirty_limit=args.vcpu_dirty_limit) def run(self, argv): args = self._parser.parse_args(argv) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e803b46039..5752412b64 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -3091,6 +3091,166 @@ static void test_vcpu_dirty_limit(void) dirtylimit_stop_vm(vm); } +static void migrate_dirty_limit_wait_showup(QTestState *from, + const int64_t period, + const int64_t value) +{ + /* Enable dirty limit capability */ + migrate_set_capability(from, "dirty-limit", true); + + /* Set dirty limit parameters */ + migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); + migrate_set_parameter_int(from, "vcpu-dirty-limit", value); + + /* Make sure migrate can't converge */ + migrate_ensure_non_converge(from); + + /* To check limit rate after precopy */ + migrate_set_capability(from, "pause-before-switchover", true); + + /* Wait for the serial output from the source */ + wait_for_serial("src_serial"); +} + +/* + * This test does: + * source destination + * start vm + * start incoming vm + * migrate + * wait dirty limit to begin + * cancel migrate + * cancellation check + * restart incoming vm + * migrate + * wait dirty limit to begin + * wait pre-switchover event + * convergence condition check + * + * And see if dirty limit migration works correctly. 
+ * This test case involves many passes, so it runs in slow mode only. + */ +static void test_migrate_dirty_limit(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *from, *to; + int64_t remaining; + uint64_t throttle_us_per_full; + /* + * We want the test to be stable and as fast as possible. + * E.g., with 1Gb/s bandwidth the migration may pass without the dirty limit, + * so we need to decrease the bandwidth. + */ + const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; + const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ + const int64_t downtime_limit = 250; /* 250ms */ + /* + * We migrate through unix-socket (> 500Mb/s). + * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). + * So, we can predict the expected_threshold. + */ + const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; + int max_try_count = 10; + MigrateCommon args = { + .start = { + .hide_stderr = true, + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + /* Start src, dst vm */ + if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) { + return; + } + + /* Prepare for dirty limit migration and wait for the src vm to show up */ + migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); + + /* Start the migration */ + migrate_qmp(from, uri, "{}"); + + /* Wait for the dirty limit throttle to begin */ + throttle_us_per_full = 0; + while (throttle_us_per_full == 0) { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } + + /* Now cancel the migration and wait for the dirty limit throttle to switch off */ + migrate_cancel(from); + wait_for_migration_status(from, "cancelled", NULL); + + /* Check if the dirty limit throttle has switched off, with a ~1ms timeout */ + do { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } while (throttle_us_per_full != 0 && --max_try_count); + + /* Assert that the dirty limit is not in service */ + g_assert_cmpint(throttle_us_per_full, ==, 0); + + args = (MigrateCommon) { + .start = { + .only_target = true, + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + /* Restart the dst vm; the src vm has already shown up, so we needn't wait anymore */ + if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) { + return; + } + + /* Start the migration */ + migrate_qmp(from, uri, "{}"); + + /* Wait for the dirty limit throttle to begin */ + throttle_us_per_full = 0; + while (throttle_us_per_full == 0) { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } + + /* + * The dirty limit rate should equal the return value of + * query-vcpu-dirty-limit if the dirty-limit capability is set + */ + g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); + + /* Now that we have tested that the dirty limit works, let the migration converge */ + migrate_set_parameter_int(from, "downtime-limit", downtime_limit); + migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); + + /* + * Wait for the pre-switchover status to check whether the migration + * satisfies the convergence condition + */ + wait_for_migration_status(from, "pre-switchover", NULL); + + remaining = read_ram_property_int(from, "remaining"); + g_assert_cmpint(remaining, <, + (expected_threshold + expected_threshold / 100)); + + migrate_continue(from, "pre-switchover"); + + qtest_qmp_eventwait(to, "RESUME"); + +
wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + test_migrate_end(from, to, true); +} + static bool kvm_dirty_ring_supported(void) { #if defined(__linux__) && defined(HOST_X86_64) @@ -3301,6 +3461,10 @@ int main(int argc, char **argv) */ if (g_test_slow()) { qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); + if (g_str_equal(arch, "x86_64") && + has_kvm && kvm_dirty_ring_supported()) { + qtest_add_func("/migration/dirty_limit", test_migrate_dirty_limit); + } } qtest_add_func("/migration/multifd/tcp/plain/none", test_multifd_tcp_none); diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index 73a670e8fa..2c15f60958 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -45,6 +45,7 @@ static int query_error_class(const char *cmd) { "query-acpi-ospm-status", ERROR_CLASS_GENERIC_ERROR }, { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE }, { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR }, + { "query-hv-balloon-status-report", ERROR_CLASS_GENERIC_ERROR }, { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR }, /* Only valid with a USB bus added */ { "x-query-usb", ERROR_CLASS_GENERIC_ERROR }, |