27 files changed, 2869 insertions, 93 deletions
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index a07cd7eb5d..bfa53960c3 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -115,9 +115,13 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
         return;
     }
 
-    if (xen_device_frontend_scanf(xendev, "protocol", "%ms",
-                                  &str) != 1) {
-        protocol = BLKIF_PROTOCOL_NATIVE;
+    if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) {
+        /* x86 defaults to the 32-bit protocol even for 64-bit guests. */
+        if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) {
+            protocol = BLKIF_PROTOCOL_X86_32;
+        } else {
+            protocol = BLKIF_PROTOCOL_NATIVE;
+        }
     } else {
         if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) {
             protocol = BLKIF_PROTOCOL_X86_32;
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index 9a4b59c6f2..a6ff6a4875 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -253,6 +253,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
     MemoryDeviceInfo *value;
     PCDIMMDeviceInfo *di;
     SgxEPCDeviceInfo *se;
+    HvBalloonDeviceInfo *hi;
 
     for (info = info_list; info; info = info->next) {
         value = info->value;
@@ -310,6 +311,20 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
                 monitor_printf(mon, "  node: %" PRId64 "\n", se->node);
                 monitor_printf(mon, "  memdev: %s\n", se->memdev);
                 break;
+            case MEMORY_DEVICE_INFO_KIND_HV_BALLOON:
+                hi = value->u.hv_balloon.data;
+                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
+                               MemoryDeviceInfoKind_str(value->type),
+                               hi->id ? hi->id : "");
+                if (hi->has_memaddr) {
+                    monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n",
+                                   hi->memaddr);
+                }
+                monitor_printf(mon, "  max-size: %" PRIu64 "\n", hi->max_size);
+                if (hi->memdev) {
+                    monitor_printf(mon, "  memdev: %s\n", hi->memdev);
+                }
+                break;
             default:
                 g_assert_not_reached();
             }
diff --git a/hw/display/ati.c b/hw/display/ati.c
index 6e38e00502..9a87a5504a 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -319,11 +319,13 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size)
     case DAC_CNTL:
         val = s->regs.dac_cntl;
         break;
-    case GPIO_VGA_DDC:
-        val = s->regs.gpio_vga_ddc;
+    case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3:
+        val = ati_reg_read_offs(s->regs.gpio_vga_ddc,
+                                addr - GPIO_VGA_DDC, size);
         break;
-    case GPIO_DVI_DDC:
-        val = s->regs.gpio_dvi_ddc;
+    case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3:
+        val = ati_reg_read_offs(s->regs.gpio_dvi_ddc,
+                                addr - GPIO_DVI_DDC, size);
         break;
     case GPIO_MONID ... GPIO_MONID + 3:
         val = ati_reg_read_offs(s->regs.gpio_monid,
@@ -337,6 +339,9 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size)
     case PALETTE_DATA:
         val = vga_ioport_read(&s->vga, VGA_PEL_D);
         break;
+    case PALETTE_30_DATA:
+        val = s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IR)];
+        break;
     case CNFG_CNTL:
         val = s->regs.config_cntl;
         break;
@@ -349,14 +354,17 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size)
                                       PCI_BASE_ADDRESS_0, size) & 0xfffffff0;
         break;
     case CONFIG_APER_SIZE:
-        val = s->vga.vram_size;
+        val = s->vga.vram_size / 2;
         break;
     case CONFIG_REG_1_BASE:
         val = pci_default_read_config(&s->dev,
                                       PCI_BASE_ADDRESS_2, size) & 0xfffffff0;
         break;
     case CONFIG_REG_APER_SIZE:
-        val = memory_region_size(&s->mm);
+        val = memory_region_size(&s->mm) / 2;
+        break;
+    case HOST_PATH_CNTL:
+        val = BIT(23); /* Radeon HDP_APER_CNTL */
         break;
     case MC_STATUS:
         val = 5;
@@ -612,29 +620,34 @@ static void ati_mm_write(void *opaque, hwaddr addr,
         s->regs.dac_cntl = data & 0xffffe3ff;
         s->vga.dac_8bit = !!(data & DAC_8BIT_EN);
         break;
-    case GPIO_VGA_DDC:
+    /*
+     * GPIO regs for DDC access. Because some drivers access these via
+     * multiple byte writes we have to be careful when we send bits to
+     * avoid spurious changes in bitbang_i2c state. Only do it when either
+     * the enable bits are changed or output bits changed while enabled.
+     */
+    case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3:
         if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) {
             /* FIXME: Maybe add a property to select VGA or DVI port? */
         }
         break;
-    case GPIO_DVI_DDC:
+    case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3:
         if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) {
-            s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, data, 0);
+            ati_reg_write_offs(&s->regs.gpio_dvi_ddc,
+                               addr - GPIO_DVI_DDC, data, size);
+            if ((addr <= GPIO_DVI_DDC + 2 && addr + size > GPIO_DVI_DDC + 2) ||
+                (addr == GPIO_DVI_DDC && (s->regs.gpio_dvi_ddc & 0x30000))) {
+                s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c,
+                                               s->regs.gpio_dvi_ddc, 0);
+            }
         }
         break;
     case GPIO_MONID ... GPIO_MONID + 3:
         /* FIXME What does Radeon have here? */
         if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
+            /* Rage128p accesses DDC via MONID(1-2) with additional mask bit */
             ati_reg_write_offs(&s->regs.gpio_monid,
                                addr - GPIO_MONID, data, size);
-            /*
-             * Rage128p accesses DDC used to get EDID via these bits.
-             * Because some drivers access this via multiple byte writes
-             * we have to be careful when we send bits to avoid spurious
-             * changes in bitbang_i2c state. So only do it when mask is set
-             * and either the enable bits are changed or output bits changed
-             * while enabled.
-             */
             if ((s->regs.gpio_monid & BIT(25)) &&
                 ((addr <= GPIO_MONID + 2 && addr + size > GPIO_MONID + 2) ||
                  (addr == GPIO_MONID && (s->regs.gpio_monid & 0x60000)))) {
@@ -663,6 +676,12 @@ static void ati_mm_write(void *opaque, hwaddr addr,
         data >>= 8;
         vga_ioport_write(&s->vga, VGA_PEL_D, data & 0xff);
         break;
+    case PALETTE_30_DATA:
+        s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IW)] = data;
+        vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 22) & 0xff);
+        vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 12) & 0xff);
+        vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 2) & 0xff);
+        break;
     case CNFG_CNTL:
         s->regs.config_cntl = data;
         break;
@@ -1014,6 +1033,7 @@ static Property ati_vga_properties[] = {
     DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
                        PCI_DEVICE_ID_ATI_RAGE128_PF),
     DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
+    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -1035,11 +1055,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
     k->exit = ati_vga_exit;
 }
 
+static void ati_vga_init(Object *o)
+{
+    object_property_set_description(o, "x-pixman", "Use pixman for: "
+                                    "1: fill, 2: blit");
+}
+
 static const TypeInfo ati_vga_info = {
     .name = TYPE_ATI_VGA,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(ATIVGAState),
     .class_init = ati_vga_class_init,
+    .instance_init = ati_vga_init,
     .interfaces = (InterfaceInfo[]) {
           { INTERFACE_CONVENTIONAL_PCI_DEVICE },
           { },
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 7d786653e8..0e6b8e4367 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
     switch (s->regs.dp_mix & GMC_ROP3_MASK) {
     case ROP3_SRCCOPY:
     {
+        bool fallback = false;
         unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
                        s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
         unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
@@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
                 src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
                 src_x, src_y, dst_x, dst_y,
                 s->regs.dst_width, s->regs.dst_height);
-        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
+        if ((s->use_pixman & BIT(1)) &&
+            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
             s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
-            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
-                       src_stride, dst_stride, bpp, bpp,
-                       src_x, src_y, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
-        } else {
+            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
+                                   src_stride, dst_stride, bpp, bpp,
+                                   src_x, src_y, dst_x, dst_y,
+                                   s->regs.dst_width, s->regs.dst_height);
+        } else if (s->use_pixman & BIT(1)) {
             /* FIXME: We only really need a temporary if src and dst overlap */
             int llb = s->regs.dst_width * (bpp / 8);
             int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
             uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
                                      s->regs.dst_height);
-            pixman_blt((uint32_t *)src_bits, tmp,
-                       src_stride, tmp_stride, bpp, bpp,
-                       src_x, src_y, 0, 0,
-                       s->regs.dst_width, s->regs.dst_height);
-            pixman_blt(tmp, (uint32_t *)dst_bits,
-                       tmp_stride, dst_stride, bpp, bpp,
-                       0, 0, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
+            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
+                                   src_stride, tmp_stride, bpp, bpp,
+                                   src_x, src_y, 0, 0,
+                                   s->regs.dst_width, s->regs.dst_height);
+            if (!fallback) {
+                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
+                                       tmp_stride, dst_stride, bpp, bpp,
+                                       0, 0, dst_x, dst_y,
+                                       s->regs.dst_width, s->regs.dst_height);
+            }
             g_free(tmp);
+        } else {
+            fallback = true;
+        }
+        if (fallback) {
+            unsigned int y, i, j, bypp = bpp / 8;
+            unsigned int src_pitch = src_stride * sizeof(uint32_t);
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp;
+                j = src_x * bypp;
+                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
+                    i += (dst_y + y) * dst_pitch;
+                    j += (src_y + y) * src_pitch;
+                } else {
+                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
+                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
+                }
+                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
+            }
         }
         if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
             dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
@@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
 
         dst_stride /= sizeof(uint32_t);
         DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
-                dst_bits, dst_stride, bpp,
-                dst_x, dst_y,
-                s->regs.dst_width, s->regs.dst_height,
-                filler);
-        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
-                    dst_x, dst_y,
-                    s->regs.dst_width, s->regs.dst_height,
-                    filler);
+                dst_bits, dst_stride, bpp, dst_x, dst_y,
+                s->regs.dst_width, s->regs.dst_height, filler);
+        if (!(s->use_pixman & BIT(0)) ||
+            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
+                    s->regs.dst_width, s->regs.dst_height, filler)) {
+            /* fallback when pixman failed or we don't want to call it */
+            unsigned int x, y, i, bypp = bpp / 8;
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp + (dst_y + y) * dst_pitch;
+                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
+                    stn_he_p(&dst_bits[i], bypp, filler);
+                }
+            }
+        }
         if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
             dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
             s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c
index bd0ecd48c7..3ffa7f35df 100644
--- a/hw/display/ati_dbg.c
+++ b/hw/display/ati_dbg.c
@@ -30,6 +30,7 @@ static struct ati_regdesc ati_reg_names[] = {
     {"AMCGPIO_EN_MIR", 0x00a8},
     {"PALETTE_INDEX", 0x00b0},
     {"PALETTE_DATA", 0x00b4},
+    {"PALETTE_30_DATA", 0x00b8},
     {"CNFG_CNTL", 0x00e0},
     {"GEN_RESET_CNTL", 0x00f0},
     {"CNFG_MEMSIZE", 0x00f8},
@@ -38,6 +39,7 @@ static struct ati_regdesc ati_reg_names[] = {
     {"CONFIG_APER_SIZE", 0x0108},
     {"CONFIG_REG_1_BASE", 0x010c},
     {"CONFIG_REG_APER_SIZE", 0x0110},
+    {"HOST_PATH_CNTL", 0x0130},
     {"MEM_CNTL", 0x0140},
     {"MC_FB_LOCATION", 0x0148},
     {"MC_AGP_LOCATION", 0x014C},
diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
index e8d3c7af75..f5a47b82b0 100644
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@@ -44,6 +44,7 @@ typedef struct ATIVGARegs {
     uint32_t gpio_dvi_ddc;
     uint32_t gpio_monid;
     uint32_t config_cntl;
+    uint32_t palette[256];
     uint32_t crtc_h_total_disp;
     uint32_t crtc_h_sync_strt_wid;
     uint32_t crtc_v_total_disp;
@@ -89,6 +90,7 @@ struct ATIVGAState {
     char *model;
     uint16_t dev_id;
     uint8_t mode;
+    uint8_t use_pixman;
     bool cursor_guest_mode;
     uint16_t cursor_size;
     uint32_t cursor_offset;
diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h
index d6282b2ef2..d7127748ff 100644
--- a/hw/display/ati_regs.h
+++ b/hw/display/ati_regs.h
@@ -48,6 +48,7 @@
 #define AMCGPIO_EN_MIR                          0x00a8
 #define PALETTE_INDEX                           0x00b0
 #define PALETTE_DATA                            0x00b4
+#define PALETTE_30_DATA                         0x00b8
 #define CNFG_CNTL                               0x00e0
 #define GEN_RESET_CNTL                          0x00f0
 #define CNFG_MEMSIZE                            0x00f8
@@ -56,6 +57,7 @@
 #define CONFIG_APER_SIZE                        0x0108
 #define CONFIG_REG_1_BASE                       0x010c
 #define CONFIG_REG_APER_SIZE                    0x0110
+#define HOST_PATH_CNTL                          0x0130
 #define MEM_CNTL                                0x0140
 #define MC_FB_LOCATION                          0x0148
 #define MC_AGP_LOCATION                         0x014C
diff --git a/hw/display/macfb.c b/hw/display/macfb.c
index 2f8e016566..d61541ccb5 100644
--- a/hw/display/macfb.c
+++ b/hw/display/macfb.c
@@ -36,8 +36,8 @@
 #define DAFB_INTR_MASK      0x104
 #define DAFB_INTR_STAT      0x108
 #define DAFB_INTR_CLEAR     0x10c
-#define DAFB_RESET          0x200
-#define DAFB_LUT            0x213
+#define DAFB_LUT_INDEX      0x200
+#define DAFB_LUT            0x210
 
 #define DAFB_INTR_VBL   0x4
 
@@ -537,6 +537,11 @@ static uint64_t macfb_ctrl_read(void *opaque,
     case DAFB_MODE_SENSE:
         val = macfb_sense_read(s);
         break;
+    case DAFB_LUT ... DAFB_LUT + 3:
+        val = s->color_palette[s->palette_current];
+        s->palette_current = (s->palette_current + 1) %
+                             ARRAY_SIZE(s->color_palette);
+        break;
     default:
         if (addr < MACFB_CTRL_TOPADDR) {
             val = s->regs[addr >> 2];
@@ -583,13 +588,11 @@ static void macfb_ctrl_write(void *opaque,
         s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL;
         macfb_update_irq(s);
         break;
-    case DAFB_RESET:
-        s->palette_current = 0;
-        s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL;
-        macfb_update_irq(s);
+    case DAFB_LUT_INDEX:
+        s->palette_current = (val & 0xff) * 3;
         break;
-    case DAFB_LUT:
-        s->color_palette[s->palette_current] = val;
+    case DAFB_LUT ... DAFB_LUT + 3:
+        s->color_palette[s->palette_current] = val & 0xff;
         s->palette_current = (s->palette_current + 1) %
                              ARRAY_SIZE(s->color_palette);
         if (s->palette_current % 3) {
diff --git a/hw/display/virtio-gpu-pci-rutabaga.c b/hw/display/virtio-gpu-pci-rutabaga.c
index c96729e198..abbb898c65 100644
--- a/hw/display/virtio-gpu-pci-rutabaga.c
+++ b/hw/display/virtio-gpu-pci-rutabaga.c
@@ -36,6 +36,7 @@ static const TypeInfo virtio_gpu_rutabaga_pci_info[] = {
         .instance_init = virtio_gpu_rutabaga_initfn,
         .interfaces = (InterfaceInfo[]) {
             { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+            { },
         }
     },
 };
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 4265316cbb..2707bceea8 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1213,6 +1213,9 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size,
     assert(QTAILQ_EMPTY(&g->cmdq));
 
     QTAILQ_FOREACH(res, &g->reslist, next) {
+        if (res->blob_size) {
+            continue;
+        }
         qemu_put_be32(f, res->resource_id);
         qemu_put_be32(f, res->width);
         qemu_put_be32(f, res->height);
@@ -1230,12 +1233,40 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size,
     return vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL);
 }
 
+static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g,
+                                            struct virtio_gpu_simple_resource *res)
+{
+    int i;
+
+    for (i = 0; i < res->iov_cnt; i++) {
+        hwaddr len = res->iov[i].iov_len;
+        res->iov[i].iov_base =
+            dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len,
+                           DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+
+        if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
+            /* Clean up the half-a-mapping we just created... */
+            if (res->iov[i].iov_base) {
+                dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as, res->iov[i].iov_base,
+                                 len, DMA_DIRECTION_TO_DEVICE, 0);
+            }
+            /* ...and the mappings for previous loop iterations */
+            res->iov_cnt = i;
+            virtio_gpu_cleanup_mapping(g, res);
+            return false;
+        }
+    }
+
+    QTAILQ_INSERT_HEAD(&g->reslist, res, next);
+    g->hostmem += res->hostmem;
+    return true;
+}
+
 static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
                            const VMStateField *field)
 {
     VirtIOGPU *g = opaque;
     struct virtio_gpu_simple_resource *res;
-    struct virtio_gpu_scanout *scanout;
     uint32_t resource_id, pformat;
     void *bits = NULL;
     int i;
@@ -1294,40 +1325,96 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
         qemu_get_buffer(f, (void *)pixman_image_get_data(res->image),
                         pixman_image_get_stride(res->image) * res->height);
 
-        /* restore mapping */
-        for (i = 0; i < res->iov_cnt; i++) {
-            hwaddr len = res->iov[i].iov_len;
-            res->iov[i].iov_base =
-                dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len,
-                               DMA_DIRECTION_TO_DEVICE,
-                               MEMTXATTRS_UNSPECIFIED);
-
-            if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
-                /* Clean up the half-a-mapping we just created... */
-                if (res->iov[i].iov_base) {
-                    dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as,
-                                     res->iov[i].iov_base,
-                                     len,
-                                     DMA_DIRECTION_TO_DEVICE,
-                                     0);
-                }
-                /* ...and the mappings for previous loop iterations */
-                res->iov_cnt = i;
-                virtio_gpu_cleanup_mapping(g, res);
-                pixman_image_unref(res->image);
-                g_free(res);
-                return -EINVAL;
-            }
+        if (!virtio_gpu_load_restore_mapping(g, res)) {
+            pixman_image_unref(res->image);
+            g_free(res);
+            return -EINVAL;
         }
 
-        QTAILQ_INSERT_HEAD(&g->reslist, res, next);
-        g->hostmem += res->hostmem;
-
         resource_id = qemu_get_be32(f);
     }
 
     /* load & apply scanout state */
     vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1);
+
+    return 0;
+}
+
+static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size,
+                                const VMStateField *field, JSONWriter *vmdesc)
+{
+    VirtIOGPU *g = opaque;
+    struct virtio_gpu_simple_resource *res;
+    int i;
+
+    /* in 2d mode we should never find unprocessed commands here */
+    assert(QTAILQ_EMPTY(&g->cmdq));
+
+    QTAILQ_FOREACH(res, &g->reslist, next) {
+        if (!res->blob_size) {
+            continue;
+        }
+        qemu_put_be32(f, res->resource_id);
+        qemu_put_be32(f, res->blob_size);
+        qemu_put_be32(f, res->iov_cnt);
+        for (i = 0; i < res->iov_cnt; i++) {
+            qemu_put_be64(f, res->addrs[i]);
+            qemu_put_be32(f, res->iov[i].iov_len);
+        }
+    }
+    qemu_put_be32(f, 0); /* end of list */
+
+    return 0;
+}
+
+static int virtio_gpu_blob_load(QEMUFile *f, void *opaque, size_t size,
+                                const VMStateField *field)
+{
+    VirtIOGPU *g = opaque;
+    struct virtio_gpu_simple_resource *res;
+    uint32_t resource_id;
+    int i;
+
+    resource_id = qemu_get_be32(f);
+    while (resource_id != 0) {
+        res = virtio_gpu_find_resource(g, resource_id);
+        if (res) {
+            return -EINVAL;
+        }
+
+        res = g_new0(struct virtio_gpu_simple_resource, 1);
+        res->resource_id = resource_id;
+        res->blob_size = qemu_get_be32(f);
+        res->iov_cnt = qemu_get_be32(f);
+        res->addrs = g_new(uint64_t, res->iov_cnt);
+        res->iov = g_new(struct iovec, res->iov_cnt);
+
+        /* read data */
+        for (i = 0; i < res->iov_cnt; i++) {
+            res->addrs[i] = qemu_get_be64(f);
+            res->iov[i].iov_len = qemu_get_be32(f);
+        }
+
+        if (!virtio_gpu_load_restore_mapping(g, res)) {
+            g_free(res);
+            return -EINVAL;
+        }
+
+        virtio_gpu_init_udmabuf(res);
+
+        resource_id = qemu_get_be32(f);
+    }
+
+    return 0;
+}
+
+static int virtio_gpu_post_load(void *opaque, int version_id)
+{
+    VirtIOGPU *g = opaque;
+    struct virtio_gpu_scanout *scanout;
+    struct virtio_gpu_simple_resource *res;
+    int i;
+
     for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
         /* FIXME: should take scanout.r.{x,y} into account */
         scanout = &g->parent_obj.scanout[i];
@@ -1475,6 +1562,32 @@ virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
     }
 }
 
+static bool virtio_gpu_blob_state_needed(void *opaque)
+{
+    VirtIOGPU *g = VIRTIO_GPU(opaque);
+
+    return virtio_gpu_blob_enabled(g->parent_obj.conf);
+}
+
+const VMStateDescription vmstate_virtio_gpu_blob_state = {
+    .name = "virtio-gpu/blob",
+    .minimum_version_id = VIRTIO_GPU_VM_VERSION,
+    .version_id = VIRTIO_GPU_VM_VERSION,
+    .needed = virtio_gpu_blob_state_needed,
+    .fields = (const VMStateField[]){
+        {
+            .name = "virtio-gpu/blob",
+            .info = &(const VMStateInfo) {
+                .name = "blob",
+                .get = virtio_gpu_blob_load,
+                .put = virtio_gpu_blob_save,
+            },
+            .flags = VMS_SINGLE,
+        } /* device */,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 /*
  * For historical reasons virtio_gpu does not adhere to virtio migration
  * scheme as described in doc/virtio-migration.txt, in a sense that no
@@ -1500,6 +1613,11 @@ static const VMStateDescription vmstate_virtio_gpu = {
         } /* device */,
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_gpu_blob_state,
+        NULL
+    },
+    .post_load = virtio_gpu_post_load,
 };
 
 static Property virtio_gpu_properties[] = {
diff --git a/hw/hyperv/Kconfig b/hw/hyperv/Kconfig
index fcf65903bd..41dd827c84 100644
--- a/hw/hyperv/Kconfig
+++ b/hw/hyperv/Kconfig
@@ -16,3 +16,13 @@ config SYNDBG
     bool
     default y
     depends on VMBUS
+
+config HV_BALLOON_SUPPORTED
+    bool
+
+config HV_BALLOON
+    bool
+    default y
+    depends on VMBUS
+    depends on HV_BALLOON_POSSIBLE
+    depends on HV_BALLOON_SUPPORTED
diff --git a/hw/hyperv/hv-balloon-internal.h b/hw/hyperv/hv-balloon-internal.h
new file mode 100644
index 0000000000..164c2e5825
--- /dev/null
+++ b/hw/hyperv/hv-balloon-internal.h
@@ -0,0 +1,33 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HV_BALLOON_INTERNAL_H
+#define HW_HYPERV_HV_BALLOON_INTERNAL_H
+
+#include "qemu/osdep.h"
+
+#define HV_BALLOON_PFN_SHIFT 12
+#define HV_BALLOON_PAGE_SIZE (1 << HV_BALLOON_PFN_SHIFT)
+
+#define SUM_OVERFLOW_U64(in1, in2) ((in1) > UINT64_MAX - (in2))
+#define SUM_SATURATE_U64(in1, in2)              \
+    ({                                          \
+        uint64_t _in1 = (in1), _in2 = (in2);    \
+        uint64_t _result;                       \
+                                                \
+        if (!SUM_OVERFLOW_U64(_in1, _in2)) {    \
+            _result = _in1 + _in2;              \
+        } else {                                \
+            _result = UINT64_MAX;               \
+        }                                       \
+                                                \
+        _result;                                \
+    })
+
+#endif
diff --git a/hw/hyperv/hv-balloon-our_range_memslots.c b/hw/hyperv/hv-balloon-our_range_memslots.c
new file mode 100644
index 0000000000..99bae870f3
--- /dev/null
+++ b/hw/hyperv/hv-balloon-our_range_memslots.c
@@ -0,0 +1,201 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hv-balloon-internal.h"
+#include "hv-balloon-our_range_memslots.h"
+#include "trace.h"
+
+/* OurRange */
+static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count)
+{
+    assert(count <= UINT64_MAX - start);
+    our_range->range.start = start;
+    our_range->range.count = count;
+
+    hvb_page_range_tree_init(&our_range->removed_guest);
+    hvb_page_range_tree_init(&our_range->removed_both);
+
+    /* mark the whole range as unused but for potential use */
+    our_range->added = 0;
+    our_range->unusable_tail = 0;
+}
+
+static void our_range_destroy(OurRange *our_range)
+{
+    hvb_page_range_tree_destroy(&our_range->removed_guest);
+    hvb_page_range_tree_destroy(&our_range->removed_both);
+}
+
+void hvb_our_range_clear_removed_trees(OurRange *our_range)
+{
+    hvb_page_range_tree_destroy(&our_range->removed_guest);
+    hvb_page_range_tree_destroy(&our_range->removed_both);
+    hvb_page_range_tree_init(&our_range->removed_guest);
+    hvb_page_range_tree_init(&our_range->removed_both);
+}
+
+void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size)
+{
+    assert(additional_size <= UINT64_MAX - our_range->added);
+
+    our_range->added += additional_size;
+
+    assert(our_range->added <= UINT64_MAX - our_range->unusable_tail);
+    assert(our_range->added + our_range->unusable_tail <=
+           our_range->range.count);
+}
+
+/* OurRangeMemslots */
+static void our_range_memslots_init_slots(OurRangeMemslots *our_range,
+                                          MemoryRegion *backing_mr,
+                                          Object *memslot_owner)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    unsigned int idx;
+    uint64_t memslot_offset;
+
+    assert(memslots->count > 0);
+    memslots->slots = g_new0(MemoryRegion, memslots->count);
+
+    /* Initialize our memslots, but don't map them yet. */
+    assert(memslots->size_each > 0);
+    for (idx = 0, memslot_offset = 0; idx < memslots->count;
+         idx++, memslot_offset += memslots->size_each) {
+        uint64_t memslot_size;
+        g_autofree char *name = NULL;
+
+        /* The size of the last memslot might be smaller. */
+        if (idx == memslots->count - 1) {
+            uint64_t region_size;
+
+            assert(our_range->mr);
+            region_size = memory_region_size(our_range->mr);
+            memslot_size = region_size - memslot_offset;
+        } else {
+            memslot_size = memslots->size_each;
+        }
+
+        name = g_strdup_printf("memslot-%u", idx);
+        memory_region_init_alias(&memslots->slots[idx], memslot_owner, name,
+                                 backing_mr, memslot_offset, memslot_size);
+        /*
+         * We want to be able to atomically and efficiently activate/deactivate
+         * individual memslots without affecting adjacent memslots in memory
+         * notifiers.
+         */
+        memory_region_set_unmergeable(&memslots->slots[idx], true);
+    }
+
+    memslots->mapped_count = 0;
+}
+
+OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
+                                             MemoryRegion *parent_mr,
+                                             MemoryRegion *backing_mr,
+                                             Object *memslot_owner,
+                                             unsigned int memslot_count,
+                                             uint64_t memslot_size)
+{
+    OurRangeMemslots *our_range;
+
+    our_range = g_malloc(sizeof(*our_range));
+    our_range_init(&our_range->range,
+                   addr / HV_BALLOON_PAGE_SIZE,
+                   memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE);
+    our_range->slots.size_each = memslot_size;
+    our_range->slots.count = memslot_count;
+    our_range->mr = parent_mr;
+    our_range_memslots_init_slots(our_range, backing_mr, memslot_owner);
+
+    return our_range;
+}
+
+static void our_range_memslots_free_memslots(OurRangeMemslots *our_range)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    unsigned int idx;
+    uint64_t offset;
+
+    memory_region_transaction_begin();
+    for (idx = 0, offset = 0; idx < memslots->mapped_count;
+         idx++, offset += memslots->size_each) {
+        trace_hv_balloon_unmap_slot(idx, memslots->count, offset);
+        assert(memory_region_is_mapped(&memslots->slots[idx]));
+        memory_region_del_subregion(our_range->mr, &memslots->slots[idx]);
+    }
+    memory_region_transaction_commit();
+
+    for (idx = 0; idx < memslots->count; idx++) {
+        object_unparent(OBJECT(&memslots->slots[idx]));
+    }
+
+    g_clear_pointer(&our_range->slots.slots, g_free);
+}
+
+void hvb_our_range_memslots_free(OurRangeMemslots *our_range)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    MemoryRegion *hostmem_mr;
+    RAMBlock *rb;
+
+    assert(our_range->slots.count > 0);
+    assert(our_range->slots.slots);
+
+    hostmem_mr = memslots->slots[0].alias;
+    rb = hostmem_mr->ram_block;
+    ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
+
+    our_range_memslots_free_memslots(our_range);
+    our_range_destroy(&our_range->range);
+    g_free(our_range);
+}
+
+void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
+                                                     uint64_t additional_map_size)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    uint64_t total_map_size;
+    unsigned int idx;
+    uint64_t offset;
+
+    total_map_size = (our_range->range.added + additional_map_size) *
+        HV_BALLOON_PAGE_SIZE;
+    idx = memslots->mapped_count;
+    assert(memslots->size_each > 0);
+    offset = idx * memslots->size_each;
+
+    /*
+     * Activate all memslots covered by the newly added region in a single
+     * transaction.
+     */
+    memory_region_transaction_begin();
+    for ( ; idx < memslots->count;
+          idx++, offset += memslots->size_each) {
+        /*
+         * If this memslot starts beyond or at the end of the range to map so
+         * does every next one.
+         */
+        if (offset >= total_map_size) {
+            break;
+        }
+
+        /*
+         * Instead of enabling/disabling memslot, we add/remove them. This
+         * should make address space updates faster, because we don't have to
+         * loop over many disabled subregions.
+         */
+        trace_hv_balloon_map_slot(idx, memslots->count, offset);
+        assert(!memory_region_is_mapped(&memslots->slots[idx]));
+        memory_region_add_subregion(our_range->mr, offset,
+                                    &memslots->slots[idx]);
+
+        memslots->mapped_count++;
+    }
+    memory_region_transaction_commit();
+}
diff --git a/hw/hyperv/hv-balloon-our_range_memslots.h b/hw/hyperv/hv-balloon-our_range_memslots.h
new file mode 100644
index 0000000000..b6f592d34b
--- /dev/null
+++ b/hw/hyperv/hv-balloon-our_range_memslots.h
@@ -0,0 +1,110 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
+#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
+
+#include "qemu/osdep.h"
+
+#include "exec/memory.h"
+#include "qom/object.h"
+#include "hv-balloon-page_range_tree.h"
+
+/* OurRange */
+#define OUR_RANGE(ptr) ((OurRange *)(ptr))
+
+/* "our range" means the memory range owned by this driver (for hot-adding) */
+typedef struct OurRange {
+    PageRange range;
+
+    /* How many pages were hot-added to the guest */
+    uint64_t added;
+
+    /* Pages at the end not currently usable */
+    uint64_t unusable_tail;
+
+    /* Memory removed from the guest */
+    PageRangeTree removed_guest, removed_both;
+} OurRange;
+
+static inline uint64_t our_range_get_remaining_start(OurRange *our_range)
+{
+    return our_range->range.start + our_range->added;
+}
+
+static inline uint64_t our_range_get_remaining_size(OurRange *our_range)
+{
+    return our_range->range.count - our_range->added - our_range->unusable_tail;
+}
+
+void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size);
+
+static inline void our_range_mark_remaining_unusable(OurRange *our_range)
+{
+    our_range->unusable_tail = our_range->range.count - our_range->added;
+}
+
+static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range,
+                                                       bool both)
+{
+    if (both) {
+        return our_range->removed_both;
+    } else {
+        return our_range->removed_guest;
+    }
+}
+
+static inline bool our_range_is_removed_tree_empty(OurRange *our_range,
+                                                   bool both)
+{
+    if (both) {
+        return page_range_tree_is_empty(our_range->removed_both);
+    } else {
+        return page_range_tree_is_empty(our_range->removed_guest);
+    }
+}
+
+void hvb_our_range_clear_removed_trees(OurRange *our_range);
+
+/* OurRangeMemslots */
+typedef struct OurRangeMemslotsSlots {
+    /* Nominal size of each memslot (the last one might be smaller) */
+    uint64_t size_each;
+
+    /* Slots array and its element count */
+    MemoryRegion *slots;
+    unsigned int count;
+
+    /* How many slots are currently mapped */
+    unsigned int mapped_count;
+} OurRangeMemslotsSlots;
+
+typedef struct OurRangeMemslots {
+    OurRange range;
+
+    /* Memslots covering our range */
+    OurRangeMemslotsSlots slots;
+
+    MemoryRegion *mr;
+} OurRangeMemslots;
+
+OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
+                                             MemoryRegion *parent_mr,
+                                             MemoryRegion *backing_mr,
+                                             Object *memslot_owner,
+                                             unsigned int memslot_count,
+                                             uint64_t memslot_size);
+void hvb_our_range_memslots_free(OurRangeMemslots *our_range);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free)
+
+void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
+                                                     uint64_t additional_map_size);
+
+#endif
diff --git a/hw/hyperv/hv-balloon-page_range_tree.c b/hw/hyperv/hv-balloon-page_range_tree.c
new file mode 100644
index 0000000000..e178d8b413
--- /dev/null
+++ b/hw/hyperv/hv-balloon-page_range_tree.c
@@ -0,0 +1,228 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hv-balloon-internal.h"
+#include "hv-balloon-page_range_tree.h"
+
+/*
+ * temporarily avoid warnings about enhanced GTree API usage requiring a
+ * too recent Glib version until GLIB_VERSION_MAX_ALLOWED finally reaches
+ * the Glib version with this API
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+/* PageRangeTree */
+static gint page_range_tree_key_compare(gconstpointer leftp,
+                                        gconstpointer rightp,
+                                        gpointer user_data)
+{
+    const uint64_t *left = leftp, *right = rightp;
+
+    if (*left < *right) {
+        return -1;
+    } else if (*left > *right) {
+        return 1;
+    } else { /* *left == *right */
+        return 0;
+    }
+}
+
+static GTreeNode *page_range_tree_insert_new(PageRangeTree tree,
+                                             uint64_t start, uint64_t count)
+{
+    uint64_t *key = g_malloc(sizeof(*key));
+    PageRange *range = g_malloc(sizeof(*range));
+
+    assert(count > 0);
+
+    *key = range->start = start;
+    range->count = count;
+
+    return g_tree_insert_node(tree.t, key, range);
+}
+
+void hvb_page_range_tree_insert(PageRangeTree tree,
+                                uint64_t start, uint64_t count,
+                                uint64_t *dupcount)
+{
+    GTreeNode *node;
+    bool joinable;
+    uint64_t intersection;
+    PageRange *range;
+
+    assert(!SUM_OVERFLOW_U64(start, count));
+    if (count == 0) {
+        return;
+    }
+
+    node = g_tree_upper_bound(tree.t, &start);
+    if (node) {
+        node = g_tree_node_previous(node);
+    } else {
+        node = g_tree_node_last(tree.t);
+    }
+
+    if (node) {
+        range = g_tree_node_value(node);
+        assert(range);
+        intersection = page_range_intersection_size(range, start, count);
+        joinable = page_range_joinable_right(range, start, count);
+    }
+
+    if (!node ||
+        (!intersection && !joinable)) {
+        /*
+         * !node case: the tree is empty or the very first node in the tree
+         * already has a higher key (the start of its range).
+         * the other case: there is a gap in the tree between the new range
+         * and the previous one.
+         * anyway, let's just insert the new range into the tree.
+         */
+        node = page_range_tree_insert_new(tree, start, count);
+        assert(node);
+        range = g_tree_node_value(node);
+        assert(range);
+    } else {
+        /*
+         * the previous range in the tree either partially covers the new
+         * range or ends just at its beginning - extend it
+         */
+        if (dupcount) {
+            *dupcount += intersection;
+        }
+
+        count += start - range->start;
+        range->count = MAX(range->count, count);
+    }
+
+    /* check next nodes for possible merging */
+    for (node = g_tree_node_next(node); node; ) {
+        PageRange *rangecur;
+
+        rangecur = g_tree_node_value(node);
+        assert(rangecur);
+
+        intersection = page_range_intersection_size(rangecur,
+                                                    range->start, range->count);
+        joinable = page_range_joinable_left(rangecur,
+                                            range->start, range->count);
+        if (!intersection && !joinable) {
+            /* the current node is disjoint */
+            break;
+        }
+
+        if (dupcount) {
+            *dupcount += intersection;
+        }
+
+        count = rangecur->count + (rangecur->start - range->start);
+        range->count = MAX(range->count, count);
+
+        /* the current node was merged in, remove it */
+        start = rangecur->start;
+        node = g_tree_node_next(node);
+        /* no hinted removal in GTree... */
+        g_tree_remove(tree.t, &start);
+    }
+}
+
+bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out,
+                             uint64_t maxcount)
+{
+    GTreeNode *node;
+    PageRange *range;
+
+    node = g_tree_node_last(tree.t);
+    if (!node) {
+        return false;
+    }
+
+    range = g_tree_node_value(node);
+    assert(range);
+
+    out->start = range->start;
+
+    /* can't modify range->start as it is the node key */
+    if (range->count > maxcount) {
+        out->start += range->count - maxcount;
+        out->count = maxcount;
+        range->count -= maxcount;
+    } else {
+        out->count = range->count;
+        /* no hinted removal in GTree... */
+        g_tree_remove(tree.t, &out->start);
+    }
+
+    return true;
+}
+
+bool hvb_page_range_tree_intree_any(PageRangeTree tree,
+                                    uint64_t start, uint64_t count)
+{
+    GTreeNode *node;
+
+    if (count == 0) {
+        return false;
+    }
+
+    /* find the first node that can possibly intersect our range */
+    node = g_tree_upper_bound(tree.t, &start);
+    if (node) {
+        /*
+         * a NULL node below means that the very first node in the tree
+         * already has a higher key (the start of its range).
+         */
+        node = g_tree_node_previous(node);
+    } else {
+        /* a NULL node below means that the tree is empty */
+        node = g_tree_node_last(tree.t);
+    }
+    /* node range start <= range start */
+
+    if (!node) {
+        /* node range start > range start */
+        node = g_tree_node_first(tree.t);
+    }
+
+    for ( ; node; node = g_tree_node_next(node)) {
+        PageRange *range = g_tree_node_value(node);
+
+        assert(range);
+        /*
+         * if this node starts beyond or at the end of our range so does
+         * every next one
+         */
+        if (range->start >= start + count) {
+            break;
+        }
+
+        if (page_range_intersection_size(range, start, count) > 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void hvb_page_range_tree_init(PageRangeTree *tree)
+{
+    tree->t = g_tree_new_full(page_range_tree_key_compare, NULL,
+                              g_free, g_free);
+}
+
+void hvb_page_range_tree_destroy(PageRangeTree *tree)
+{
+    /* g_tree_destroy() is not NULL-safe */
+    if (!tree->t) {
+        return;
+    }
+
+    g_tree_destroy(tree->t);
+    tree->t = NULL;
+}
diff --git a/hw/hyperv/hv-balloon-page_range_tree.h b/hw/hyperv/hv-balloon-page_range_tree.h
new file mode 100644
index 0000000000..07a9ae0da6
--- /dev/null
+++ b/hw/hyperv/hv-balloon-page_range_tree.h
@@ -0,0 +1,118 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H
+#define HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H
+
+#include "qemu/osdep.h"
+
+/* PageRange */
+typedef struct PageRange {
+    uint64_t start;
+    uint64_t count;
+} PageRange;
+
+/* return just the part of range before (start) */
+static inline void page_range_part_before(const PageRange *range,
+                                          uint64_t start, PageRange *out)
+{
+    uint64_t endr = range->start + range->count;
+    uint64_t end = MIN(endr, start);
+
+    out->start = range->start;
+    if (end > out->start) {
+        out->count = end - out->start;
+    } else {
+        out->count = 0;
+    }
+}
+
+/* return just the part of range after (start, count) */
+static inline void page_range_part_after(const PageRange *range,
+                                         uint64_t start, uint64_t count,
+                                         PageRange *out)
+{
+    uint64_t end = range->start + range->count;
+    uint64_t ends = start + count;
+
+    out->start = MAX(range->start, ends);
+    if (end > out->start) {
+        out->count = end - out->start;
+    } else {
+        out->count = 0;
+    }
+}
+
+static inline void page_range_intersect(const PageRange *range,
+                                        uint64_t start, uint64_t count,
+                                        PageRange *out)
+{
+    uint64_t end1 = range->start + range->count;
+    uint64_t end2 = start + count;
+    uint64_t end = MIN(end1, end2);
+
+    out->start = MAX(range->start, start);
+    out->count = out->start < end ? end - out->start : 0;
+}
+
+static inline uint64_t page_range_intersection_size(const PageRange *range,
+                                                    uint64_t start, uint64_t count)
+{
+    PageRange trange;
+
+    page_range_intersect(range, start, count, &trange);
+    return trange.count;
+}
+
+static inline bool page_range_joinable_left(const PageRange *range,
+                                            uint64_t start, uint64_t count)
+{
+    return start + count == range->start;
+}
+
+static inline bool page_range_joinable_right(const PageRange *range,
+                                             uint64_t start, uint64_t count)
+{
+    return range->start + range->count == start;
+}
+
+static inline bool page_range_joinable(const PageRange *range,
+                                       uint64_t start, uint64_t count)
+{
+    return page_range_joinable_left(range, start, count) ||
+        page_range_joinable_right(range, start, count);
+}
+
+/* PageRangeTree */
+/* type safety */
+typedef struct PageRangeTree {
+    GTree *t;
+} PageRangeTree;
+
+static inline bool page_range_tree_is_empty(PageRangeTree tree)
+{
+    guint nnodes = g_tree_nnodes(tree.t);
+
+    return nnodes == 0;
+}
+
+void hvb_page_range_tree_init(PageRangeTree *tree);
+void hvb_page_range_tree_destroy(PageRangeTree *tree);
+
+bool hvb_page_range_tree_intree_any(PageRangeTree tree,
+                                    uint64_t start, uint64_t count);
+
+bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out,
+                             uint64_t maxcount);
+
+void hvb_page_range_tree_insert(PageRangeTree tree,
+                                uint64_t start, uint64_t count,
+                                uint64_t *dupcount);
+
+#endif
diff --git a/hw/hyperv/hv-balloon-stub.c b/hw/hyperv/hv-balloon-stub.c
new file mode 100644
index 0000000000..a47412d4a8
--- /dev/null
+++ b/hw/hyperv/hv-balloon-stub.c
@@ -0,0 +1,19 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-types-machine.h"
+
+HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
+{
+    error_setg(errp, "hv-balloon device not enabled in this build");
+    return NULL;
+}
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
new file mode 100644
index 0000000000..66f297c1d7
--- /dev/null
+++ b/hw/hyperv/hv-balloon.c
@@ -0,0 +1,1769 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hv-balloon-internal.h"
+
+#include "exec/address-spaces.h"
+#include "exec/cpu-common.h"
+#include "exec/ramblock.h"
+#include "hw/boards.h"
+#include "hw/hyperv/dynmem-proto.h"
+#include "hw/hyperv/hv-balloon.h"
+#include "hw/hyperv/vmbus.h"
+#include "hw/mem/memory-device.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "monitor/qdev.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-types-machine.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/visitor.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "qemu/timer.h"
+#include "sysemu/balloon.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/reset.h"
+#include "hv-balloon-our_range_memslots.h"
+#include "hv-balloon-page_range_tree.h"
+#include "trace.h"
+
+#define HV_BALLOON_ADDR_PROP "addr"
+#define HV_BALLOON_MEMDEV_PROP "memdev"
+#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502"
+
+/*
+ * Some Windows versions (at least Server 2019) will crash with various
+ * error codes when receiving DM protocol requests (at least
+ * DM_MEM_HOT_ADD_REQUEST) immediately after boot.
+ *
+ * It looks like Hyper-V from Server 2016 uses a 50-second after-boot
+ * delay, probably to workaround this issue, so we'll use this value, too.
+ */
+#define HV_BALLOON_POST_INIT_WAIT (50 * 1000)
+
+#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB)
+#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE)
+
+#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB)
+
+#define HV_BALLOON_HR_CHUNK_PAGES 585728
+/*
+ *                                ^ that's the maximum number of pages
+ * that Windows returns in one hot remove response
+ *
+ * If the number requested is too high Windows will no longer honor
+ * these requests
+ */
+
+struct HvBalloonClass {
+    VMBusDeviceClass parent_class;
+} HvBalloonClass;
+
+typedef enum State {
+    /* not a real state */
+    S_NO_CHANGE = 0,
+
+    S_WAIT_RESET,
+    S_POST_RESET_CLOSED,
+
+    /* init flow */
+    S_VERSION,
+    S_CAPS,
+    S_POST_INIT_WAIT,
+
+    S_IDLE,
+
+    /* balloon op flow */
+    S_BALLOON_POSTING,
+    S_BALLOON_RB_WAIT,
+    S_BALLOON_REPLY_WAIT,
+
+    /* unballoon + hot add ops flow */
+    S_UNBALLOON_POSTING,
+    S_UNBALLOON_RB_WAIT,
+    S_UNBALLOON_REPLY_WAIT,
+    S_HOT_ADD_SETUP,
+    S_HOT_ADD_RB_WAIT,
+    S_HOT_ADD_POSTING,
+    S_HOT_ADD_REPLY_WAIT,
+} State;
+
+typedef struct StateDesc {
+    State state;
+    const char *desc;
+} StateDesc;
+
+typedef struct HvBalloon {
+    VMBusDevice parent;
+    State state;
+
+    union dm_version version;
+    union dm_caps caps;
+
+    QEMUTimer post_init_timer;
+
+    unsigned int trans_id;
+
+    struct {
+        bool enabled;
+        bool received;
+        uint64_t committed;
+        uint64_t available;
+    } status_report;
+
+    /* Guest target size */
+    uint64_t target;
+    bool target_changed;
+
+    /* Current (un)balloon / hot-add operation parameters */
+    union {
+        uint64_t balloon_diff;
+
+        struct {
+            uint64_t unballoon_diff;
+            uint64_t hot_add_diff;
+        };
+
+        struct {
+            PageRange hot_add_range;
+            uint64_t ha_current_count;
+        };
+    };
+
+    OurRangeMemslots *our_range;
+
+    /* Count of memslots covering our memory */
+    unsigned int memslot_count;
+
+    /* Nominal size of each memslot (the last one might be smaller) */
+    uint64_t memslot_size;
+
+    /* Non-ours removed memory */
+    PageRangeTree removed_guest, removed_both;
+
+    /* Grand totals of removed memory (both ours and non-ours) */
+    uint64_t removed_guest_ctr, removed_both_ctr;
+
+    /* MEMORY_DEVICE props */
+    uint64_t addr;
+    HostMemoryBackend *hostmem;
+    MemoryRegion *mr;
+} HvBalloon;
+
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \
+                                   { TYPE_MEMORY_DEVICE }, { })
+
+#define HV_BALLOON_SET_STATE(hvb, news)             \
+    do {                                            \
+        assert(news != S_NO_CHANGE);                \
+        hv_balloon_state_set(hvb, news, # news);    \
+    } while (0)
+
+#define HV_BALLOON_STATE_DESC_SET(stdesc, news)         \
+    _hv_balloon_state_desc_set(stdesc, news, # news)
+
+#define HV_BALLOON_STATE_DESC_INIT \
+    {                              \
+        .state = S_NO_CHANGE,      \
+    }
+
+typedef struct HvBalloonReq {
+    VMBusChanReq vmreq;
+} HvBalloonReq;
+
+/* total our memory includes parts currently removed from the guest */
+static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon)
+{
+    if (!balloon->our_range) {
+        return 0;
+    }
+
+    return balloon->our_range->range.added;
+}
+
+/* TODO: unify the code below with virtio-balloon and cache the value */
+static int build_dimm_list(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
+        DeviceState *dev = DEVICE(obj);
+        if (dev->realized) { /* only realized DIMMs matter */
+            *list = g_slist_prepend(*list, dev);
+        }
+    }
+
+    object_child_foreach(obj, build_dimm_list, opaque);
+    return 0;
+}
+
+static ram_addr_t get_current_ram_size(void)
+{
+    GSList *list = NULL, *item;
+    ram_addr_t size = current_machine->ram_size;
+
+    build_dimm_list(qdev_get_machine(), &list);
+    for (item = list; item; item = g_slist_next(item)) {
+        Object *obj = OBJECT(item->data);
+        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM))
+            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
+                                            &error_abort);
+    }
+    g_slist_free(list);
+
+    return size;
+}
+
+/* total RAM includes memory currently removed from the guest */
+static uint64_t hv_balloon_total_ram(HvBalloon *balloon)
+{
+    ram_addr_t ram_size = get_current_ram_size();
+    uint64_t ram_size_pages = ram_size >> HV_BALLOON_PFN_SHIFT;
+    uint64_t our_ram_size_pages = hv_balloon_total_our_ram(balloon);
+
+    assert(ram_size_pages > 0);
+
+    return SUM_SATURATE_U64(ram_size_pages, our_ram_size_pages);
+}
+
+/*
+ * calculating the total RAM size is a slow operation,
+ * avoid it as much as possible
+ */
+static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon,
+                                            uint64_t ram_size_pages)
+{
+    uint64_t total_removed;
+
+    total_removed = SUM_SATURATE_U64(balloon->removed_guest_ctr,
+                                     balloon->removed_both_ctr);
+
+    /* possible if guest returns pages outside actual RAM */
+    if (total_removed > ram_size_pages) {
+        total_removed = ram_size_pages;
+    }
+
+    return total_removed;
+}
+
+/* Returns whether the state has actually changed */
+static bool hv_balloon_state_set(HvBalloon *balloon,
+                                 State newst, const char *newststr)
+{
+    if (newst == S_NO_CHANGE || balloon->state == newst) {
+        return false;
+    }
+
+    balloon->state = newst;
+    trace_hv_balloon_state_change(newststr);
+    return true;
+}
+
+static void _hv_balloon_state_desc_set(StateDesc *stdesc,
+                                       State newst, const char *newststr)
+{
+    /* state setting is only permitted on a freshly init desc */
+    assert(stdesc->state == S_NO_CHANGE);
+
+    assert(newst != S_NO_CHANGE);
+
+    stdesc->state = newst;
+    stdesc->desc = newststr;
+}
+
+static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon)
+{
+    return vmbus_device_channel(&balloon->parent, 0);
+}
+
+static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon)
+{
+    VMBusChannel *chan;
+
+    chan = hv_balloon_get_channel_maybe(balloon);
+    assert(chan != NULL);
+    return chan;
+}
+
+static ssize_t hv_balloon_send_packet(VMBusChannel *chan,
+                                      struct dm_message *msg)
+{
+    int ret;
+
+    ret = vmbus_channel_reserve(chan, 0, msg->hdr.size);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                              NULL, 0, msg, msg->hdr.size, false,
+                              msg->hdr.trans_id);
+}
+
+static bool hv_balloon_unballoon_get_source(HvBalloon *balloon,
+                                            PageRangeTree *dtree,
+                                            uint64_t **dctr,
+                                            bool *is_our_range)
+{
+    OurRange *our_range = OUR_RANGE(balloon->our_range);
+
+    /* Try the boot memory first */
+    if (g_tree_nnodes(balloon->removed_guest.t) > 0) {
+        *dtree = balloon->removed_guest;
+        *dctr = &balloon->removed_guest_ctr;
+        *is_our_range = false;
+    } else if (g_tree_nnodes(balloon->removed_both.t) > 0) {
+        *dtree = balloon->removed_both;
+        *dctr = &balloon->removed_both_ctr;
+        *is_our_range = false;
+    } else if (!our_range) {
+        return false;
+    } else if (!our_range_is_removed_tree_empty(our_range, false)) {
+        *dtree = our_range_get_removed_tree(our_range, false);
+        *dctr = &balloon->removed_guest_ctr;
+        *is_our_range = true;
+    } else if (!our_range_is_removed_tree_empty(our_range, true)) {
+        *dtree = our_range_get_removed_tree(our_range, true);
+        *dctr = &balloon->removed_both_ctr;
+        *is_our_range = true;
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
+static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_unballoon_request *ur;
+    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);
+
+    assert(balloon->state == S_UNBALLOON_RB_WAIT);
+
+    if (vmbus_channel_reserve(chan, 0, ur_size) < 0) {
+        return;
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING);
+}
+
+static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    PageRangeTree dtree;
+    uint64_t *dctr;
+    bool our_range;
+    struct dm_unballoon_request *ur;
+    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);
+    PageRange range;
+    bool bret;
+    ssize_t ret;
+
+    assert(balloon->state == S_UNBALLOON_POSTING);
+    assert(balloon->unballoon_diff > 0);
+
+    if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) {
+        error_report("trying to unballoon but nothing seems to be ballooned");
+        /*
+         * there is little we can do as we might have already
+         * sent the guest a partial request we can't cancel
+         */
+        return;
+    }
+
+    assert(balloon->our_range || !our_range);
+    assert(dtree.t);
+    assert(dctr);
+
+    ur = alloca(ur_size);
+    memset(ur, 0, ur_size);
+    ur->hdr.type = DM_UNBALLOON_REQUEST;
+    ur->hdr.size = ur_size;
+    ur->hdr.trans_id = balloon->trans_id;
+
+    bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff,
+                                                      HV_BALLOON_HA_CHUNK_PAGES));
+    assert(bret);
+    /* TODO: madvise? */
+
+    *dctr -= range.count;
+    balloon->unballoon_diff -= range.count;
+
+    ur->range_count = 1;
+    ur->range_array[0].finfo.start_page = range.start;
+    ur->range_array[0].finfo.page_cnt = range.count;
+    ur->more_pages = balloon->unballoon_diff > 0;
+
+    trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id,
+                                        range.count, range.start,
+                                        balloon->unballoon_diff);
+
+    if (ur->more_pages) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
+    } else {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT);
+    }
+
+    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                             NULL, 0, ur, ur_size, false,
+                             ur->hdr.trans_id);
+    if (ret <= 0) {
+        error_report("error %zd when posting unballoon msg, expect problems",
+                     ret);
+    }
+}
+
+static bool hv_balloon_our_range_ensure(HvBalloon *balloon)
+{
+    uint64_t align;
+    MemoryRegion *hostmem_mr;
+    g_autoptr(OurRangeMemslots) our_range_memslots = NULL;
+    OurRange *our_range;
+
+    if (balloon->our_range) {
+        return true;
+    }
+
+    if (!balloon->hostmem) {
+        return false;
+    }
+
+    align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB;
+    assert(QEMU_IS_ALIGNED(balloon->addr, align));
+
+    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
+
+    our_range_memslots = hvb_our_range_memslots_new(balloon->addr,
+                                                    balloon->mr, hostmem_mr,
+                                                    OBJECT(balloon),
+                                                    balloon->memslot_count,
+                                                    balloon->memslot_size);
+    our_range = OUR_RANGE(our_range_memslots);
+
+    if (hvb_page_range_tree_intree_any(balloon->removed_guest,
+                                       our_range->range.start,
+                                       our_range->range.count) ||
+        hvb_page_range_tree_intree_any(balloon->removed_both,
+                                       our_range->range.start,
+                                       our_range->range.count)) {
+        error_report("some parts of the memory backend were already returned by the guest. this should not happen, please reboot the guest and try again");
+        return false;
+    }
+
+    trace_hv_balloon_our_range_add(our_range->range.count,
+                                   our_range->range.start);
+
+    balloon->our_range = g_steal_pointer(&our_range_memslots);
+    return true;
+}
+
+static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc)
+{
+    /* need to make copy since it is in union with hot_add_range */
+    uint64_t hot_add_diff = balloon->hot_add_diff;
+    PageRange *hot_add_range = &balloon->hot_add_range;
+    uint64_t align, our_range_remaining;
+    OurRange *our_range;
+
+    assert(balloon->state == S_HOT_ADD_SETUP);
+    assert(hot_add_diff > 0);
+
+    if (!hv_balloon_our_range_ensure(balloon)) {
+        goto ret_idle;
+    }
+
+    our_range = OUR_RANGE(balloon->our_range);
+
+    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
+        (MiB / HV_BALLOON_PAGE_SIZE);
+
+    /* Absolute GPA in pages */
+    hot_add_range->start = our_range_get_remaining_start(our_range);
+    assert(QEMU_IS_ALIGNED(hot_add_range->start, align));
+
+    our_range_remaining = our_range_get_remaining_size(our_range);
+    hot_add_range->count = MIN(our_range_remaining, hot_add_diff);
+    hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align);
+    if (hot_add_range->count == 0) {
+        goto ret_idle;
+    }
+
+    hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range,
+                                                    hot_add_range->count);
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
+    return;
+
+ret_idle:
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+}
+
+static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_hot_add *ha;
+    size_t ha_size = sizeof(*ha) + sizeof(ha->range);
+
+    assert(balloon->state == S_HOT_ADD_RB_WAIT);
+
+    if (vmbus_channel_reserve(chan, 0, ha_size) < 0) {
+        return;
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING);
+}
+
+static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc)
+{
+    PageRange *hot_add_range = &balloon->hot_add_range;
+    uint64_t *current_count = &balloon->ha_current_count;
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_hot_add *ha;
+    size_t ha_size = sizeof(*ha) + sizeof(ha->range);
+    union dm_mem_page_range *ha_region;
+    uint64_t align, chunk_max_size;
+    ssize_t ret;
+
+    assert(balloon->state == S_HOT_ADD_POSTING);
+    assert(hot_add_range->count > 0);
+
+    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
+        (MiB / HV_BALLOON_PAGE_SIZE);
+    if (align >= HV_BALLOON_HA_CHUNK_PAGES) {
+        /*
+         * If the required alignment is higher than the chunk size we let it
+         * override that size.
+         */
+        chunk_max_size = align;
+    } else {
+        chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align);
+    }
+
+    /*
+     * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(),
+     * then it is either reduced by subtracting aligned current_count or
+     * further hot-adds are prevented by marking the whole remaining our range
+     * as unusable in hv_balloon_handle_hot_add_response().
+     */
+    *current_count = MIN(hot_add_range->count, chunk_max_size);
+
+    ha = alloca(ha_size);
+    ha_region = &(&ha->range)[1];
+    memset(ha, 0, ha_size);
+    ha->hdr.type = DM_MEM_HOT_ADD_REQUEST;
+    ha->hdr.size = ha_size;
+    ha->hdr.trans_id = balloon->trans_id;
+
+    ha->range.finfo.start_page = hot_add_range->start;
+    ha->range.finfo.page_cnt = *current_count;
+    ha_region->finfo.start_page = hot_add_range->start;
+    ha_region->finfo.page_cnt = ha->range.finfo.page_cnt;
+
+    trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id,
+                                      *current_count, hot_add_range->start);
+
+    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                             NULL, 0, ha, ha_size, false,
+                             ha->hdr.trans_id);
+    if (ret <= 0) {
+        error_report("error %zd when posting hot add msg, expect problems",
+                     ret);
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT);
+}
+
+static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    size_t bl_size = sizeof(struct dm_balloon);
+
+    assert(balloon->state == S_BALLOON_RB_WAIT);
+
+    if (vmbus_channel_reserve(chan, 0, bl_size) < 0) {
+        return;
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING);
+}
+
+static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_balloon bl;
+    size_t bl_size = sizeof(bl);
+    ssize_t ret;
+
+    assert(balloon->state == S_BALLOON_POSTING);
+    assert(balloon->balloon_diff > 0);
+
+    memset(&bl, 0, sizeof(bl));
+    bl.hdr.type = DM_BALLOON_REQUEST;
+    bl.hdr.size = bl_size;
+    bl.hdr.trans_id = balloon->trans_id;
+    bl.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES);
+
+    trace_hv_balloon_outgoing_balloon(bl.hdr.trans_id, bl.num_pages,
+                                      balloon->balloon_diff);
+
+    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                             NULL, 0, &bl, bl_size, false,
+                             bl.hdr.trans_id);
+    if (ret <= 0) {
+        error_report("error %zd when posting balloon msg, expect problems",
+                     ret);
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT);
+}
+
+static void hv_balloon_idle_state_process_target(HvBalloon *balloon,
+                                                 StateDesc *stdesc)
+{
+    bool can_balloon = balloon->caps.cap_bits.balloon;
+    uint64_t ram_size_pages, total_removed;
+
+    ram_size_pages = hv_balloon_total_ram(balloon);
+    total_removed = hv_balloon_total_removed_rs(balloon, ram_size_pages);
+
+    /*
+     * we need to cache the values computed from the balloon target value when
+     * starting the adjustment procedure in case someone changes the target when
+     * the procedure is in progress
+     */
+    if (balloon->target > ram_size_pages - total_removed) {
+        bool can_hot_add = balloon->caps.cap_bits.hot_add;
+        uint64_t target_diff = balloon->target -
+            (ram_size_pages - total_removed);
+
+        balloon->unballoon_diff = MIN(target_diff, total_removed);
+
+        if (can_hot_add) {
+            balloon->hot_add_diff = target_diff - balloon->unballoon_diff;
+        } else {
+            balloon->hot_add_diff = 0;
+        }
+
+        if (balloon->unballoon_diff > 0) {
+            assert(can_balloon);
+            HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
+        } else if (balloon->hot_add_diff > 0) {
+            HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
+        }
+    } else if (can_balloon &&
+               balloon->target < ram_size_pages - total_removed) {
+        balloon->balloon_diff = ram_size_pages - total_removed -
+            balloon->target;
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
+    }
+}
+
+static void hv_balloon_idle_state(HvBalloon *balloon,
+                                  StateDesc *stdesc)
+{
+    assert(balloon->state == S_IDLE);
+
+    if (balloon->target_changed) {
+        balloon->target_changed = false;
+        hv_balloon_idle_state_process_target(balloon, stdesc);
+        return;
+    }
+}
+
+static const struct {
+    void (*handler)(HvBalloon *balloon, StateDesc *stdesc);
+} state_handlers[] = {
+    [S_IDLE].handler = hv_balloon_idle_state,
+    [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting,
+    [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait,
+    [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting,
+    [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait,
+    [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup,
+    [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait,
+    [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting,
+};
+
+static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc)
+{
+    if (balloon->state >= ARRAY_SIZE(state_handlers) ||
+        !state_handlers[balloon->state].handler) {
+        return;
+    }
+
+    state_handlers[balloon->state].handler(balloon, stdesc);
+}
+
+static void hv_balloon_remove_response_insert_range(PageRangeTree tree,
+                                                    const PageRange *range,
+                                                    uint64_t *ctr1,
+                                                    uint64_t *ctr2,
+                                                    uint64_t *ctr3)
+{
+    uint64_t dupcount, effcount;
+
+    if (range->count == 0) {
+        return;
+    }
+
+    dupcount = 0;
+    hvb_page_range_tree_insert(tree, range->start, range->count, &dupcount);
+
+    assert(dupcount <= range->count);
+    effcount = range->count - dupcount;
+
+    *ctr1 += effcount;
+    *ctr2 += effcount;
+    if (ctr3) {
+        *ctr3 += effcount;
+    }
+}
+
+static void hv_balloon_remove_response_handle_range(HvBalloon *balloon,
+                                                    PageRange *range,
+                                                    bool both,
+                                                    uint64_t *removedctr)
+{
+    OurRange *our_range = OUR_RANGE(balloon->our_range);
+    PageRangeTree globaltree =
+        both ? balloon->removed_both : balloon->removed_guest;
+    uint64_t *globalctr =
+        both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr;
+    PageRange rangeeff;
+
+    if (range->count == 0) {
+        return;
+    }
+
+    trace_hv_balloon_remove_response(range->count, range->start, both);
+
+    if (our_range) {
+        /* Includes the not-yet-hot-added and unusable parts. */
+        rangeeff = our_range->range;
+    } else {
+        rangeeff.start = rangeeff.count = 0;
+    }
+
+    if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) {
+        PageRangeTree ourtree = our_range_get_removed_tree(our_range, both);
+        PageRange rangehole, rangecommon;
+        uint64_t ourremoved = 0;
+
+        /* process the hole before our range, if it exists */
+        page_range_part_before(range, rangeeff.start, &rangehole);
+        hv_balloon_remove_response_insert_range(globaltree, &rangehole,
+                                                globalctr, removedctr, NULL);
+        if (rangehole.count > 0) {
+            trace_hv_balloon_remove_response_hole(rangehole.count,
+                                                  rangehole.start,
+                                                  range->count, range->start,
+                                                  rangeeff.start, both);
+        }
+
+        /* process our part */
+        page_range_intersect(range, rangeeff.start, rangeeff.count,
+                             &rangecommon);
+        hv_balloon_remove_response_insert_range(ourtree, &rangecommon,
+                                                globalctr, removedctr,
+                                                &ourremoved);
+        if (rangecommon.count > 0) {
+            trace_hv_balloon_remove_response_common(rangecommon.count,
+                                                    rangecommon.start,
+                                                    range->count, range->start,
+                                                    rangeeff.count,
+                                                    rangeeff.start, ourremoved,
+                                                    both);
+        }
+
+        /* calculate what's left after our range */
+        rangecommon = *range;
+        page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count,
+                              range);
+    }
+
+    /* process the remainder of the range that lies after our range */
+    if (range->count > 0) {
+        hv_balloon_remove_response_insert_range(globaltree, range,
+                                                globalctr, removedctr, NULL);
+        trace_hv_balloon_remove_response_remainder(range->count, range->start,
+                                                   both);
+        range->count = 0;
+    }
+}
+
+static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon,
+                                                    PageRange *range,
+                                                    uint64_t start,
+                                                    uint64_t count,
+                                                    bool both,
+                                                    uint64_t *removedctr)
+{
+    assert(count > 0);
+
+    /*
+     * if there is an existing range that the new range can't be joined to
+     * dump it into tree(s)
+     */
+    if (range->count > 0 && !page_range_joinable(range, start, count)) {
+        hv_balloon_remove_response_handle_range(balloon, range, both,
+                                                removedctr);
+    }
+
+    if (range->count == 0) {
+        range->start = start;
+        range->count = count;
+    } else if (page_range_joinable_left(range, start, count)) {
+        range->start = start;
+        range->count += count;
+    } else { /* page_range_joinable_right() */
+        range->count += count;
+    }
+}
+
+static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key,
+                                                        gpointer value,
+                                                        gpointer data)
+{
+    PageRange *range = value;
+    uint64_t pageoff;
+
+    for (pageoff = 0; pageoff < range->count; ) {
+        uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE;
+        void *addr;
+        RAMBlock *rb;
+        ram_addr_t rb_offset;
+        size_t rb_page_size;
+        size_t discard_size;
+
+        assert(addr_64 <= UINTPTR_MAX);
+        addr = (void *)((uintptr_t)addr_64);
+        rb = qemu_ram_block_from_host(addr, false, &rb_offset);
+        rb_page_size = qemu_ram_pagesize(rb);
+
+        if (rb_page_size != HV_BALLOON_PAGE_SIZE) {
+            /* TODO: these should end in "removed_guest" */
+            warn_report("guest reported removed page backed by unsupported page size %zu",
+                        rb_page_size);
+            pageoff++;
+            continue;
+        }
+
+        discard_size = MIN(range->count - pageoff,
+                           (rb->max_length - rb_offset) /
+                           HV_BALLOON_PAGE_SIZE);
+        discard_size = MAX(discard_size, 1);
+
+        if (ram_block_discard_range(rb, rb_offset, discard_size *
+                                    HV_BALLOON_PAGE_SIZE) != 0) {
+            warn_report("guest reported removed page failed discard");
+        }
+
+        pageoff += discard_size;
+    }
+
+    return false;
+}
+
+static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree)
+{
+    g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, NULL);
+}
+
+static int hv_balloon_handle_remove_section(PageRangeTree tree,
+                                            const MemoryRegionSection *section,
+                                            uint64_t count)
+{
+    void *addr = memory_region_get_ram_ptr(section->mr) +
+        section->offset_within_region;
+    uint64_t addr_page;
+
+    assert(count > 0);
+
+    if ((uintptr_t)addr % HV_BALLOON_PAGE_SIZE) {
+        warn_report("guest reported removed pages at an unaligned host addr %p",
+                    addr);
+        return -EINVAL;
+    }
+
+    addr_page = (uintptr_t)addr / HV_BALLOON_PAGE_SIZE;
+    hvb_page_range_tree_insert(tree, addr_page, count, NULL);
+
+    return 0;
+}
+
+static void hv_balloon_handle_remove_ranges(HvBalloon *balloon,
+                                            union dm_mem_page_range ranges[],
+                                            uint32_t count)
+{
+    uint64_t removedcnt;
+    PageRangeTree removed_host_addr;
+    PageRange range_guest, range_both;
+
+    hvb_page_range_tree_init(&removed_host_addr);
+    range_guest.count = range_both.count = removedcnt = 0;
+    for (unsigned int ctr = 0; ctr < count; ctr++) {
+        union dm_mem_page_range *mr = &ranges[ctr];
+        hwaddr pa;
+        MemoryRegionSection section;
+
+        for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) {
+            int ret;
+            uint64_t pageno = mr->finfo.start_page + offset;
+            uint64_t pagecnt = 1;
+
+            pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT;
+            section = memory_region_find(get_system_memory(), pa,
+                                         (mr->finfo.page_cnt - offset) *
+                                         HV_BALLOON_PAGE_SIZE);
+            if (!section.mr) {
+                warn_report("guest reported removed page %"PRIu64" not found in RAM",
+                            pageno);
+                ret = -EINVAL;
+                goto finish_page;
+            }
+
+            pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE;
+            if (pagecnt <= 0) {
+                warn_report("guest reported removed page %"PRIu64" in a section smaller than page size",
+                            pageno);
+                pagecnt = 1; /* skip the whole page */
+                ret = -EINVAL;
+                goto finish_page;
+            }
+
+            if (!memory_region_is_ram(section.mr) ||
+                memory_region_is_rom(section.mr) ||
+                memory_region_is_romd(section.mr)) {
+                warn_report("guest reported removed page %"PRIu64" in a section that is not an ordinary RAM",
+                            pageno);
+                ret = -EINVAL;
+                goto finish_page;
+            }
+
+            ret = hv_balloon_handle_remove_section(removed_host_addr, &section,
+                                                   pagecnt);
+
+        finish_page:
+            if (ret == 0) {
+                hv_balloon_remove_response_handle_pages(balloon,
+                                                        &range_both,
+                                                        pageno, pagecnt,
+                                                        true, &removedcnt);
+            } else {
+                hv_balloon_remove_response_handle_pages(balloon,
+                                                        &range_guest,
+                                                        pageno, pagecnt,
+                                                        false, &removedcnt);
+            }
+
+            if (section.mr) {
+                memory_region_unref(section.mr);
+            }
+
+            offset += pagecnt;
+        }
+    }
+
+    hv_balloon_remove_response_handle_range(balloon, &range_both, true,
+                                            &removedcnt);
+    hv_balloon_remove_response_handle_range(balloon, &range_guest, false,
+                                            &removedcnt);
+
+    hv_balloon_handle_remove_host_addr_tree(removed_host_addr);
+    hvb_page_range_tree_destroy(&removed_host_addr);
+
+    if (removedcnt > balloon->balloon_diff) {
+        warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")",
+                    removedcnt, balloon->balloon_diff);
+        balloon->balloon_diff = 0;
+    } else {
+        balloon->balloon_diff -= removedcnt;
+    }
+}
+
+static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize,
+                                       const char *msgname)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    uint32_t msglen = vmreq->msglen;
+
+    if (msglen >= minsize) {
+        return true;
+    }
+
+    warn_report("%s message too short (%u vs %zu), ignoring", msgname,
+                (unsigned int)msglen, minsize);
+    return false;
+}
+
+static void hv_balloon_handle_version_request(HvBalloon *balloon,
+                                              HvBalloonReq *req,
+                                              StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_version_request *msgVr = vmreq->msg;
+    struct dm_version_response respVr;
+
+    if (balloon->state != S_VERSION) {
+        warn_report("unexpected DM_VERSION_REQUEST in %d state",
+                    balloon->state);
+        return;
+    }
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgVr),
+                                    "DM_VERSION_REQUEST")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_version(msgVr->version.major_version,
+                                      msgVr->version.minor_version);
+
+    memset(&respVr, 0, sizeof(respVr));
+    respVr.hdr.type = DM_VERSION_RESPONSE;
+    respVr.hdr.size = sizeof(respVr);
+    respVr.hdr.trans_id = msgVr->hdr.trans_id;
+    respVr.is_accepted = msgVr->version.version >= DYNMEM_PROTOCOL_VERSION_1 &&
+        msgVr->version.version <= DYNMEM_PROTOCOL_VERSION_3;
+
+    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respVr);
+
+    if (respVr.is_accepted) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS);
+    }
+}
+
+static void hv_balloon_handle_caps_report(HvBalloon *balloon,
+                                          HvBalloonReq *req,
+                                          StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_capabilities *msgCap = vmreq->msg;
+    struct dm_capabilities_resp_msg respCap;
+
+    if (balloon->state != S_CAPS) {
+        warn_report("unexpected DM_CAPABILITIES_REPORT in %d state",
+                    balloon->state);
+        return;
+    }
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgCap),
+                                    "DM_CAPABILITIES_REPORT")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_caps(msgCap->caps.caps);
+    balloon->caps = msgCap->caps;
+
+    memset(&respCap, 0, sizeof(respCap));
+    respCap.hdr.type = DM_CAPABILITIES_RESPONSE;
+    respCap.hdr.size = sizeof(respCap);
+    respCap.hdr.trans_id = msgCap->hdr.trans_id;
+    respCap.is_accepted = 1;
+    respCap.hot_remove = 1;
+    respCap.suppress_pressure_reports = !balloon->status_report.enabled;
+    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respCap);
+
+    timer_mod(&balloon->post_init_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+              HV_BALLOON_POST_INIT_WAIT);
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT);
+}
+
+static void hv_balloon_handle_status_report(HvBalloon *balloon,
+                                            HvBalloonReq *req)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_status *msgStatus = vmreq->msg;
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgStatus),
+                                    "DM_STATUS_REPORT")) {
+        return;
+    }
+
+    if (!balloon->status_report.enabled) {
+        return;
+    }
+
+    balloon->status_report.committed = msgStatus->num_committed;
+    balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE;
+    balloon->status_report.available = msgStatus->num_avail;
+    balloon->status_report.available *= HV_BALLOON_PAGE_SIZE;
+    balloon->status_report.received = true;
+
+    qapi_event_send_hv_balloon_status_report(balloon->status_report.committed,
+                                             balloon->status_report.available);
+}
+
+HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
+{
+    HvBalloon *balloon;
+    HvBalloonInfo *info;
+
+    balloon = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL));
+    if (!balloon) {
+        error_setg(errp, "no %s device present", TYPE_HV_BALLOON);
+        return NULL;
+    }
+
+    if (!balloon->status_report.enabled) {
+        error_setg(errp, "guest memory status reporting not enabled");
+        return NULL;
+    }
+
+    if (!balloon->status_report.received) {
+        error_setg(errp, "no guest memory status report received yet");
+        return NULL;
+    }
+
+    info = g_malloc0(sizeof(*info));
+    info->committed = balloon->status_report.committed;
+    info->available = balloon->status_report.available;
+    return info;
+}
+
+static void hv_balloon_handle_unballoon_response(HvBalloon *balloon,
+                                                 HvBalloonReq *req,
+                                                 StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_unballoon_response *msgUrR = vmreq->msg;
+
+    if (balloon->state != S_UNBALLOON_REPLY_WAIT) {
+        warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state",
+                    balloon->state);
+        return;
+    }
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR),
+                                    "DM_UNBALLOON_RESPONSE"))
+        return;
+
+    trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id);
+
+    balloon->trans_id++;
+
+    if (balloon->hot_add_diff > 0) {
+        bool can_hot_add = balloon->caps.cap_bits.hot_add;
+
+        assert(can_hot_add);
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
+    } else {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+    }
+}
+
+static void hv_balloon_handle_hot_add_response(HvBalloon *balloon,
+                                               HvBalloonReq *req,
+                                               StateDesc *stdesc)
+{
+    PageRange *hot_add_range = &balloon->hot_add_range;
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_hot_add_response *msgHaR = vmreq->msg;
+    OurRange *our_range;
+
+    if (balloon->state != S_HOT_ADD_REPLY_WAIT) {
+        warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state",
+                    balloon->state);
+        return;
+    }
+
+    assert(balloon->our_range);
+    our_range = OUR_RANGE(balloon->our_range);
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR),
+                                    "DM_HOT_ADD_RESPONSE"))
+        return;
+
+    trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result,
+                                      msgHaR->page_count);
+
+    balloon->trans_id++;
+
+    if (msgHaR->result) {
+        if (msgHaR->page_count > balloon->ha_current_count) {
+            warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")",
+                        msgHaR->page_count, balloon->ha_current_count);
+            msgHaR->page_count = balloon->ha_current_count;
+        }
+
+        hvb_our_range_mark_added(our_range, msgHaR->page_count);
+        hot_add_range->start += msgHaR->page_count;
+        hot_add_range->count -= msgHaR->page_count;
+    }
+
+    if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) {
+        /*
+         * the current planned range was only partially hot-added, take note
+         * how much of it remains and don't attempt any further hot adds
+         */
+        our_range_mark_remaining_unusable(our_range);
+
+        goto ret_idle;
+    }
+
+    /* any pages remaining to hot-add in our range? */
+    if (hot_add_range->count > 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
+        return;
+    }
+
+ret_idle:
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+}
+
+static void hv_balloon_handle_balloon_response(HvBalloon *balloon,
+                                               HvBalloonReq *req,
+                                               StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_balloon_response *msgBR = vmreq->msg;
+
+    if (balloon->state != S_BALLOON_REPLY_WAIT) {
+        warn_report("unexpected DM_BALLOON_RESPONSE in %d state",
+                    balloon->state);
+        return;
+    }
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR),
+                                    "DM_BALLOON_RESPONSE"))
+        return;
+
+    trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count,
+                                      msgBR->more_pages);
+
+    if (vmreq->msglen < sizeof(*msgBR) +
+        (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) {
+        warn_report("DM_BALLOON_RESPONSE too short for the range count");
+        return;
+    }
+
+    if (msgBR->range_count == 0) {
+        /* The guest is already at its minimum size */
+        balloon->balloon_diff = 0;
+        goto ret_end_trans;
+    } else {
+        hv_balloon_handle_remove_ranges(balloon,
+                                        msgBR->range_array,
+                                        msgBR->range_count);
+    }
+
+    /* More responses expected? */
+    if (msgBR->more_pages) {
+        return;
+    }
+
+ret_end_trans:
+    balloon->trans_id++;
+
+    if (balloon->balloon_diff > 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
+    } else {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+    }
+}
+
+static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req,
+                                     StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_message *msg = vmreq->msg;
+
+    if (vmreq->msglen < sizeof(msg->hdr)) {
+        return;
+    }
+
+    switch (msg->hdr.type) {
+    case DM_VERSION_REQUEST:
+        hv_balloon_handle_version_request(balloon, req, stdesc);
+        break;
+
+    case DM_CAPABILITIES_REPORT:
+        hv_balloon_handle_caps_report(balloon, req, stdesc);
+        break;
+
+    case DM_STATUS_REPORT:
+        hv_balloon_handle_status_report(balloon, req);
+        break;
+
+    case DM_MEM_HOT_ADD_RESPONSE:
+        hv_balloon_handle_hot_add_response(balloon, req, stdesc);
+        break;
+
+    case DM_UNBALLOON_RESPONSE:
+        hv_balloon_handle_unballoon_response(balloon, req, stdesc);
+        break;
+
+    case DM_BALLOON_RESPONSE:
+        hv_balloon_handle_balloon_response(balloon, req, stdesc);
+        break;
+
+    default:
+        warn_report("unknown DM message %u", msg->hdr.type);
+        break;
+    }
+}
+
+static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan;
+    HvBalloonReq *req;
+
+    if (balloon->state == S_WAIT_RESET ||
+        balloon->state == S_POST_RESET_CLOSED) {
+        return false;
+    }
+
+    chan = hv_balloon_get_channel(balloon);
+    if (vmbus_channel_recv_start(chan)) {
+        return false;
+    }
+
+    while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
+        hv_balloon_handle_packet(balloon, req, stdesc);
+        vmbus_free_req(req);
+        vmbus_channel_recv_pop(chan);
+
+        if (stdesc->state != S_NO_CHANGE) {
+            break;
+        }
+    }
+
+    return vmbus_channel_recv_done(chan) > 0;
+}
+
+/* old state handler -> new state transition (potential) */
+static bool hv_balloon_event_loop_state(HvBalloon *balloon)
+{
+    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;
+
+    hv_balloon_handle_state(balloon, &state_new);
+    return hv_balloon_state_set(balloon, state_new.state, state_new.desc);
+}
+
+/* VMBus message -> new state transition (potential) */
+static bool hv_balloon_event_loop_recv(HvBalloon *balloon)
+{
+    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;
+    bool any_recv, state_changed;
+
+    any_recv = hv_balloon_recv_channel(balloon, &state_new);
+    state_changed = hv_balloon_state_set(balloon,
+                                         state_new.state, state_new.desc);
+
+    return state_changed || any_recv;
+}
+
+static void hv_balloon_event_loop(HvBalloon *balloon)
+{
+    bool state_repeat, recv_repeat;
+
+    do {
+        state_repeat = hv_balloon_event_loop_state(balloon);
+        recv_repeat = hv_balloon_event_loop_recv(balloon);
+    } while (state_repeat || recv_repeat);
+}
+
+static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan)
+{
+    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
+
+    hv_balloon_event_loop(balloon);
+}
+
+static void hv_balloon_stat(void *opaque, BalloonInfo *info)
+{
+    HvBalloon *balloon = opaque;
+    info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr)
+        << HV_BALLOON_PFN_SHIFT;
+}
+
+static void hv_balloon_to_target(void *opaque, ram_addr_t target)
+{
+    HvBalloon *balloon = opaque;
+    uint64_t target_pages = target >> HV_BALLOON_PFN_SHIFT;
+
+    if (!target_pages) {
+        return;
+    }
+
+    /*
+     * always set target_changed, even with unchanged target, as the user
+     * might be asking us to try again reaching it
+     */
+    balloon->target = target_pages;
+    balloon->target_changed = true;
+
+    hv_balloon_event_loop(balloon);
+}
+
+static int hv_balloon_vmdev_open_channel(VMBusChannel *chan)
+{
+    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
+
+    if (balloon->state != S_POST_RESET_CLOSED) {
+        warn_report("guest trying to open a DM channel in invalid %d state",
+                    balloon->state);
+        return -EINVAL;
+    }
+
+    HV_BALLOON_SET_STATE(balloon, S_VERSION);
+    hv_balloon_event_loop(balloon);
+
+    return 0;
+}
+
+static void hv_balloon_vmdev_close_channel(VMBusChannel *chan)
+{
+    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
+
+    timer_del(&balloon->post_init_timer);
+
+    /* Don't report stale data */
+    balloon->status_report.received = false;
+
+    HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET);
+    hv_balloon_event_loop(balloon);
+}
+
+static void hv_balloon_post_init_timer(void *opaque)
+{
+    HvBalloon *balloon = opaque;
+
+    if (balloon->state != S_POST_INIT_WAIT) {
+        return;
+    }
+
+    HV_BALLOON_SET_STATE(balloon, S_IDLE);
+    hv_balloon_event_loop(balloon);
+}
+
+static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon)
+{
+    g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free);
+}
+
+static void hv_balloon_system_reset(void *opaque)
+{
+    HvBalloon *balloon = HV_BALLOON(opaque);
+
+    hv_balloon_system_reset_unrealize_common(balloon);
+}
+
+static void hv_balloon_ensure_mr(HvBalloon *balloon)
+{
+    MemoryRegion *hostmem_mr;
+
+    assert(balloon->hostmem);
+
+    if (balloon->mr) {
+        return;
+    }
+
+    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
+
+    balloon->mr = g_new0(MemoryRegion, 1);
+    memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
+                       memory_region_size(hostmem_mr));
+
+    /*
+     * The VM can indicate an alignment up to 32 GiB. Memory device core can
+     * usually only handle/guarantee 1 GiB alignment. The user will have to
+     * specify a larger maxmem eventually.
+     *
+     * The memory device core will warn the user in case maxmem might have to be
+     * increased and will fail plugging the device if there is not sufficient
+     * space after alignment.
+     *
+     * TODO: we could do the alignment ourselves in a slightly bigger region.
+     * But this feels better, although the warning might be annoying. Maybe
+     * we can optimize that in the future (e.g., with such a device on the
+     * cmdline place/size the device memory region differently.
+     */
+    balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
+}
+
+static void hv_balloon_free_mr(HvBalloon *balloon)
+{
+    if (!balloon->mr) {
+        return;
+    }
+
+    object_unparent(OBJECT(balloon->mr));
+    g_clear_pointer(&balloon->mr, g_free);
+}
+
+static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
+{
+    ERRP_GUARD();
+    HvBalloon *balloon = HV_BALLOON(vdev);
+    int ret;
+
+    balloon->state = S_WAIT_RESET;
+
+    ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat,
+                                   balloon);
+    if (ret < 0) {
+        /* This also protects against having multiple hv-balloon instances */
+        error_setg(errp, "Only one balloon device is supported");
+        return;
+    }
+
+    if (balloon->hostmem) {
+        if (host_memory_backend_is_mapped(balloon->hostmem)) {
+            Object *obj = OBJECT(balloon->hostmem);
+
+            error_setg(errp, "'%s' property specifies a busy memdev: %s",
+                       HV_BALLOON_MEMDEV_PROP,
+                       object_get_canonical_path_component(obj));
+            goto out_balloon_handler;
+        }
+
+        hv_balloon_ensure_mr(balloon);
+
+        /* This is rather unlikely to happen, but let's still check for it. */
+        if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr),
+                             HV_BALLOON_PAGE_SIZE)) {
+            error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64,
+                       HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE);
+            goto out_balloon_handler;
+        }
+
+        host_memory_backend_set_mapped(balloon->hostmem, true);
+        vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem),
+                             DEVICE(balloon));
+    } else if (balloon->addr) {
+        error_setg(errp, "'%s' property must not be set without a memdev",
+                   HV_BALLOON_MEMDEV_PROP);
+        goto out_balloon_handler;
+    }
+
+    timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL,
+                  hv_balloon_post_init_timer, balloon);
+
+    qemu_register_reset(hv_balloon_system_reset, balloon);
+
+    return;
+
+out_balloon_handler:
+    qemu_remove_balloon_handler(balloon);
+}
+
+/*
+ * VMBus device reset has to be implemented in case the guest decides to
+ * disconnect and reconnect to the VMBus without rebooting the whole system.
+ *
+ * However, the hot-added memory can't be removed here as Windows keeps on using
+ * it until the system is restarted, even after disconnecting from the VMBus.
+ */
+static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
+{
+    HvBalloon *balloon = HV_BALLOON(vdev);
+
+    if (balloon->state == S_POST_RESET_CLOSED) {
+        return;
+    }
+
+    if (balloon->our_range) {
+        hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range));
+    }
+
+    hvb_page_range_tree_destroy(&balloon->removed_guest);
+    hvb_page_range_tree_destroy(&balloon->removed_both);
+    hvb_page_range_tree_init(&balloon->removed_guest);
+    hvb_page_range_tree_init(&balloon->removed_both);
+
+    balloon->trans_id = 0;
+    balloon->removed_guest_ctr = 0;
+    balloon->removed_both_ctr = 0;
+
+    HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED);
+    hv_balloon_event_loop(balloon);
+}
+
+/*
+ * Clean up things that were (possibly) allocated pre-realization, for example
+ * from memory_device_pre_plug(), so we don't leak them if the device don't
+ * actually get realized in the end.
+ */
+static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
+{
+    hv_balloon_free_mr(balloon);
+    balloon->addr = 0;
+
+    balloon->memslot_count = 0;
+}
+
+static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev)
+{
+    HvBalloon *balloon = HV_BALLOON(vdev);
+
+    qemu_unregister_reset(hv_balloon_system_reset, balloon);
+
+    hv_balloon_system_reset_unrealize_common(balloon);
+
+    qemu_remove_balloon_handler(balloon);
+
+    if (balloon->hostmem) {
+        vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem),
+                               DEVICE(balloon));
+        host_memory_backend_set_mapped(balloon->hostmem, false);
+    }
+
+    hvb_page_range_tree_destroy(&balloon->removed_guest);
+    hvb_page_range_tree_destroy(&balloon->removed_both);
+
+    hv_balloon_unrealize_finalize_common(balloon);
+}
+
+static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md)
+{
+    return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP,
+                                    &error_abort);
+}
+
+static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                   Error **errp)
+{
+    object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp);
+}
+
+static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
+                                                     Error **errp)
+{
+    HvBalloon *balloon = HV_BALLOON(md);
+
+    if (!balloon->hostmem) {
+        return NULL;
+    }
+
+    hv_balloon_ensure_mr(balloon);
+
+    return balloon->mr;
+}
+
+static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
+                                           MemoryDeviceInfo *info)
+{
+    HvBalloonDeviceInfo *hi = g_new0(HvBalloonDeviceInfo, 1);
+    const HvBalloon *balloon = HV_BALLOON(md);
+    DeviceState *dev = DEVICE(md);
+
+    if (dev->id) {
+        hi->id = g_strdup(dev->id);
+    }
+
+    if (balloon->hostmem) {
+        hi->memdev = object_get_canonical_path(OBJECT(balloon->hostmem));
+        hi->memaddr = balloon->addr;
+        hi->has_memaddr = true;
+        hi->max_size = memory_region_size(balloon->mr);
+        /* TODO: expose current provided size or something else? */
+    } else {
+        hi->max_size = 0;
+    }
+
+    info->u.hv_balloon.data = hi;
+    info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON;
+}
+
+static void hv_balloon_decide_memslots(MemoryDeviceState *md,
+                                       unsigned int limit)
+{
+    HvBalloon *balloon = HV_BALLOON(md);
+    MemoryRegion *hostmem_mr;
+    uint64_t region_size, memslot_size, memslots;
+
+    /* We're called exactly once, before realizing the device. */
+    assert(!balloon->memslot_count);
+
+    /* We should not be called if we don't have a memory backend */
+    assert(balloon->hostmem);
+
+    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
+    region_size = memory_region_size(hostmem_mr);
+
+    assert(region_size > 0);
+    memslot_size = QEMU_ALIGN_UP(region_size / limit,
+                                 HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN);
+    memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;
+
+    if (memslots > 1) {
+        balloon->memslot_size = memslot_size;
+    } else {
+        balloon->memslot_size = region_size;
+    }
+
+    assert(memslots <= UINT_MAX);
+    balloon->memslot_count = memslots;
+}
+
+static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md)
+{
+    const HvBalloon *balloon = HV_BALLOON(md);
+
+    /* We're called after setting the suggested limit. */
+    assert(balloon->memslot_count > 0);
+
+    return balloon->memslot_count;
+}
+
+static void hv_balloon_init(Object *obj)
+{
+}
+
+static void hv_balloon_finalize(Object *obj)
+{
+    HvBalloon *balloon = HV_BALLOON(obj);
+
+    hv_balloon_unrealize_finalize_common(balloon);
+}
+
+static Property hv_balloon_properties[] = {
+    DEFINE_PROP_BOOL("status-report", HvBalloon,
+                     status_report.enabled, false),
+
+    /* MEMORY_DEVICE props */
+    DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0),
+
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void hv_balloon_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, hv_balloon_properties);
+    qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+
+    vdc->vmdev_realize = hv_balloon_vmdev_realize;
+    vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize;
+    vdc->vmdev_reset = hv_balloon_vmdev_reset;
+    vdc->open_channel = hv_balloon_vmdev_open_channel;
+    vdc->close_channel = hv_balloon_vmdev_close_channel;
+    vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify;
+
+    mdc->get_addr = hv_balloon_md_get_addr;
+    mdc->set_addr = hv_balloon_md_set_addr;
+    mdc->get_plugged_size = memory_device_get_region_size;
+    mdc->get_memory_region = hv_balloon_md_get_memory_region;
+    mdc->decide_memslots = hv_balloon_decide_memslots;
+    mdc->get_memslots = hv_balloon_get_memslots;
+    mdc->fill_device_info = hv_balloon_md_fill_device_info;
+}
diff --git a/hw/hyperv/meson.build b/hw/hyperv/meson.build
index b43f119ea5..d3d2668c71 100644
--- a/hw/hyperv/meson.build
+++ b/hw/hyperv/meson.build
@@ -2,3 +2,4 @@ specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'))
 specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c'))
 specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c'))
 specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c'))
+specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c'))
diff --git a/hw/hyperv/trace-events b/hw/hyperv/trace-events
index b4c35ca8e3..7963c215b1 100644
--- a/hw/hyperv/trace-events
+++ b/hw/hyperv/trace-events
@@ -16,3 +16,21 @@ vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d"
 vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d"
 vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d"
 vmbus_close_channel(uint32_t chan_id) "channel #%d"
+
+# hv-balloon
+hv_balloon_state_change(const char *tostr) "-> %s"
+hv_balloon_incoming_version(uint16_t major, uint16_t minor) "incoming proto version %u.%u"
+hv_balloon_incoming_caps(uint32_t caps) "incoming caps 0x%x"
+hv_balloon_outgoing_unballoon(uint32_t trans_id, uint64_t count, uint64_t start, uint64_t rempages) "posting unballoon %"PRIu32" for %"PRIu64" @ 0x%"PRIx64", remaining %"PRIu64
+hv_balloon_incoming_unballoon(uint32_t trans_id) "incoming unballoon response %"PRIu32
+hv_balloon_outgoing_hot_add(uint32_t trans_id, uint64_t count, uint64_t start) "posting hot add %"PRIu32" for %"PRIu64" @ 0x%"PRIx64
+hv_balloon_incoming_hot_add(uint32_t trans_id, uint32_t result, uint32_t count) "incoming hot add response %"PRIu32", result %"PRIu32", count %"PRIu32
+hv_balloon_outgoing_balloon(uint32_t trans_id, uint64_t count, uint64_t rempages) "posting balloon %"PRIu32" for %"PRIu64", remaining %"PRIu64
+hv_balloon_incoming_balloon(uint32_t trans_id, uint32_t range_count, uint32_t more_pages) "incoming balloon response %"PRIu32", ranges %"PRIu32", more %"PRIu32
+hv_balloon_our_range_add(uint64_t count, uint64_t start) "adding our range %"PRIu64" @ 0x%"PRIx64
+hv_balloon_remove_response(uint64_t count, uint64_t start, unsigned int both) "processing remove response range %"PRIu64" @ 0x%"PRIx64", both %u"
+hv_balloon_remove_response_hole(uint64_t counthole, uint64_t starthole, uint64_t countrange, uint64_t startrange, uint64_t starthpr, unsigned int both) "response range hole %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64", before our start 0x%"PRIx64", both %u"
+hv_balloon_remove_response_common(uint64_t countcommon, uint64_t startcommon, uint64_t countrange, uint64_t startrange, uint64_t counthpr, uint64_t starthpr, uint64_t removed, unsigned int both) "response common range %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64" with our %"PRIu64" @ 0x%"PRIx64", removed %"PRIu64", both %u"
+hv_balloon_remove_response_remainder(uint64_t count, uint64_t start, unsigned int both) "remove response remaining range %"PRIu64" @ 0x%"PRIx64", both %u"
+hv_balloon_map_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "mapping memslot %u / %u @ 0x%"PRIx64
+hv_balloon_unmap_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "unmapping memslot %u / %u @ 0x%"PRIx64
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 94772c726b..55850791df 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -45,6 +45,7 @@ config PC
     select ACPI_VMGENID
     select VIRTIO_PMEM_SUPPORTED
     select VIRTIO_MEM_SUPPORTED
+    select HV_BALLOON_SUPPORTED
 
 config PC_PCI
     bool
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index a731738411..b2b4be9983 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -490,6 +490,12 @@ int xen_evtchn_set_callback_param(uint64_t param)
         break;
     }
 
+    /* If the guest has set a per-vCPU callback vector, prefer that. */
+    if (gsi && kvm_xen_has_vcpu_callback_vector()) {
+        in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
+        gsi = 0;
+    }
+
     if (!ret) {
         /* If vector delivery was turned *off* then tell the kernel */
         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
@@ -1129,6 +1135,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset)
         return -ESRCH;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     return xen_evtchn_soft_reset();
 }
 
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 21c30e3659..839ec920a1 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -541,7 +541,5 @@ int xen_gnttab_reset(void)
     s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
     s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
 
-    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
-
     return 0;
 }
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 660d0b72f9..8e716a7009 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -1357,10 +1357,12 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token)
     } else {
         deliver_watch(s, path, token);
         /*
-         * If the message was queued because there was already ring activity,
-         * no need to wake the guest. But if not, we need to send the evtchn.
+         * Attempt to queue the message into the actual ring, and send
+         * the event channel notification if any bytes are copied.
          */
-        xen_be_evtchn_notify(s->eh, s->be_port);
+        if (s->rsp_pending && put_rsp(s) > 0) {
+            xen_be_evtchn_notify(s->eh, s->be_port);
+        }
     }
 }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6031234a73..1aef21aa2c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -27,6 +27,7 @@
 #include "hw/i386/pc.h"
 #include "hw/char/serial.h"
 #include "hw/char/parallel.h"
+#include "hw/hyperv/hv-balloon.h"
 #include "hw/i386/fw_cfg.h"
 #include "hw/i386/vmport.h"
 #include "sysemu/cpus.h"
@@ -57,6 +58,7 @@
 #include "hw/i386/kvm/xen_evtchn.h"
 #include "hw/i386/kvm/xen_gnttab.h"
 #include "hw/i386/kvm/xen_xenstore.h"
+#include "hw/mem/memory-device.h"
 #include "e820_memory_layout.h"
 #include "trace.h"
 #include CONFIG_DEVICES
@@ -1422,6 +1424,21 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev,
     error_propagate(errp, local_err);
 }
 
+static void pc_hv_balloon_pre_plug(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp)
+{
+    /* The vmbus handler has no hotplug handler; we should never end up here. */
+    g_assert(!dev->hotplugged);
+    memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
+                           errp);
+}
+
+static void pc_hv_balloon_plug(HotplugHandler *hotplug_dev,
+                               DeviceState *dev, Error **errp)
+{
+    memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
+}
+
 static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                                           DeviceState *dev, Error **errp)
 {
@@ -1452,6 +1469,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
             return;
         }
         pcms->iommu = dev;
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
+        pc_hv_balloon_pre_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -1464,6 +1483,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
         x86_cpu_plug(hotplug_dev, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
         virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
+        pc_hv_balloon_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -1505,6 +1526,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine,
         object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
         object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) ||
         object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON) ||
         object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) {
         return HOTPLUG_HANDLER(machine);
     }
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index ae38f48f16..e0704b8dc3 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -20,6 +20,22 @@
 #include "exec/address-spaces.h"
 #include "trace.h"
 
+static bool memory_device_is_empty(const MemoryDeviceState *md)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    Error *local_err = NULL;
+    MemoryRegion *mr;
+
+    /* dropping const here is fine as we don't touch the memory region */
+    mr = mdc->get_memory_region((MemoryDeviceState *)md, &local_err);
+    if (local_err) {
+        /* Not empty, we'll report errors later when ontaining the MR again. */
+        error_free(local_err);
+        return false;
+    }
+    return !mr;
+}
+
 static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
 {
     const MemoryDeviceState *md_a = MEMORY_DEVICE(a);
@@ -220,12 +236,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
         return 0;
     }
 
-    if (!QEMU_IS_ALIGNED(size, align)) {
-        error_setg(errp, "backend memory size must be multiple of 0x%"
-                   PRIx64, align);
-        return 0;
-    }
-
     if (hint) {
         if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) {
             error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
@@ -249,6 +259,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
         uint64_t next_addr;
         Range tmp;
 
+        if (memory_device_is_empty(md)) {
+            continue;
+        }
+
         range_init_nofail(&tmp, mdc->get_addr(md),
                           memory_device_get_region_size(md, &error_abort));
 
@@ -292,6 +306,7 @@ MemoryDeviceInfoList *qmp_memory_device_list(void)
         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data);
         MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
 
+        /* Let's query infotmation even for empty memory devices. */
         mdc->fill_device_info(md, info);
 
         QAPI_LIST_APPEND(tail, info);
@@ -311,7 +326,7 @@ static int memory_device_plugged_size(Object *obj, void *opaque)
         const MemoryDeviceState *md = MEMORY_DEVICE(obj);
         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
 
-        if (dev->realized) {
+        if (dev->realized && !memory_device_is_empty(md)) {
             *size += mdc->get_plugged_size(md, &error_abort);
         }
     }
@@ -337,6 +352,11 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
     uint64_t addr, align = 0;
     MemoryRegion *mr;
 
+    /* We support empty memory devices even without device memory. */
+    if (memory_device_is_empty(md)) {
+        return;
+    }
+
     if (!ms->device_memory) {
         error_setg(errp, "the configuration is not prepared for memory devices"
                          " (e.g., for memory hotplug), consider specifying the"
@@ -380,10 +400,17 @@ out:
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 {
     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
-    const unsigned int memslots = memory_device_get_memslots(md);
-    const uint64_t addr = mdc->get_addr(md);
+    unsigned int memslots;
+    uint64_t addr;
     MemoryRegion *mr;
 
+    if (memory_device_is_empty(md)) {
+        return;
+    }
+
+    memslots = memory_device_get_memslots(md);
+    addr = mdc->get_addr(md);
+
     /*
      * We expect that a previous call to memory_device_pre_plug() succeeded, so
      * it can't fail at this point.
@@ -408,6 +435,10 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
     const unsigned int memslots = memory_device_get_memslots(md);
     MemoryRegion *mr;
 
+    if (memory_device_is_empty(md)) {
+        return;
+    }
+
     /*
      * We expect that a previous call to memory_device_pre_plug() succeeded, so
      * it can't fail at this point.
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
index cc24812d2e..c3512c2dae 100644
--- a/hw/virtio/virtio-pmem.c
+++ b/hw/virtio/virtio-pmem.c
@@ -147,7 +147,10 @@ static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem,
 static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem,
                                                    Error **errp)
 {
-    assert(pmem->memdev);
+    if (!pmem->memdev) {
+        error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP);
+        return NULL;
+    }
 
     return &pmem->memdev->mr;
 }