summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--accel/stubs/tcg-stub.c6
-rw-r--r--block/backup.c150
-rw-r--r--block/io.c35
-rw-r--r--block/iscsi.c2
-rw-r--r--block/nbd-client.c3
-rw-r--r--block/nbd-client.h1
-rw-r--r--block/nbd.c10
-rw-r--r--block/trace-events1
-rw-r--r--block/vdi.c7
-rw-r--r--blockdev.c4
-rw-r--r--bsd-user/main.c5
-rw-r--r--chardev/char-serial.c3
-rwxr-xr-xconfigure18
-rw-r--r--default-configs/ppc-softmmu.mak3
-rw-r--r--exec.c6
-rw-r--r--hw/alpha/typhoon.c17
-rw-r--r--hw/arm/msf2-soc.c6
-rw-r--r--hw/arm/msf2-som.c8
-rw-r--r--hw/block/m25p80.c5
-rw-r--r--hw/block/nvme.c3
-rw-r--r--hw/block/tc58128.c3
-rw-r--r--hw/block/xen_disk.c3
-rw-r--r--hw/core/loader-fit.c3
-rw-r--r--hw/core/machine.c4
-rw-r--r--hw/cris/axis_dev88.c3
-rw-r--r--hw/display/bochs-display.c9
-rw-r--r--hw/display/cirrus_vga.c10
-rw-r--r--hw/display/g364fb.c4
-rw-r--r--hw/display/qxl.c30
-rw-r--r--hw/display/sm501.c16
-rw-r--r--hw/display/vga-isa-mm.c5
-rw-r--r--hw/display/vga.c5
-rw-r--r--hw/display/virtio-gpu.c4
-rw-r--r--hw/display/vmware_vga.c3
-rw-r--r--hw/display/xenfb.c3
-rw-r--r--hw/hppa/dino.c3
-rw-r--r--hw/hppa/machine.c12
-rw-r--r--hw/i2c/ppc4xx_i2c.c299
-rw-r--r--hw/i386/acpi-build.c6
-rw-r--r--hw/i386/amd_iommu.h4
-rw-r--r--hw/i386/pc.c23
-rw-r--r--hw/i386/pc_piix.c3
-rw-r--r--hw/i386/pc_q35.c3
-rw-r--r--hw/i386/pc_sysfw.c10
-rw-r--r--hw/i386/xen/xen-mapcache.c3
-rw-r--r--hw/intc/xics.c174
-rw-r--r--hw/intc/xics_kvm.c80
-rw-r--r--hw/intc/xics_pnv.c15
-rw-r--r--hw/ipack/tpci200.c5
-rw-r--r--hw/lm32/lm32_boards.c13
-rw-r--r--hw/lm32/milkymist.c10
-rw-r--r--hw/m68k/mcf5208.c3
-rw-r--r--hw/microblaze/petalogix_ml605_mmu.c7
-rw-r--r--hw/microblaze/petalogix_s3adsp1800_mmu.c7
-rw-r--r--hw/mips/boston.c30
-rw-r--r--hw/mips/mips_fulong2e.c7
-rw-r--r--hw/mips/mips_malta.c25
-rw-r--r--hw/mips/mips_r4k.c15
-rw-r--r--hw/misc/auxbus.c3
-rw-r--r--hw/misc/edu.c3
-rw-r--r--hw/misc/ivshmem.c3
-rw-r--r--hw/misc/macio/mac_dbdma.c21
-rw-r--r--hw/misc/mips_itu.c3
-rw-r--r--hw/net/e1000e.c7
-rw-r--r--hw/net/e1000x_common.c3
-rw-r--r--hw/net/eepro100.c3
-rw-r--r--hw/net/ne2000.h5
-rw-r--r--hw/nios2/boot.c6
-rw-r--r--hw/nvram/spapr_nvram.c11
-rw-r--r--hw/pci-host/prep.c3
-rw-r--r--hw/pci-host/xilinx-pcie.c5
-rw-r--r--hw/ppc/Makefile.objs3
-rw-r--r--hw/ppc/e500.c13
-rw-r--r--hw/ppc/e500plat.c3
-rw-r--r--hw/ppc/mac.h3
-rw-r--r--hw/ppc/mac_newworld.c5
-rw-r--r--hw/ppc/mac_oldworld.c9
-rw-r--r--hw/ppc/pnv.c6
-rw-r--r--hw/ppc/pnv_core.c1
-rw-r--r--hw/ppc/ppc405_boards.c9
-rw-r--r--hw/ppc/ppc405_uc.c7
-rw-r--r--hw/ppc/ppc440.h1
-rw-r--r--hw/ppc/ppc440_bamboo.c5
-rw-r--r--hw/ppc/ppc440_uc.c250
-rw-r--r--hw/ppc/ppc4xx_devs.c22
-rw-r--r--hw/ppc/ppce500_spin.c3
-rw-r--r--hw/ppc/prep.c6
-rw-r--r--hw/ppc/rs6000_mc.c13
-rw-r--r--hw/ppc/sam460ex.c50
-rw-r--r--hw/ppc/spapr.c34
-rw-r--r--hw/ppc/spapr_caps.c13
-rw-r--r--hw/ppc/spapr_rtas.c4
-rw-r--r--hw/ppc/virtex_ml507.c7
-rw-r--r--hw/rdma/vmw/pvrdma.h3
-rw-r--r--hw/riscv/virt.c3
-rw-r--r--hw/s390x/s390-skeys.c3
-rw-r--r--hw/s390x/s390-stattrib.c3
-rw-r--r--hw/s390x/sclp.c3
-rw-r--r--hw/scsi/scsi-disk.c9
-rw-r--r--hw/sd/sd.c8
-rw-r--r--hw/sd/sdhci.c8
-rw-r--r--hw/sh4/r2d.c3
-rw-r--r--hw/smbios/smbios.c15
-rw-r--r--hw/sparc/leon3.c10
-rw-r--r--hw/sparc/sun4m.c10
-rw-r--r--hw/sparc64/niagara.c3
-rw-r--r--hw/sparc64/sun4u.c4
-rw-r--r--hw/timer/Makefile.objs1
-rw-r--r--hw/timer/m41t80.c117
-rw-r--r--hw/tricore/tricore_testboard.c13
-rw-r--r--hw/usb/ccid-card-passthru.c9
-rw-r--r--hw/usb/combined-packet.c3
-rw-r--r--hw/usb/dev-smartcard-reader.c3
-rw-r--r--hw/usb/hcd-ehci.c3
-rw-r--r--hw/usb/hcd-xhci.c7
-rw-r--r--hw/usb/redirect.c3
-rw-r--r--hw/vfio/pci-quirks.c9
-rw-r--r--hw/vfio/pci.c3
-rw-r--r--hw/virtio/virtio-rng.c14
-rw-r--r--hw/xenpv/xen_domainbuild.c13
-rw-r--r--hw/xtensa/xtfpga.c9
-rw-r--r--include/block/block.h5
-rw-r--r--include/block/nbd.h1
-rw-r--r--include/exec/exec-all.h8
-rw-r--r--include/hw/acpi/tpm.h3
-rw-r--r--include/hw/display/xlnx_dp.h5
-rw-r--r--include/hw/i2c/ppc4xx_i2c.h3
-rw-r--r--include/hw/intc/mips_gic.h3
-rw-r--r--include/hw/mips/bios.h3
-rw-r--r--include/hw/net/allwinner_emac.h5
-rw-r--r--include/hw/ppc/spapr.h5
-rw-r--r--include/hw/ppc/xics.h9
-rw-r--r--include/hw/virtio/virtio-net.h3
-rw-r--r--include/qemu/cutils.h7
-rw-r--r--include/qemu/units.h20
-rw-r--r--linux-user/main.c5
-rw-r--r--linux-user/openrisc/signal.c217
-rw-r--r--linux-user/openrisc/target_signal.h2
-rw-r--r--linux-user/openrisc/target_syscall.h28
-rw-r--r--linux-user/ppc/cpu_loop.c121
-rw-r--r--linux-user/signal.c2
-rw-r--r--monitor.c3
-rw-r--r--nbd/client.c4
-rw-r--r--nbd/server.c2
-rw-r--r--qapi/block-core.json7
-rwxr-xr-xscripts/checkpatch.pl1
-rwxr-xr-xscripts/qemu-binfmt-conf.sh10
-rw-r--r--target/i386/cpu.c7
-rw-r--r--target/i386/cpu.h7
-rw-r--r--target/i386/excp_helper.c216
-rw-r--r--target/i386/hyperv-proto.h1
-rw-r--r--target/i386/kvm.c15
-rw-r--r--target/i386/machine.c21
-rw-r--r--target/i386/mem_helper.c6
-rw-r--r--target/i386/monitor.c76
-rw-r--r--target/i386/svm.h14
-rw-r--r--target/i386/svm_helper.c22
-rw-r--r--target/openrisc/Makefile.objs5
-rw-r--r--target/openrisc/cpu.c17
-rw-r--r--target/openrisc/cpu.h61
-rw-r--r--target/openrisc/disas.c170
-rw-r--r--target/openrisc/helper.h4
-rw-r--r--target/openrisc/interrupt.c55
-rw-r--r--target/openrisc/interrupt_helper.c35
-rw-r--r--target/openrisc/machine.c44
-rw-r--r--target/openrisc/mmu.c279
-rw-r--r--target/openrisc/mmu_helper.c40
-rw-r--r--target/openrisc/sys_helper.c84
-rw-r--r--target/openrisc/translate.c303
-rw-r--r--target/ppc/cpu.h8
-rw-r--r--target/ppc/excp_helper.c18
-rw-r--r--target/ppc/fpu_helper.c8
-rw-r--r--target/ppc/helper.h11
-rw-r--r--target/ppc/internal.h5
-rw-r--r--target/ppc/kvm.c118
-rw-r--r--target/ppc/mem_helper.c72
-rw-r--r--target/ppc/mmu_helper.c8
-rw-r--r--target/ppc/translate.c641
-rw-r--r--target/ppc/translate_init.inc.c1
-rw-r--r--target/xtensa/helper.c5
-rw-r--r--tests/benchmark-crypto-cipher.c6
-rw-r--r--tests/benchmark-crypto-hash.c5
-rw-r--r--tests/benchmark-crypto-hmac.c6
-rw-r--r--tests/qemu-iotests/222155
-rw-r--r--tests/qemu-iotests/222.out67
-rwxr-xr-xtests/qemu-iotests/223138
-rw-r--r--tests/qemu-iotests/223.out49
-rw-r--r--tests/qemu-iotests/group2
-rw-r--r--tests/test-cutils.c21
-rw-r--r--tests/test-keyval.c7
-rw-r--r--tests/test-qemu-opts.c9
-rw-r--r--vl.c5
193 files changed, 3335 insertions, 1985 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 42a1892d6a..6630d691d1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -851,6 +851,7 @@ M: BALATON Zoltan <balaton@eik.bme.hu>
 L: qemu-ppc@nongnu.org
 S: Maintained
 F: hw/ide/sii3112.c
+F: hw/timer/m41t80.c
 
 SH4 Machines
 ------------
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index 8ee85ed665..76ae461749 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -16,7 +16,6 @@
 #include "tcg/tcg.h"
 #include "exec/cpu-common.h"
 #include "exec/exec-all.h"
-#include "translate-all.h"
 
 void tb_flush(CPUState *cpu)
 {
@@ -25,8 +24,3 @@ void tb_flush(CPUState *cpu)
 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 {
 }
-
-void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
-                                   int is_cpu_write_access)
-{
-}
diff --git a/block/backup.c b/block/backup.c
index d18be40caf..81895ddbe2 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * error occured, return a negative error number */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If error occured, return a
+ * negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
     int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
         }
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/io.c b/block/io.c
index 7035b78a20..1a2272fad3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2897,18 +2897,11 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
                                                     bool recurse_src)
 {
     BdrvTrackedRequest src_req, dst_req;
-    BlockDriverState *src_bs = src->bs;
-    BlockDriverState *dst_bs = dst->bs;
     int ret;
 
-    if (!src || !dst || !src->bs || !dst->bs) {
+    if (!dst || !dst->bs) {
         return -ENOMEDIUM;
     }
-    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
-    if (ret) {
-        return ret;
-    }
-
     ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
     if (ret) {
         return ret;
@@ -2917,20 +2910,30 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
         return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
     }
 
+    if (!src || !src->bs) {
+        return -ENOMEDIUM;
+    }
+    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+    if (ret) {
+        return ret;
+    }
+
     if (!src->bs->drv->bdrv_co_copy_range_from
         || !dst->bs->drv->bdrv_co_copy_range_to
         || src->bs->encrypted || dst->bs->encrypted) {
         return -ENOTSUP;
     }
-    bdrv_inc_in_flight(src_bs);
-    bdrv_inc_in_flight(dst_bs);
-    tracked_request_begin(&src_req, src_bs, src_offset,
+    bdrv_inc_in_flight(src->bs);
+    bdrv_inc_in_flight(dst->bs);
+    tracked_request_begin(&src_req, src->bs, src_offset,
                           bytes, BDRV_TRACKED_READ);
-    tracked_request_begin(&dst_req, dst_bs, dst_offset,
+    tracked_request_begin(&dst_req, dst->bs, dst_offset,
                           bytes, BDRV_TRACKED_WRITE);
 
-    wait_serialising_requests(&src_req);
-    wait_serialising_requests(&dst_req);
+    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
+        wait_serialising_requests(&src_req);
+        wait_serialising_requests(&dst_req);
+    }
     if (recurse_src) {
         ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
                                                     src, src_offset,
@@ -2944,8 +2947,8 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
     }
     tracked_request_end(&src_req);
     tracked_request_end(&dst_req);
-    bdrv_dec_in_flight(src_bs);
-    bdrv_dec_in_flight(dst_bs);
+    bdrv_dec_in_flight(src->bs);
+    bdrv_dec_in_flight(dst->bs);
     return ret;
 }
 
diff --git a/block/iscsi.c b/block/iscsi.c
index 9beb06d498..ead2bd5aa7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -734,7 +734,7 @@ retry:
         goto out_unlock;
     }
 
-    *pnum = lbasd->num_blocks * iscsilun->block_size;
+    *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size;
 
     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 8d69eaaa32..9686ecbd5e 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -970,6 +970,7 @@ int nbd_client_init(BlockDriverState *bs,
                     const char *export,
                     QCryptoTLSCreds *tlscreds,
                     const char *hostname,
+                    const char *x_dirty_bitmap,
                     Error **errp)
 {
     NBDClientSession *client = nbd_get_client_session(bs);
@@ -982,9 +983,11 @@ int nbd_client_init(BlockDriverState *bs,
     client->info.request_sizes = true;
     client->info.structured_reply = true;
     client->info.base_allocation = true;
+    client->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
     ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
                                 tlscreds, hostname,
                                 &client->ioc, &client->info, errp);
+    g_free(client->info.x_dirty_bitmap);
     if (ret < 0) {
         logout("Failed to negotiate with the NBD server\n");
         return ret;
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 0ece76e5af..cfc90550b9 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -45,6 +45,7 @@ int nbd_client_init(BlockDriverState *bs,
                     const char *export_name,
                     QCryptoTLSCreds *tlscreds,
                     const char *hostname,
+                    const char *x_dirty_bitmap,
                     Error **errp);
 void nbd_client_close(BlockDriverState *bs);
 
diff --git a/block/nbd.c b/block/nbd.c
index 13db4030e6..b198ad775f 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -378,6 +378,12 @@ static QemuOptsList nbd_runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "ID of the TLS credentials to use",
         },
+        {
+            .name = "x-dirty-bitmap",
+            .type = QEMU_OPT_STRING,
+            .help = "experimental: expose named dirty bitmap in place of "
+                    "block status",
+        },
         { /* end of list */ }
     },
 };
@@ -438,8 +444,8 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* NBD handshake */
-    ret = nbd_client_init(bs, sioc, s->export,
-                          tlscreds, hostname, errp);
+    ret = nbd_client_init(bs, sioc, s->export, tlscreds, hostname,
+                          qemu_opt_get(opts, "x-dirty-bitmap"), errp);
  error:
     if (sioc) {
         object_unref(OBJECT(sioc));
diff --git a/block/trace-events b/block/trace-events
index 2d59b53fd3..c35287b48a 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
diff --git a/block/vdi.c b/block/vdi.c
index 1d8ed67dbf..6555cffb88 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -50,6 +50,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
@@ -83,9 +84,6 @@
 /* Command line option for static images. */
 #define BLOCK_OPT_STATIC "static"
 
-#define KiB     1024
-#define MiB     (KiB * KiB)
-
 #define SECTOR_SIZE 512
 #define DEFAULT_CLUSTER_SIZE (1 * MiB)
 
@@ -434,7 +432,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     } else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
         error_setg(errp, "unsupported VDI image (block size %" PRIu32
-                   " is not %u)", header.block_size, DEFAULT_CLUSTER_SIZE);
+                         " is not %" PRIu64 ")",
+                   header.block_size, DEFAULT_CLUSTER_SIZE);
         ret = -ENOTSUP;
         goto fail;
     } else if (header.disk_size >
diff --git a/blockdev.c b/blockdev.c
index 58d7570932..72f5347df5 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1859,7 +1859,7 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
     assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
     backup = common->action->u.blockdev_backup.data;
 
-    bs = qmp_get_root_bs(backup->device, errp);
+    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
         return;
     }
@@ -3517,7 +3517,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
         backup->compress = false;
     }
 
-    bs = qmp_get_root_bs(backup->device, errp);
+    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
         return NULL;
     }
diff --git a/bsd-user/main.c b/bsd-user/main.c
index da3b833975..0d3156974c 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -17,6 +17,7 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-version.h"
 #include <machine/trap.h>
 
@@ -795,9 +796,9 @@ int main(int argc, char **argv)
             if (x86_stack_size <= 0)
                 usage();
             if (*r == 'M')
-                x86_stack_size *= 1024 * 1024;
+                x86_stack_size *= MiB;
             else if (*r == 'k' || *r == 'K')
-                x86_stack_size *= 1024;
+                x86_stack_size *= KiB;
         } else if (!strcmp(r, "L")) {
             interp_prefix = argv[optind++];
         } else if (!strcmp(r, "p")) {
diff --git a/chardev/char-serial.c b/chardev/char-serial.c
index ae548d28da..3299b46853 100644
--- a/chardev/char-serial.c
+++ b/chardev/char-serial.c
@@ -265,7 +265,8 @@ static void qmp_chardev_open_serial(Chardev *chr,
     ChardevHostdev *serial = backend->u.serial.data;
     int fd;
 
-    fd = qmp_chardev_open_file_source(serial->device, O_RDWR, errp);
+    fd = qmp_chardev_open_file_source(serial->device, O_RDWR | O_NONBLOCK,
+                                      errp);
     if (fd < 0) {
         return;
     }
diff --git a/configure b/configure
index 951ff022dd..b99ebdde24 100755
--- a/configure
+++ b/configure
@@ -300,6 +300,24 @@ then
 else
     git_update=no
     git_submodules=""
+
+    if ! test -f "$source_path/ui/keycodemapdb/README"
+    then
+        echo
+        echo "ERROR: missing file $source_path/ui/keycodemapdb/README"
+        echo
+        echo "This is not a GIT checkout but module content appears to"
+        echo "be missing. Do not use 'git archive' or GitHub download links"
+        echo "to acquire QEMU source archives. Non-GIT builds are only"
+        echo "supported with source archives linked from:"
+        echo
+        echo "  https://www.qemu.org/download/"
+        echo
+        echo "Developers working with GIT can use scripts/archive-source.sh"
+        echo "if they need to create valid source archives."
+        echo
+        exit 1
+    fi
 fi
 git="git"
 
diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 851b4afc21..6f12bf84f0 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -22,11 +22,14 @@ CONFIG_OPENPIC_KVM=$(call land,$(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 # For Sam460ex
+CONFIG_SAM460EX=y
 CONFIG_USB_EHCI_SYSBUS=y
 CONFIG_SM501=y
 CONFIG_IDE_SII3112=y
 CONFIG_I2C=y
 CONFIG_BITBANG_I2C=y
+CONFIG_M41T80=y
+CONFIG_VGA_CIRRUS=y
 
 # For Macs
 CONFIG_MAC=y
diff --git a/exec.c b/exec.c
index ee726888a5..4f5df07b6a 100644
--- a/exec.c
+++ b/exec.c
@@ -1027,7 +1027,7 @@ const char *parse_cpu_model(const char *cpu_model)
     return cpu_type;
 }
 
-#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG)
+#if defined(CONFIG_USER_ONLY)
 void tb_invalidate_phys_addr(target_ulong addr)
 {
     mmap_lock();
@@ -1046,6 +1046,10 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
     MemoryRegion *mr;
     hwaddr l = 1;
 
+    if (!tcg_enabled()) {
+        return;
+    }
+
     rcu_read_lock();
     mr = address_space_translate(as, addr, &addr, &l, false, attrs);
     if (!(memory_region_is_ram(mr)
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index d3ed7cdbe8..d74b5b55e1 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -7,6 +7,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "hw/hw.h"
@@ -813,8 +814,6 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
                      qemu_irq *p_rtc_irq,
                      AlphaCPU *cpus[4], pci_map_irq_fn sys_map_irq)
 {
-    const uint64_t MB = 1024 * 1024;
-    const uint64_t GB = 1024 * MB;
     MemoryRegion *addr_space = get_system_memory();
     DeviceState *dev;
     TyphoonState *s;
@@ -855,30 +854,30 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
 
     /* Pchip0 CSRs, 0x801.8000.0000, 256MB.  */
     memory_region_init_io(&s->pchip.region, OBJECT(s), &pchip_ops, s, "pchip0",
-                          256*MB);
+                          256 * MiB);
     memory_region_add_subregion(addr_space, 0x80180000000ULL,
                                 &s->pchip.region);
 
     /* Cchip CSRs, 0x801.A000.0000, 256MB.  */
     memory_region_init_io(&s->cchip.region, OBJECT(s), &cchip_ops, s, "cchip0",
-                          256*MB);
+                          256 * MiB);
     memory_region_add_subregion(addr_space, 0x801a0000000ULL,
                                 &s->cchip.region);
 
     /* Dchip CSRs, 0x801.B000.0000, 256MB.  */
     memory_region_init_io(&s->dchip_region, OBJECT(s), &dchip_ops, s, "dchip0",
-                          256*MB);
+                          256 * MiB);
     memory_region_add_subregion(addr_space, 0x801b0000000ULL,
                                 &s->dchip_region);
 
     /* Pchip0 PCI memory, 0x800.0000.0000, 4GB.  */
-    memory_region_init(&s->pchip.reg_mem, OBJECT(s), "pci0-mem", 4*GB);
+    memory_region_init(&s->pchip.reg_mem, OBJECT(s), "pci0-mem", 4 * GiB);
     memory_region_add_subregion(addr_space, 0x80000000000ULL,
                                 &s->pchip.reg_mem);
 
     /* Pchip0 PCI I/O, 0x801.FC00.0000, 32MB.  */
     memory_region_init_io(&s->pchip.reg_io, OBJECT(s), &alpha_pci_ignore_ops,
-                          NULL, "pci0-io", 32*MB);
+                          NULL, "pci0-io", 32 * MiB);
     memory_region_add_subregion(addr_space, 0x801fc000000ULL,
                                 &s->pchip.reg_io);
 
@@ -899,13 +898,13 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
 
     /* Pchip0 PCI special/interrupt acknowledge, 0x801.F800.0000, 64MB.  */
     memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), &alpha_pci_iack_ops,
-                          b, "pci0-iack", 64*MB);
+                          b, "pci0-iack", 64 * MiB);
     memory_region_add_subregion(addr_space, 0x801f8000000ULL,
                                 &s->pchip.reg_iack);
 
     /* Pchip0 PCI configuration, 0x801.FE00.0000, 16MB.  */
     memory_region_init_io(&s->pchip.reg_conf, OBJECT(s), &alpha_pci_conf1_ops,
-                          b, "pci0-conf", 16*MB);
+                          b, "pci0-conf", 16 * MiB);
     memory_region_add_subregion(addr_space, 0x801fe000000ULL,
                                 &s->pchip.reg_conf);
 
diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c
index 75c44adf7d..edb3ba824f 100644
--- a/hw/arm/msf2-soc.c
+++ b/hw/arm/msf2-soc.c
@@ -23,13 +23,13 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "hw/arm/arm.h"
 #include "exec/address-spaces.h"
 #include "hw/char/serial.h"
 #include "hw/boards.h"
-#include "qemu/cutils.h"
 #include "hw/arm/msf2-soc.h"
 #include "hw/misc/unimp.h"
 
@@ -40,14 +40,14 @@
 
 #define SRAM_BASE_ADDRESS     0x20000000
 
-#define MSF2_ENVM_MAX_SIZE    (512 * K_BYTE)
+#define MSF2_ENVM_MAX_SIZE    (512 * KiB)
 
 /*
  * eSRAM max size is 80k without SECDED(Single error correction and
  * dual error detection) feature and 64k with SECDED.
  * We do not support SECDED now.
  */
-#define MSF2_ESRAM_MAX_SIZE       (80 * K_BYTE)
+#define MSF2_ESRAM_MAX_SIZE       (80 * KiB)
 
 static const uint32_t spi_addr[MSF2_NUM_SPIS] = { 0x40001000 , 0x40011000 };
 static const uint32_t uart_addr[MSF2_NUM_UARTS] = { 0x40000000 , 0x40010000 };
diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c
index 0795a3a3a1..2432b5e935 100644
--- a/hw/arm/msf2-som.c
+++ b/hw/arm/msf2-som.c
@@ -23,20 +23,20 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "hw/boards.h"
 #include "hw/arm/arm.h"
 #include "exec/address-spaces.h"
-#include "qemu/cutils.h"
 #include "hw/arm/msf2-soc.h"
 #include "cpu.h"
 
 #define DDR_BASE_ADDRESS      0xA0000000
-#define DDR_SIZE              (64 * M_BYTE)
+#define DDR_SIZE              (64 * MiB)
 
-#define M2S010_ENVM_SIZE      (256 * K_BYTE)
-#define M2S010_ESRAM_SIZE     (64 * K_BYTE)
+#define M2S010_ENVM_SIZE      (256 * KiB)
+#define M2S010_ESRAM_SIZE     (64 * KiB)
 
 static void emcraft_sf2_s2s010_init(MachineState *machine)
 {
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index b0ed8fa418..e8dfa14b33 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -22,6 +22,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "sysemu/block-backend.h"
 #include "hw/ssi/ssi.h"
@@ -541,12 +542,12 @@ static void flash_erase(Flash *s, int offset, FlashCMD cmd)
     switch (cmd) {
     case ERASE_4K:
     case ERASE4_4K:
-        len = 4 << 10;
+        len = 4 * KiB;
         capa_to_assert = ER_4K;
         break;
     case ERASE_32K:
     case ERASE4_32K:
-        len = 32 << 10;
+        len = 32 * KiB;
         capa_to_assert = ER_32K;
         break;
     case ERASE_SECTOR:
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 156ecf3c41..fc7dacb816 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/block/block.h"
 #include "hw/hw.h"
 #include "hw/pci/msix.h"
@@ -649,7 +650,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
 
 static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
 {
-    static const int data_len = 4096;
+    static const int data_len = 4 * KiB;
     uint32_t min_nsid = le32_to_cpu(c->nsid);
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);
diff --git a/hw/block/tc58128.c b/hw/block/tc58128.c
index 1d9f7ee000..808ad76ba6 100644
--- a/hw/block/tc58128.c
+++ b/hw/block/tc58128.c
@@ -1,4 +1,5 @@
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/sh4/sh.h"
 #include "hw/loader.h"
@@ -26,7 +27,7 @@ typedef struct {
 
 static tc58128_dev tc58128_devs[2];
 
-#define FLASH_SIZE (16*1024*1024)
+#define FLASH_SIZE (16 * MiB)
 
 static void init_dev(tc58128_dev * dev, const char *filename)
 {
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 9fbc0cdb87..36eff94f84 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -20,6 +20,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include <sys/ioctl.h>
 #include <sys/uio.h>
 
@@ -814,7 +815,7 @@ static int blk_connect(struct XenDevice *xendev)
     xen_pv_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                   " size %" PRId64 " (%" PRId64 " MB)\n",
                   blkdev->type, blkdev->fileproto, blkdev->filename,
-                  blkdev->file_size, blkdev->file_size >> 20);
+                  blkdev->file_size, blkdev->file_size / MiB);
 
     /* Fill in number of sector size and number of sectors */
     xenstore_write_be_int(xendev, "sector-size", blkdev->file_blk);
diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c
index 6387854b54..447f60857d 100644
--- a/hw/core/loader-fit.c
+++ b/hw/core/loader-fit.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "exec/memory.h"
 #include "hw/loader.h"
 #include "hw/loader-fit.h"
@@ -194,7 +195,7 @@ static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
 
     err = fit_image_addr(itb, img_off, "load", &load_addr);
     if (err == -ENOENT) {
-        load_addr = ROUND_UP(kernel_end, 64 * K_BYTE) + (10 * M_BYTE);
+        load_addr = ROUND_UP(kernel_end, 64 * KiB) + (10 * MiB);
     } else if (err) {
         ret = err;
         goto out;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 617e5f8d75..2077328bcc 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-common.h"
@@ -19,7 +20,6 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/numa.h"
 #include "qemu/error-report.h"
-#include "qemu/cutils.h"
 #include "sysemu/qtest.h"
 
 static char *machine_get_accel(Object *obj, Error **errp)
@@ -522,7 +522,7 @@ static void machine_class_init(ObjectClass *oc, void *data)
     MachineClass *mc = MACHINE_CLASS(oc);
 
     /* Default 128 MB as guest ram size */
-    mc->default_ram_size = 128 * M_BYTE;
+    mc->default_ram_size = 128 * MiB;
     mc->rom_file_has_mr = true;
 
     /* numa node memory size aligned on 8MB by default.
diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c
index 56ee398ee5..191292eebf 100644
--- a/hw/cris/axis_dev88.c
+++ b/hw/cris/axis_dev88.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -242,7 +243,7 @@ static const MemoryRegionOps gpio_ops = {
     },
 };
 
-#define INTMEM_SIZE (128 * 1024)
+#define INTMEM_SIZE (128 * KiB)
 
 static struct cris_load_info li;
 
diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c
index 12d8a66c6c..09d8944a1b 100644
--- a/hw/display/bochs-display.c
+++ b/hw/display/bochs-display.c
@@ -5,6 +5,7 @@
  * See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
 #include "hw/display/bochs-vbe.h"
@@ -70,7 +71,7 @@ static uint64_t bochs_display_vbe_read(void *ptr, hwaddr addr,
     case VBE_DISPI_INDEX_ID:
         return VBE_DISPI_ID5;
     case VBE_DISPI_INDEX_VIDEO_MEMORY_64K:
-        return s->vgamem / (64 * 1024);
+        return s->vgamem / (64 * KiB);
     }
 
     if (index >= ARRAY_SIZE(s->vbe_regs)) {
@@ -258,10 +259,10 @@ static void bochs_display_realize(PCIDevice *dev, Error **errp)
 
     s->con = graphic_console_init(DEVICE(dev), 0, &bochs_display_gfx_ops, s);
 
-    if (s->vgamem < (4 * 1024 * 1024)) {
+    if (s->vgamem < 4 * MiB) {
         error_setg(errp, "bochs-display: video memory too small");
     }
-    if (s->vgamem > (256 * 1024 * 1024)) {
+    if (s->vgamem > 256 * MiB) {
         error_setg(errp, "bochs-display: video memory too big");
     }
     s->vgamem = pow2ceil(s->vgamem);
@@ -323,7 +324,7 @@ static void bochs_display_exit(PCIDevice *dev)
 }
 
 static Property bochs_display_properties[] = {
-    DEFINE_PROP_SIZE("vgamem", BochsDisplayState, vgamem, 16 * 1024 * 1024),
+    DEFINE_PROP_SIZE("vgamem", BochsDisplayState, vgamem, 16 * MiB),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 138ae961b9..5e44f00f3f 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -27,6 +27,7 @@
  *   available at http://home.worldonline.dk/~finth/
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "trace.h"
 #include "hw/hw.h"
@@ -2218,7 +2219,7 @@ static inline void cirrus_cursor_compute_yrange(CirrusVGAState *s)
     uint32_t content;
     int y, y_min, y_max;
 
-    src = s->vga.vram_ptr + s->real_vram_size - 16 * 1024;
+    src = s->vga.vram_ptr + s->real_vram_size - 16 * KiB;
     if (s->vga.sr[0x12] & CIRRUS_CURSOR_LARGE) {
         src += (s->vga.sr[0x13] & 0x3c) * 256;
         y_min = 64;
@@ -2347,7 +2348,7 @@ static void cirrus_cursor_draw_line(VGACommonState *s1, uint8_t *d1, int scr_y)
         return;
     }
 
-    src = s->vga.vram_ptr + s->real_vram_size - 16 * 1024;
+    src = s->vga.vram_ptr + s->real_vram_size - 16 * KiB;
     if (s->vga.sr[0x12] & CIRRUS_CURSOR_LARGE) {
         src += (s->vga.sr[0x13] & 0x3c) * 256;
         src += (scr_y - s->vga.hw_cursor_y) * 16;
@@ -2995,8 +2996,7 @@ static void cirrus_init_common(CirrusVGAState *s, Object *owner,
 
     /* I/O handler for LFB */
     memory_region_init_io(&s->cirrus_linear_io, owner, &cirrus_linear_io_ops, s,
-                          "cirrus-linear-io", s->vga.vram_size_mb
-                                              * 1024 * 1024);
+                          "cirrus-linear-io", s->vga.vram_size_mb * MiB);
     memory_region_set_flush_coalesced(&s->cirrus_linear_io);
 
     /* I/O handler for LFB */
@@ -3013,7 +3013,7 @@ static void cirrus_init_common(CirrusVGAState *s, Object *owner,
     memory_region_set_flush_coalesced(&s->cirrus_mmio_io);
 
     s->real_vram_size =
-        (s->device_id == CIRRUS_ID_CLGD5446) ? 4096 * 1024 : 2048 * 1024;
+        (s->device_id == CIRRUS_ID_CLGD5446) ? 4 * MiB : 2 * MiB;
 
     /* XXX: s->vga.vram_size must be a power of two */
     s->cirrus_addr_mask = s->real_vram_size - 1;
diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c
index 3d75394e77..fbc2b2422d 100644
--- a/hw/display/g364fb.c
+++ b/hw/display/g364fb.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "qemu/error-report.h"
 #include "ui/console.h"
@@ -510,8 +511,7 @@ static void g364fb_sysbus_reset(DeviceState *d)
 }
 
 static Property g364fb_sysbus_properties[] = {
-    DEFINE_PROP_UINT32("vram_size", G364SysBusState, g364.vram_size,
-    8 * 1024 * 1024),
+    DEFINE_PROP_UINT32("vram_size", G364SysBusState, g364.vram_size, 8 * MiB),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index a71714ccb4..b09a03997a 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include <zlib.h>
 
 #include "qapi/error.h"
@@ -2012,11 +2013,11 @@ static void qxl_init_ramsize(PCIQXLDevice *qxl)
     if (qxl->vgamem_size_mb > 256) {
         qxl->vgamem_size_mb = 256;
     }
-    qxl->vgamem_size = qxl->vgamem_size_mb * 1024 * 1024;
+    qxl->vgamem_size = qxl->vgamem_size_mb * MiB;
 
     /* vga ram (bar 0, total) */
     if (qxl->ram_size_mb != -1) {
-        qxl->vga.vram_size = qxl->ram_size_mb * 1024 * 1024;
+        qxl->vga.vram_size = qxl->ram_size_mb * MiB;
     }
     if (qxl->vga.vram_size < qxl->vgamem_size * 2) {
         qxl->vga.vram_size = qxl->vgamem_size * 2;
@@ -2024,7 +2025,7 @@ static void qxl_init_ramsize(PCIQXLDevice *qxl)
 
     /* vram32 (surfaces, 32bit, bar 1) */
     if (qxl->vram32_size_mb != -1) {
-        qxl->vram32_size = qxl->vram32_size_mb * 1024 * 1024;
+        qxl->vram32_size = qxl->vram32_size_mb * MiB;
     }
     if (qxl->vram32_size < 4096) {
         qxl->vram32_size = 4096;
@@ -2032,7 +2033,7 @@ static void qxl_init_ramsize(PCIQXLDevice *qxl)
 
     /* vram (surfaces, 64bit, bar 4+5) */
     if (qxl->vram_size_mb != -1) {
-        qxl->vram_size = (uint64_t)qxl->vram_size_mb * 1024 * 1024;
+        qxl->vram_size = (uint64_t)qxl->vram_size_mb * MiB;
     }
     if (qxl->vram_size < qxl->vram32_size) {
         qxl->vram_size = qxl->vram32_size;
@@ -2134,13 +2135,12 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
     }
 
     /* print pci bar details */
-    dprint(qxl, 1, "ram/%s: %d MB [region 0]\n",
-           qxl->id == 0 ? "pri" : "sec",
-           qxl->vga.vram_size / (1024*1024));
-    dprint(qxl, 1, "vram/32: %" PRIx64 "d MB [region 1]\n",
-           qxl->vram32_size / (1024*1024));
-    dprint(qxl, 1, "vram/64: %" PRIx64 "d MB %s\n",
-           qxl->vram_size / (1024*1024),
+    dprint(qxl, 1, "ram/%s: %" PRId64 " MB [region 0]\n",
+           qxl->id == 0 ? "pri" : "sec", qxl->vga.vram_size / MiB);
+    dprint(qxl, 1, "vram/32: %" PRIx64 " MB [region 1]\n",
+           qxl->vram32_size / MiB);
+    dprint(qxl, 1, "vram/64: %" PRIx64 " MB %s\n",
+           qxl->vram_size / MiB,
            qxl->vram32_size < qxl->vram_size ? "[region 4]" : "[unmapped]");
 
     qxl->ssd.qxl.base.sif = &qxl_interface.base;
@@ -2167,7 +2167,7 @@ static void qxl_realize_primary(PCIDevice *dev, Error **errp)
     qxl->id = 0;
     qxl_init_ramsize(qxl);
     vga->vbe_size = qxl->vgamem_size;
-    vga->vram_size_mb = qxl->vga.vram_size >> 20;
+    vga->vram_size_mb = qxl->vga.vram_size / MiB;
     vga_common_init(vga, OBJECT(dev), true);
     vga_init(vga, OBJECT(dev),
              pci_address_space(dev), pci_address_space_io(dev), false);
@@ -2391,10 +2391,8 @@ static VMStateDescription qxl_vmstate = {
 };
 
 static Property qxl_properties[] = {
-        DEFINE_PROP_UINT32("ram_size", PCIQXLDevice, vga.vram_size,
-                           64 * 1024 * 1024),
-        DEFINE_PROP_UINT64("vram_size", PCIQXLDevice, vram32_size,
-                           64 * 1024 * 1024),
+        DEFINE_PROP_UINT32("ram_size", PCIQXLDevice, vga.vram_size, 64 * MiB),
+        DEFINE_PROP_UINT64("vram_size", PCIQXLDevice, vram32_size, 64 * MiB),
         DEFINE_PROP_UINT32("revision", PCIQXLDevice, revision,
                            QXL_DEFAULT_REVISION),
         DEFINE_PROP_UINT32("debug", PCIQXLDevice, debug, 0),
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 8206ae81a1..9dec0d3218 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -24,7 +24,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/cutils.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -452,12 +452,12 @@
 
 /* SM501 local memory size taken from "linux/drivers/mfd/sm501.c" */
 static const uint32_t sm501_mem_local_size[] = {
-    [0] = 4 * M_BYTE,
-    [1] = 8 * M_BYTE,
-    [2] = 16 * M_BYTE,
-    [3] = 32 * M_BYTE,
-    [4] = 64 * M_BYTE,
-    [5] = 2 * M_BYTE,
+    [0] = 4 * MiB,
+    [1] = 8 * MiB,
+    [2] = 16 * MiB,
+    [3] = 32 * MiB,
+    [4] = 64 * MiB,
+    [5] = 2 * MiB,
 };
 #define get_local_mem_size(s) sm501_mem_local_size[(s)->local_mem_size_index]
 
@@ -1829,7 +1829,7 @@ static void sm501_realize_pci(PCIDevice *dev, Error **errp)
 }
 
 static Property sm501_pci_properties[] = {
-    DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * M_BYTE),
+    DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * MiB),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/vga-isa-mm.c b/hw/display/vga-isa-mm.c
index e887b45651..bd58141117 100644
--- a/hw/display/vga-isa-mm.c
+++ b/hw/display/vga-isa-mm.c
@@ -22,12 +22,13 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/display/vga.h"
 #include "vga_int.h"
 #include "ui/pixel_ops.h"
 
-#define VGA_RAM_SIZE (8192 * 1024)
+#define VGA_RAM_SIZE (8 * MiB)
 
 typedef struct ISAVGAMMState {
     VGACommonState vga;
@@ -130,7 +131,7 @@ int isa_vga_mm_init(hwaddr vram_base,
 
     s = g_malloc0(sizeof(*s));
 
-    s->vga.vram_size_mb = VGA_RAM_SIZE >> 20;
+    s->vga.vram_size_mb = VGA_RAM_SIZE / MiB;
     vga_common_init(&s->vga, NULL, true);
     vga_mm_init(s, vram_base, ctrl_base, it_shift, address_space);
 
diff --git a/hw/display/vga.c b/hw/display/vga.c
index ed476e4e80..d7599182a8 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
 #include "hw/display/vga.h"
@@ -721,7 +722,7 @@ uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
             val = s->vbe_regs[s->vbe_index];
         }
     } else if (s->vbe_index == VBE_DISPI_INDEX_VIDEO_MEMORY_64K) {
-        val = s->vbe_size / (64 * 1024);
+        val = s->vbe_size / (64 * KiB);
     } else {
         val = 0;
     }
@@ -2192,7 +2193,7 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
 
     s->vram_size_mb = uint_clamp(s->vram_size_mb, 1, 512);
     s->vram_size_mb = pow2ceil(s->vram_size_mb);
-    s->vram_size = s->vram_size_mb << 20;
+    s->vram_size = s->vram_size_mb * MiB;
 
     if (!s->vbe_size) {
         s->vbe_size = s->vram_size;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 2dd3c3481a..71a00718e6 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 #include "qemu/iov.h"
 #include "ui/console.h"
@@ -1314,8 +1315,7 @@ static const VMStateDescription vmstate_virtio_gpu = {
 
 static Property virtio_gpu_properties[] = {
     DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
-    DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf.max_hostmem,
-                     256 * 1024 * 1024),
+    DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf.max_hostmem, 256 * MiB),
 #ifdef CONFIG_VIRGL
     DEFINE_PROP_BIT("virgl", VirtIOGPU, conf.flags,
                     VIRTIO_GPU_FLAG_VIRGL_ENABLED, true),
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index bd3e8b3586..08deb08783 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
 #include "hw/loader.h"
@@ -565,7 +566,7 @@ static inline int vmsvga_fifo_length(struct vmsvga_state_s *s)
         s->fifo_next >= SVGA_FIFO_SIZE) {
         return 0;
     }
-    if (s->fifo_max < s->fifo_min + 10 * 1024) {
+    if (s->fifo_max < s->fifo_min + 10 * KiB) {
         return 0;
     }
 
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 911291c5c3..0330dc6f61 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -25,6 +25,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 
 #include "hw/hw.h"
 #include "ui/input.h"
@@ -889,7 +890,7 @@ static int fb_initialise(struct XenDevice *xendev)
 	return rc;
 
     fb_page = fb->c.page;
-    rc = xenfb_configure_fb(fb, videoram * 1024 * 1024U,
+    rc = xenfb_configure_fb(fb, videoram * MiB,
 			    fb_page->width, fb_page->height, fb_page->depth,
 			    fb_page->mem_length, 0, fb_page->line_length);
     if (rc != 0)
diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c
index 26f2704cd5..564b938e3a 100644
--- a/hw/hppa/dino.c
+++ b/hw/hppa/dino.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "hw/hw.h"
@@ -76,7 +77,7 @@
 /* #define xxx    0x200 - bit 9 not used */
 #define RS232INT  0x400
 
-#define DINO_MEM_CHUNK_SIZE (8 * 1024 * 1024) /* 8MB */
+#define DINO_MEM_CHUNK_SIZE (8 * MiB)
 
 #define DINO_PCI_HOST_BRIDGE(obj) \
     OBJECT_CHECK(DinoState, (obj), TYPE_DINO_PCI_HOST_BRIDGE)
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index aba269bb85..cf7c61c6cc 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -17,7 +17,7 @@
 #include "hw/timer/i8254.h"
 #include "hw/char/serial.h"
 #include "hppa_sys.h"
-#include "qemu/cutils.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
 
@@ -178,8 +178,8 @@ static void machine_hppa_init(MachineState *machine)
         }
         qemu_log_mask(CPU_LOG_PAGE, "Kernel loaded at 0x%08" PRIx64
                       "-0x%08" PRIx64 ", entry at 0x%08" PRIx64
-                      ", size %ld kB.\n",
-                      kernel_low, kernel_high, kernel_entry, size / 1024);
+                      ", size %" PRIu64 " kB\n",
+                      kernel_low, kernel_high, kernel_entry, size / KiB);
 
         if (kernel_cmdline) {
             cpu[0]->env.gr[24] = 0x4000;
@@ -203,8 +203,8 @@ static void machine_hppa_init(MachineState *machine)
                (1) Due to sign-extension problems and PDC,
                put the initrd no higher than 1G.
                (2) Reserve 64k for stack.  */
-            initrd_base = MIN(ram_size, 1024 * 1024 * 1024);
-            initrd_base = initrd_base - 64 * 1024;
+            initrd_base = MIN(ram_size, 1 * GiB);
+            initrd_base = initrd_base - 64 * KiB;
             initrd_base = (initrd_base - initrd_size) & TARGET_PAGE_MASK;
 
             if (initrd_base < kernel_high) {
@@ -275,7 +275,7 @@ static void machine_hppa_machine_init(MachineClass *mc)
     mc->max_cpus = HPPA_MAX_CPUS;
     mc->default_cpus = 1;
     mc->is_default = 1;
-    mc->default_ram_size = 512 * M_BYTE;
+    mc->default_ram_size = 512 * MiB;
     mc->default_boot_order = "cd";
 }
 
diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c
index fca80d695a..d6dfafab31 100644
--- a/hw/i2c/ppc4xx_i2c.c
+++ b/hw/i2c/ppc4xx_i2c.c
@@ -34,16 +34,50 @@
 
 #define PPC4xx_I2C_MEM_SIZE 18
 
+enum {
+    IIC_MDBUF = 0,
+    /* IIC_SDBUF = 2, */
+    IIC_LMADR = 4,
+    IIC_HMADR,
+    IIC_CNTL,
+    IIC_MDCNTL,
+    IIC_STS,
+    IIC_EXTSTS,
+    IIC_LSADR,
+    IIC_HSADR,
+    IIC_CLKDIV,
+    IIC_INTRMSK,
+    IIC_XFRCNT,
+    IIC_XTCNTLSS,
+    IIC_DIRECTCNTL
+    /* IIC_INTR */
+};
+
 #define IIC_CNTL_PT         (1 << 0)
 #define IIC_CNTL_READ       (1 << 1)
 #define IIC_CNTL_CHT        (1 << 2)
 #define IIC_CNTL_RPST       (1 << 3)
+#define IIC_CNTL_AMD        (1 << 6)
+#define IIC_CNTL_HMT        (1 << 7)
+
+#define IIC_MDCNTL_EINT     (1 << 2)
+#define IIC_MDCNTL_ESM      (1 << 3)
+#define IIC_MDCNTL_FMDB     (1 << 6)
 
 #define IIC_STS_PT          (1 << 0)
+#define IIC_STS_IRQA        (1 << 1)
 #define IIC_STS_ERR         (1 << 2)
+#define IIC_STS_MDBF        (1 << 4)
 #define IIC_STS_MDBS        (1 << 5)
 
 #define IIC_EXTSTS_XFRA     (1 << 0)
+#define IIC_EXTSTS_BCS_FREE (4 << 4)
+#define IIC_EXTSTS_BCS_BUSY (5 << 4)
+
+#define IIC_INTRMSK_EIMTC   (1 << 0)
+#define IIC_INTRMSK_EITA    (1 << 1)
+#define IIC_INTRMSK_EIIC    (1 << 2)
+#define IIC_INTRMSK_EIHE    (1 << 3)
 
 #define IIC_XTCNTLSS_SRST   (1 << 0)
 
@@ -56,130 +90,83 @@ static void ppc4xx_i2c_reset(DeviceState *s)
 {
     PPC4xxI2CState *i2c = PPC4xx_I2C(s);
 
-    /* FIXME: Should also reset bus?
-     *if (s->address != ADDR_RESET) {
-     *    i2c_end_transfer(s->bus);
-     *}
-     */
-
-    i2c->mdata = 0;
-    i2c->lmadr = 0;
-    i2c->hmadr = 0;
+    i2c->mdidx = -1;
+    memset(i2c->mdata, 0, ARRAY_SIZE(i2c->mdata));
+    /* [hl][ms]addr are not affected by reset */
     i2c->cntl = 0;
     i2c->mdcntl = 0;
     i2c->sts = 0;
-    i2c->extsts = 0x8f;
-    i2c->lsadr = 0;
-    i2c->hsadr = 0;
+    i2c->extsts = IIC_EXTSTS_BCS_FREE;
     i2c->clkdiv = 0;
     i2c->intrmsk = 0;
     i2c->xfrcnt = 0;
     i2c->xtcntlss = 0;
-    i2c->directcntl = 0xf;
-}
-
-static inline bool ppc4xx_i2c_is_master(PPC4xxI2CState *i2c)
-{
-    return true;
+    i2c->directcntl = 0xf; /* all non-reserved bits set */
 }
 
 static uint64_t ppc4xx_i2c_readb(void *opaque, hwaddr addr, unsigned int size)
 {
     PPC4xxI2CState *i2c = PPC4xx_I2C(opaque);
     uint64_t ret;
+    int i;
 
     switch (addr) {
-    case 0:
-        ret = i2c->mdata;
-        if (ppc4xx_i2c_is_master(i2c)) {
+    case IIC_MDBUF:
+        if (i2c->mdidx < 0) {
             ret = 0xff;
-
-            if (!(i2c->sts & IIC_STS_MDBS)) {
-                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
-                              "without starting transfer\n",
-                              TYPE_PPC4xx_I2C, __func__);
-            } else {
-                int pending = (i2c->cntl >> 4) & 3;
-
-                /* get the next byte */
-                int byte = i2c_recv(i2c->bus);
-
-                if (byte < 0) {
-                    qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: read failed "
-                                  "for device 0x%02x\n", TYPE_PPC4xx_I2C,
-                                  __func__, i2c->lmadr);
-                    ret = 0xff;
-                } else {
-                    ret = byte;
-                    /* Raise interrupt if enabled */
-                    /*ppc4xx_i2c_raise_interrupt(i2c)*/;
-                }
-
-                if (!pending) {
-                    i2c->sts &= ~IIC_STS_MDBS;
-                    /*i2c_end_transfer(i2c->bus);*/
-                /*} else if (i2c->cntl & (IIC_CNTL_RPST | IIC_CNTL_CHT)) {*/
-                } else if (pending) {
-                    /* current smbus implementation doesn't like
-                       multibyte xfer repeated start */
-                    i2c_end_transfer(i2c->bus);
-                    if (i2c_start_transfer(i2c->bus, i2c->lmadr >> 1, 1)) {
-                        /* if non zero is returned, the adress is not valid */
-                        i2c->sts &= ~IIC_STS_PT;
-                        i2c->sts |= IIC_STS_ERR;
-                        i2c->extsts |= IIC_EXTSTS_XFRA;
-                    } else {
-                        /*i2c->sts |= IIC_STS_PT;*/
-                        i2c->sts |= IIC_STS_MDBS;
-                        i2c->sts &= ~IIC_STS_ERR;
-                        i2c->extsts = 0;
-                    }
-                }
-                pending--;
-                i2c->cntl = (i2c->cntl & 0xcf) | (pending << 4);
-            }
-        } else {
-            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
-                          TYPE_PPC4xx_I2C, __func__);
+            break;
+        }
+        ret = i2c->mdata[0];
+        if (i2c->mdidx == 3) {
+            i2c->sts &= ~IIC_STS_MDBF;
+        } else if (i2c->mdidx == 0) {
+            i2c->sts &= ~IIC_STS_MDBS;
+        }
+        for (i = 0; i < i2c->mdidx; i++) {
+            i2c->mdata[i] = i2c->mdata[i + 1];
+        }
+        if (i2c->mdidx >= 0) {
+            i2c->mdidx--;
         }
         break;
-    case 4:
+    case IIC_LMADR:
         ret = i2c->lmadr;
         break;
-    case 5:
+    case IIC_HMADR:
         ret = i2c->hmadr;
         break;
-    case 6:
+    case IIC_CNTL:
         ret = i2c->cntl;
         break;
-    case 7:
+    case IIC_MDCNTL:
         ret = i2c->mdcntl;
         break;
-    case 8:
+    case IIC_STS:
         ret = i2c->sts;
         break;
-    case 9:
-        ret = i2c->extsts;
+    case IIC_EXTSTS:
+        ret = i2c_bus_busy(i2c->bus) ?
+              IIC_EXTSTS_BCS_BUSY : IIC_EXTSTS_BCS_FREE;
         break;
-    case 10:
+    case IIC_LSADR:
         ret = i2c->lsadr;
         break;
-    case 11:
+    case IIC_HSADR:
         ret = i2c->hsadr;
         break;
-    case 12:
+    case IIC_CLKDIV:
         ret = i2c->clkdiv;
         break;
-    case 13:
+    case IIC_INTRMSK:
         ret = i2c->intrmsk;
         break;
-    case 14:
+    case IIC_XFRCNT:
         ret = i2c->xfrcnt;
         break;
-    case 15:
+    case IIC_XTCNTLSS:
         ret = i2c->xtcntlss;
         break;
-    case 16:
+    case IIC_DIRECTCNTL:
         ret = i2c->directcntl;
         break;
     default:
@@ -202,99 +189,127 @@ static void ppc4xx_i2c_writeb(void *opaque, hwaddr addr, uint64_t value,
     PPC4xxI2CState *i2c = opaque;
 
     switch (addr) {
-    case 0:
-        i2c->mdata = value;
-        if (!i2c_bus_busy(i2c->bus)) {
-            /* assume we start a write transfer */
-            if (i2c_start_transfer(i2c->bus, i2c->lmadr >> 1, 0)) {
-                /* if non zero is returned, the adress is not valid */
-                i2c->sts &= ~IIC_STS_PT;
-                i2c->sts |= IIC_STS_ERR;
-                i2c->extsts |= IIC_EXTSTS_XFRA;
-            } else {
-                i2c->sts |= IIC_STS_PT;
-                i2c->sts &= ~IIC_STS_ERR;
-                i2c->extsts = 0;
-            }
+    case IIC_MDBUF:
+        if (i2c->mdidx >= 3) {
+            break;
         }
-        if (i2c_bus_busy(i2c->bus)) {
-            if (i2c_send(i2c->bus, i2c->mdata)) {
-                /* if the target return non zero then end the transfer */
-                i2c->sts &= ~IIC_STS_PT;
-                i2c->sts |= IIC_STS_ERR;
-                i2c->extsts |= IIC_EXTSTS_XFRA;
-                i2c_end_transfer(i2c->bus);
-            }
+        i2c->mdata[++i2c->mdidx] = value;
+        if (i2c->mdidx == 3) {
+            i2c->sts |= IIC_STS_MDBF;
+        } else if (i2c->mdidx == 0) {
+            i2c->sts |= IIC_STS_MDBS;
         }
         break;
-    case 4:
+    case IIC_LMADR:
         i2c->lmadr = value;
-        if (i2c_bus_busy(i2c->bus)) {
-            i2c_end_transfer(i2c->bus);
-        }
         break;
-    case 5:
+    case IIC_HMADR:
         i2c->hmadr = value;
         break;
-    case 6:
-        i2c->cntl = value;
-        if (i2c->cntl & IIC_CNTL_PT) {
-            if (i2c->cntl & IIC_CNTL_READ) {
-                if (i2c_bus_busy(i2c->bus)) {
-                    /* end previous transfer */
-                    i2c->sts &= ~IIC_STS_PT;
-                    i2c_end_transfer(i2c->bus);
+    case IIC_CNTL:
+        i2c->cntl = value & ~IIC_CNTL_PT;
+        if (value & IIC_CNTL_AMD) {
+            qemu_log_mask(LOG_UNIMP, "%s: only 7 bit addresses supported\n",
+                          __func__);
+        }
+        if (value & IIC_CNTL_HMT && i2c_bus_busy(i2c->bus)) {
+            i2c_end_transfer(i2c->bus);
+            if (i2c->mdcntl & IIC_MDCNTL_EINT &&
+                i2c->intrmsk & IIC_INTRMSK_EIHE) {
+                i2c->sts |= IIC_STS_IRQA;
+                qemu_irq_raise(i2c->irq);
+            }
+        } else if (value & IIC_CNTL_PT) {
+            int recv = (value & IIC_CNTL_READ) >> 1;
+            int tct = value >> 4 & 3;
+            int i;
+
+            if (recv && (i2c->lmadr >> 1) >= 0x50 && (i2c->lmadr >> 1) < 0x58) {
+                /* smbus emulation does not like multi byte reads w/o restart */
+                value |= IIC_CNTL_RPST;
+            }
+
+            for (i = 0; i <= tct; i++) {
+                if (!i2c_bus_busy(i2c->bus)) {
+                    i2c->extsts = IIC_EXTSTS_BCS_FREE;
+                    if (i2c_start_transfer(i2c->bus, i2c->lmadr >> 1, recv)) {
+                        i2c->sts |= IIC_STS_ERR;
+                        i2c->extsts |= IIC_EXTSTS_XFRA;
+                        break;
+                    } else {
+                        i2c->sts &= ~IIC_STS_ERR;
+                    }
                 }
-                if (i2c_start_transfer(i2c->bus, i2c->lmadr >> 1, 1)) {
-                    /* if non zero is returned, the adress is not valid */
-                    i2c->sts &= ~IIC_STS_PT;
+                if (!(i2c->sts & IIC_STS_ERR) &&
+                    i2c_send_recv(i2c->bus, &i2c->mdata[i], !recv)) {
                     i2c->sts |= IIC_STS_ERR;
                     i2c->extsts |= IIC_EXTSTS_XFRA;
-                } else {
-                    /*i2c->sts |= IIC_STS_PT;*/
-                    i2c->sts |= IIC_STS_MDBS;
-                    i2c->sts &= ~IIC_STS_ERR;
-                    i2c->extsts = 0;
+                    break;
+                }
+                if (value & IIC_CNTL_RPST || !(value & IIC_CNTL_CHT)) {
+                    i2c_end_transfer(i2c->bus);
                 }
-            } else {
-                /* we actually already did the write transfer... */
-                i2c->sts &= ~IIC_STS_PT;
+            }
+            i2c->xfrcnt = i;
+            i2c->mdidx = i - 1;
+            if (recv && i2c->mdidx >= 0) {
+                i2c->sts |= IIC_STS_MDBS;
+            }
+            if (recv && i2c->mdidx == 3) {
+                i2c->sts |= IIC_STS_MDBF;
+            }
+            if (i && i2c->mdcntl & IIC_MDCNTL_EINT &&
+                i2c->intrmsk & IIC_INTRMSK_EIMTC) {
+                i2c->sts |= IIC_STS_IRQA;
+                qemu_irq_raise(i2c->irq);
             }
         }
         break;
-    case 7:
-        i2c->mdcntl = value & 0xdf;
+    case IIC_MDCNTL:
+        i2c->mdcntl = value & 0x3d;
+        if (value & IIC_MDCNTL_ESM) {
+            qemu_log_mask(LOG_UNIMP, "%s: slave mode not implemented\n",
+                          __func__);
+        }
+        if (value & IIC_MDCNTL_FMDB) {
+            i2c->mdidx = -1;
+            memset(i2c->mdata, 0, ARRAY_SIZE(i2c->mdata));
+            i2c->sts &= ~(IIC_STS_MDBF | IIC_STS_MDBS);
+        }
         break;
-    case 8:
-        i2c->sts &= ~(value & 0xa);
+    case IIC_STS:
+        i2c->sts &= ~(value & 0x0a);
+        if (value & IIC_STS_IRQA && i2c->mdcntl & IIC_MDCNTL_EINT) {
+            qemu_irq_lower(i2c->irq);
+        }
         break;
-    case 9:
+    case IIC_EXTSTS:
         i2c->extsts &= ~(value & 0x8f);
         break;
-    case 10:
+    case IIC_LSADR:
         i2c->lsadr = value;
         break;
-    case 11:
+    case IIC_HSADR:
         i2c->hsadr = value;
         break;
-    case 12:
+    case IIC_CLKDIV:
         i2c->clkdiv = value;
         break;
-    case 13:
+    case IIC_INTRMSK:
         i2c->intrmsk = value;
         break;
-    case 14:
+    case IIC_XFRCNT:
         i2c->xfrcnt = value & 0x77;
         break;
-    case 15:
+    case IIC_XTCNTLSS:
+        i2c->xtcntlss &= ~(value & 0xf0);
         if (value & IIC_XTCNTLSS_SRST) {
             /* Is it actually a full reset? U-Boot sets some regs before */
             ppc4xx_i2c_reset(DEVICE(i2c));
             break;
         }
-        i2c->xtcntlss = value;
         break;
-    case 16:
+    case IIC_DIRECTCNTL:
         i2c->directcntl = value & (IIC_DIRECTCNTL_SDAC & IIC_DIRECTCNTL_SCLC);
         i2c->directcntl |= (value & IIC_DIRECTCNTL_SCLC ? 1 : 0);
         bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SCL,
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9bc6d97ea1..9e8350c55d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2248,8 +2248,8 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
                  (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL);
 }
 
-#define HOLE_640K_START  (640 * 1024)
-#define HOLE_640K_END   (1024 * 1024)
+#define HOLE_640K_START  (640 * KiB)
+#define HOLE_640K_END   (1 * MiB)
 
 static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
                                            uint64_t len, int default_node)
@@ -2537,7 +2537,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
                              (1UL << 7),  /* PPRSup       */
                              1);
     /* IVHD length */
-    build_append_int_noprefix(table_data, 0x24, 2);
+    build_append_int_noprefix(table_data, 28, 2);
     /* DeviceID */
     build_append_int_noprefix(table_data, s->devid, 2);
     /* Capability offset */
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index aeef802364..874030582d 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -165,8 +165,8 @@
 #define AMDVI_DTE_UPPER_QUAD_RESERVED  0x08f0000000000000
 
 /* AMDVI paging mode */
-#define AMDVI_GATS_MODE                 (6ULL <<  12)
-#define AMDVI_HATS_MODE                 (6ULL <<  10)
+#define AMDVI_GATS_MODE                 (2ULL <<  12)
+#define AMDVI_HATS_MODE                 (2ULL <<  10)
 
 /* IOTLB */
 #define AMDVI_IOTLB_MAX_SIZE 1024
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f310040351..50d5553991 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
 #include "hw/char/serial.h"
@@ -448,12 +449,12 @@ void pc_cmos_init(PCMachineState *pcms,
 
     /* memory size */
     /* base memory (first MiB) */
-    val = MIN(pcms->below_4g_mem_size / 1024, 640);
+    val = MIN(pcms->below_4g_mem_size / KiB, 640);
     rtc_set_memory(s, 0x15, val);
     rtc_set_memory(s, 0x16, val >> 8);
     /* extended memory (next 64MiB) */
-    if (pcms->below_4g_mem_size > 1024 * 1024) {
-        val = (pcms->below_4g_mem_size - 1024 * 1024) / 1024;
+    if (pcms->below_4g_mem_size > 1 * MiB) {
+        val = (pcms->below_4g_mem_size - 1 * MiB) / KiB;
     } else {
         val = 0;
     }
@@ -464,8 +465,8 @@ void pc_cmos_init(PCMachineState *pcms,
     rtc_set_memory(s, 0x30, val);
     rtc_set_memory(s, 0x31, val >> 8);
     /* memory between 16MiB and 4GiB */
-    if (pcms->below_4g_mem_size > 16 * 1024 * 1024) {
-        val = (pcms->below_4g_mem_size - 16 * 1024 * 1024) / 65536;
+    if (pcms->below_4g_mem_size > 16 * MiB) {
+        val = (pcms->below_4g_mem_size - 16 * MiB) / (64 * KiB);
     } else {
         val = 0;
     }
@@ -1392,11 +1393,11 @@ void pc_memory_init(PCMachineState *pcms,
         }
 
         machine->device_memory->base =
-            ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1ULL << 30);
+            ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1 * GiB);
 
         if (pcmc->enforce_aligned_dimm) {
             /* size device region assuming 1G page max alignment per slot */
-            device_mem_size += (1ULL << 30) * machine->ram_slots;
+            device_mem_size += (1 * GiB) * machine->ram_slots;
         }
 
         if ((machine->device_memory->base + device_mem_size) <
@@ -1438,7 +1439,7 @@ void pc_memory_init(PCMachineState *pcms,
         if (!pcmc->broken_reserved_end) {
             res_mem_end += memory_region_size(&machine->device_memory->mr);
         }
-        *val = cpu_to_le64(ROUND_UP(res_mem_end, 0x1ULL << 30));
+        *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
         fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
     }
 
@@ -1475,7 +1476,7 @@ uint64_t pc_pci_hole64_start(void)
         hole64_start = 0x100000000ULL + pcms->above_4g_mem_size;
     }
 
-    return ROUND_UP(hole64_start, 1ULL << 30);
+    return ROUND_UP(hole64_start, 1 * GiB);
 }
 
 qemu_irq pc_allocate_cpu_irq(void)
@@ -2095,7 +2096,7 @@ static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
         error_propagate(errp, error);
         return;
     }
-    if (value > (1ULL << 32)) {
+    if (value > 4 * GiB) {
         error_setg(&error,
                    "Machine option 'max-ram-below-4g=%"PRIu64
                    "' expects size less than or equal to 4G", value);
@@ -2103,7 +2104,7 @@ static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
         return;
     }
 
-    if (value < (1ULL << 20)) {
+    if (value < 1 * MiB) {
         warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary,"
                     "BIOS may not work with less than 1MiB", value);
     }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index d357907b0b..dc09466b3e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -24,6 +24,7 @@
 
 #include "qemu/osdep.h"
 
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/loader.h"
 #include "hw/i386/pc.h"
@@ -131,7 +132,7 @@ static void pc_init1(MachineState *machine,
                 if (lowmem > 0xc0000000) {
                     lowmem = 0xc0000000;
                 }
-                if (lowmem & ((1ULL << 30) - 1)) {
+                if (lowmem & (1 * GiB - 1)) {
                     warn_report("Large machine and max_ram_below_4g "
                                 "(%" PRIu64 ") not a multiple of 1G; "
                                 "possible bad performance.",
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1a73e1848a..532241e3f8 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -29,6 +29,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/loader.h"
 #include "sysemu/arch_init.h"
@@ -105,7 +106,7 @@ static void pc_q35_init(MachineState *machine)
     if (lowmem > pcms->max_ram_below_4g) {
         lowmem = pcms->max_ram_below_4g;
         if (machine->ram_size - lowmem > lowmem &&
-            lowmem & ((1ULL << 30) - 1)) {
+            lowmem & (1 * GiB - 1)) {
             warn_report("There is possibly poor performance as the ram size "
                         " (0x%" PRIx64 ") is more then twice the size of"
                         " max-ram-below-4g (%"PRIu64") and"
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 73ac783f20..091e22dd60 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -28,6 +28,7 @@
 #include "sysemu/block-backend.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
+#include "qemu/units.h"
 #include "hw/sysbus.h"
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
@@ -56,7 +57,7 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
     flash_size = memory_region_size(flash_mem);
 
     /* map the last 128KB of the BIOS in ISA space */
-    isa_bios_size = MIN(flash_size, 128 * 1024);
+    isa_bios_size = MIN(flash_size, 128 * KiB);
     isa_bios = g_malloc(sizeof(*isa_bios));
     memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
                            &error_fatal);
@@ -83,7 +84,7 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
  * only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB in
  * size.
  */
-#define FLASH_MAP_BASE_MIN ((hwaddr)(0x100000000ULL - 8*1024*1024))
+#define FLASH_MAP_BASE_MIN ((hwaddr)(4 * GiB - 8 * MiB))
 
 /* This function maps flash drives from 4G downward, in order of their unit
  * numbers. The mapping starts at unit#0, with unit number increments of 1, and
@@ -221,10 +222,7 @@ static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw)
     g_free(filename);
 
     /* map the last 128KB of the BIOS in ISA space */
-    isa_bios_size = bios_size;
-    if (isa_bios_size > (128 * 1024)) {
-        isa_bios_size = 128 * 1024;
-    }
+    isa_bios_size = MIN(bios_size, 128 * KiB);
     isa_bios = g_malloc(sizeof(*isa_bios));
     memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
                              bios_size - isa_bios_size, isa_bios_size);
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 628b813a11..4e4f069a24 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -9,6 +9,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 
 #include <sys/resource.h>
@@ -46,7 +47,7 @@
  * From empirical tests I observed that qemu use 75MB more than the
  * max_mcache_size.
  */
-#define NON_MCACHE_MEMORY_SIZE (80 * 1024 * 1024)
+#define NON_MCACHE_MEMORY_SIZE (80 * MiB)
 
 typedef struct MapCacheEntry {
     hwaddr paddr_index;
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index e73e623e3b..b9f1a3c972 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -294,7 +294,6 @@ static const VMStateDescription vmstate_icp_server = {
 static void icp_reset(void *dev)
 {
     ICPState *icp = ICP(dev);
-    ICPStateClass *icpc = ICP_GET_CLASS(icp);
 
     icp->xirr = 0;
     icp->pending_priority = 0xff;
@@ -302,16 +301,11 @@ static void icp_reset(void *dev)
 
     /* Make all outputs are deasserted */
     qemu_set_irq(icp->output, 0);
-
-    if (icpc->reset) {
-        icpc->reset(icp);
-    }
 }
 
 static void icp_realize(DeviceState *dev, Error **errp)
 {
     ICPState *icp = ICP(dev);
-    ICPStateClass *icpc = ICP_GET_CLASS(dev);
     PowerPCCPU *cpu;
     CPUPPCState *env;
     Object *obj;
@@ -351,10 +345,6 @@ static void icp_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    if (icpc->realize) {
-        icpc->realize(icp, errp);
-    }
-
     qemu_register_reset(icp_reset, dev);
     vmstate_register(NULL, icp->cs->cpu_index, &vmstate_icp_server, icp);
 }
@@ -547,9 +537,61 @@ static void ics_simple_eoi(ICSState *ics, uint32_t nr)
     }
 }
 
-static void ics_simple_reset(void *dev)
+static void ics_simple_reset(DeviceState *dev)
+{
+    ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
+
+    icsc->parent_reset(dev);
+}
+
+static void ics_simple_reset_handler(void *dev)
+{
+    ics_simple_reset(dev);
+}
+
+static void ics_simple_realize(DeviceState *dev, Error **errp)
 {
     ICSState *ics = ICS_SIMPLE(dev);
+    ICSStateClass *icsc = ICS_BASE_GET_CLASS(ics);
+    Error *local_err = NULL;
+
+    icsc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
+
+    qemu_register_reset(ics_simple_reset_handler, ics);
+}
+
+static void ics_simple_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ICSStateClass *isc = ICS_BASE_CLASS(klass);
+
+    device_class_set_parent_realize(dc, ics_simple_realize,
+                                    &isc->parent_realize);
+    device_class_set_parent_reset(dc, ics_simple_reset,
+                                  &isc->parent_reset);
+
+    isc->reject = ics_simple_reject;
+    isc->resend = ics_simple_resend;
+    isc->eoi = ics_simple_eoi;
+}
+
+static const TypeInfo ics_simple_info = {
+    .name = TYPE_ICS_SIMPLE,
+    .parent = TYPE_ICS_BASE,
+    .instance_size = sizeof(ICSState),
+    .class_init = ics_simple_class_init,
+    .class_size = sizeof(ICSStateClass),
+};
+
+static void ics_base_reset(DeviceState *dev)
+{
+    ICSState *ics = ICS_BASE(dev);
     int i;
     uint8_t flags[ics->nr_irqs];
 
@@ -566,7 +608,35 @@ static void ics_simple_reset(void *dev)
     }
 }
 
-static int ics_simple_dispatch_pre_save(void *opaque)
+static void ics_base_realize(DeviceState *dev, Error **errp)
+{
+    ICSState *ics = ICS_BASE(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), ICS_PROP_XICS, &err);
+    if (!obj) {
+        error_propagate(errp, err);
+        error_prepend(errp, "required link '" ICS_PROP_XICS "' not found: ");
+        return;
+    }
+    ics->xics = XICS_FABRIC(obj);
+
+    if (!ics->nr_irqs) {
+        error_setg(errp, "Number of interrupts needs to be greater 0");
+        return;
+    }
+    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
+}
+
+static void ics_base_instance_init(Object *obj)
+{
+    ICSState *ics = ICS_BASE(obj);
+
+    ics->offset = XICS_IRQ_BASE;
+}
+
+static int ics_base_dispatch_pre_save(void *opaque)
 {
     ICSState *ics = opaque;
     ICSStateClass *info = ICS_BASE_GET_CLASS(ics);
@@ -578,7 +648,7 @@ static int ics_simple_dispatch_pre_save(void *opaque)
     return 0;
 }
 
-static int ics_simple_dispatch_post_load(void *opaque, int version_id)
+static int ics_base_dispatch_post_load(void *opaque, int version_id)
 {
     ICSState *ics = opaque;
     ICSStateClass *info = ICS_BASE_GET_CLASS(ics);
@@ -590,7 +660,7 @@ static int ics_simple_dispatch_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static const VMStateDescription vmstate_ics_simple_irq = {
+static const VMStateDescription vmstate_ics_base_irq = {
     .name = "ics/irq",
     .version_id = 2,
     .minimum_version_id = 1,
@@ -604,95 +674,36 @@ static const VMStateDescription vmstate_ics_simple_irq = {
     },
 };
 
-static const VMStateDescription vmstate_ics_simple = {
+static const VMStateDescription vmstate_ics_base = {
     .name = "ics",
     .version_id = 1,
     .minimum_version_id = 1,
-    .pre_save = ics_simple_dispatch_pre_save,
-    .post_load = ics_simple_dispatch_post_load,
+    .pre_save = ics_base_dispatch_pre_save,
+    .post_load = ics_base_dispatch_post_load,
     .fields = (VMStateField[]) {
         /* Sanity check */
         VMSTATE_UINT32_EQUAL(nr_irqs, ICSState, NULL),
 
         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(irqs, ICSState, nr_irqs,
-                                             vmstate_ics_simple_irq,
+                                             vmstate_ics_base_irq,
                                              ICSIRQState),
         VMSTATE_END_OF_LIST()
     },
 };
 
-static void ics_simple_initfn(Object *obj)
-{
-    ICSState *ics = ICS_SIMPLE(obj);
-
-    ics->offset = XICS_IRQ_BASE;
-}
-
-static void ics_simple_realize(ICSState *ics, Error **errp)
-{
-    if (!ics->nr_irqs) {
-        error_setg(errp, "Number of interrupts needs to be greater 0");
-        return;
-    }
-    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
-
-    qemu_register_reset(ics_simple_reset, ics);
-}
-
-static Property ics_simple_properties[] = {
+static Property ics_base_properties[] = {
     DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static void ics_simple_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    ICSStateClass *isc = ICS_BASE_CLASS(klass);
-
-    isc->realize = ics_simple_realize;
-    dc->props = ics_simple_properties;
-    dc->vmsd = &vmstate_ics_simple;
-    isc->reject = ics_simple_reject;
-    isc->resend = ics_simple_resend;
-    isc->eoi = ics_simple_eoi;
-}
-
-static const TypeInfo ics_simple_info = {
-    .name = TYPE_ICS_SIMPLE,
-    .parent = TYPE_ICS_BASE,
-    .instance_size = sizeof(ICSState),
-    .class_init = ics_simple_class_init,
-    .class_size = sizeof(ICSStateClass),
-    .instance_init = ics_simple_initfn,
-};
-
-static void ics_base_realize(DeviceState *dev, Error **errp)
-{
-    ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
-    ICSState *ics = ICS_BASE(dev);
-    Object *obj;
-    Error *err = NULL;
-
-    obj = object_property_get_link(OBJECT(dev), ICS_PROP_XICS, &err);
-    if (!obj) {
-        error_propagate(errp, err);
-        error_prepend(errp, "required link '" ICS_PROP_XICS "' not found: ");
-        return;
-    }
-    ics->xics = XICS_FABRIC(obj);
-
-
-    if (icsc->realize) {
-        icsc->realize(ics, errp);
-    }
-}
-
 static void ics_base_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = ics_base_realize;
+    dc->props = ics_base_properties;
+    dc->reset = ics_base_reset;
+    dc->vmsd = &vmstate_ics_base;
 }
 
 static const TypeInfo ics_base_info = {
@@ -700,6 +711,7 @@ static const TypeInfo ics_base_info = {
     .parent = TYPE_DEVICE,
     .abstract = true,
     .instance_size = sizeof(ICSState),
+    .instance_init = ics_base_instance_init,
     .class_init = ics_base_class_init,
     .class_size = sizeof(ICSStateClass),
 };
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 8dba2f84e7..30c3769a20 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -114,22 +114,38 @@ static int icp_set_kvm_state(ICPState *icp, int version_id)
     return 0;
 }
 
-static void icp_kvm_reset(ICPState *icp)
+static void icp_kvm_reset(DeviceState *dev)
 {
-    icp_set_kvm_state(icp, 1);
+    ICPStateClass *icpc = ICP_GET_CLASS(dev);
+
+    icpc->parent_reset(dev);
+
+    icp_set_kvm_state(ICP(dev), 1);
 }
 
-static void icp_kvm_realize(ICPState *icp, Error **errp)
+static void icp_kvm_realize(DeviceState *dev, Error **errp)
 {
-    CPUState *cs = icp->cs;
+    ICPState *icp = ICP(dev);
+    ICPStateClass *icpc = ICP_GET_CLASS(icp);
+    Error *local_err = NULL;
+    CPUState *cs;
     KVMEnabledICP *enabled_icp;
-    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+    unsigned long vcpu_id;
     int ret;
 
     if (kernel_xics_fd == -1) {
         abort();
     }
 
+    icpc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    cs = icp->cs;
+    vcpu_id = kvm_arch_vcpu_id(cs);
+
     /*
      * If we are reusing a parked vCPU fd corresponding to the CPU
      * which was hot-removed earlier we don't have to renable
@@ -154,12 +170,16 @@ static void icp_kvm_realize(ICPState *icp, Error **errp)
 
 static void icp_kvm_class_init(ObjectClass *klass, void *data)
 {
+    DeviceClass *dc = DEVICE_CLASS(klass);
     ICPStateClass *icpc = ICP_CLASS(klass);
 
+    device_class_set_parent_realize(dc, icp_kvm_realize,
+                                    &icpc->parent_realize);
+    device_class_set_parent_reset(dc, icp_kvm_reset,
+                                  &icpc->parent_reset);
+
     icpc->pre_save = icp_get_kvm_state;
     icpc->post_load = icp_set_kvm_state;
-    icpc->realize = icp_kvm_realize;
-    icpc->reset = icp_kvm_reset;
     icpc->synchronize_state = icp_synchronize_state;
 }
 
@@ -304,44 +324,46 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val)
     }
 }
 
-static void ics_kvm_reset(void *dev)
+static void ics_kvm_reset(DeviceState *dev)
 {
-    ICSState *ics = ICS_SIMPLE(dev);
-    int i;
-    uint8_t flags[ics->nr_irqs];
+    ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
 
-    for (i = 0; i < ics->nr_irqs; i++) {
-        flags[i] = ics->irqs[i].flags;
-    }
-
-    memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+    icsc->parent_reset(dev);
 
-    for (i = 0; i < ics->nr_irqs; i++) {
-        ics->irqs[i].priority = 0xff;
-        ics->irqs[i].saved_priority = 0xff;
-        ics->irqs[i].flags = flags[i];
-    }
+    ics_set_kvm_state(ICS_KVM(dev), 1);
+}
 
-    ics_set_kvm_state(ics, 1);
+static void ics_kvm_reset_handler(void *dev)
+{
+    ics_kvm_reset(dev);
 }
 
-static void ics_kvm_realize(ICSState *ics, Error **errp)
+static void ics_kvm_realize(DeviceState *dev, Error **errp)
 {
-    if (!ics->nr_irqs) {
-        error_setg(errp, "Number of interrupts needs to be greater 0");
+    ICSState *ics = ICS_KVM(dev);
+    ICSStateClass *icsc = ICS_BASE_GET_CLASS(ics);
+    Error *local_err = NULL;
+
+    icsc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         return;
     }
-    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
     ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
 
-    qemu_register_reset(ics_kvm_reset, ics);
+    qemu_register_reset(ics_kvm_reset_handler, ics);
 }
 
 static void ics_kvm_class_init(ObjectClass *klass, void *data)
 {
     ICSStateClass *icsc = ICS_BASE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    device_class_set_parent_realize(dc, ics_kvm_realize,
+                                    &icsc->parent_realize);
+    device_class_set_parent_reset(dc, ics_kvm_reset,
+                                  &icsc->parent_reset);
 
-    icsc->realize = ics_kvm_realize;
     icsc->pre_save = ics_get_kvm_state;
     icsc->post_load = ics_set_kvm_state;
     icsc->synchronize_state = ics_synchronize_state;
@@ -349,7 +371,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
 
 static const TypeInfo ics_kvm_info = {
     .name = TYPE_ICS_KVM,
-    .parent = TYPE_ICS_SIMPLE,
+    .parent = TYPE_ICS_BASE,
     .instance_size = sizeof(ICSState),
     .class_init = ics_kvm_class_init,
 };
diff --git a/hw/intc/xics_pnv.c b/hw/intc/xics_pnv.c
index c87de2189c..fa48505f36 100644
--- a/hw/intc/xics_pnv.c
+++ b/hw/intc/xics_pnv.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "qemu/log.h"
 #include "hw/ppc/xics.h"
@@ -158,9 +159,18 @@ static const MemoryRegionOps pnv_icp_ops = {
     },
 };
 
-static void pnv_icp_realize(ICPState *icp, Error **errp)
+static void pnv_icp_realize(DeviceState *dev, Error **errp)
 {
+    ICPState *icp = ICP(dev);
     PnvICPState *pnv_icp = PNV_ICP(icp);
+    ICPStateClass *icpc = ICP_GET_CLASS(icp);
+    Error *local_err = NULL;
+
+    icpc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 
     memory_region_init_io(&pnv_icp->mmio, OBJECT(icp), &pnv_icp_ops,
                           icp, "icp-thread", 0x1000);
@@ -171,7 +181,8 @@ static void pnv_icp_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICPStateClass *icpc = ICP_CLASS(klass);
 
-    icpc->realize = pnv_icp_realize;
+    device_class_set_parent_realize(dc, pnv_icp_realize,
+                                    &icpc->parent_realize);
     dc->desc = "PowerNV ICP";
 }
 
diff --git a/hw/ipack/tpci200.c b/hw/ipack/tpci200.c
index da05c8589d..cd3e79139d 100644
--- a/hw/ipack/tpci200.c
+++ b/hw/ipack/tpci200.c
@@ -9,6 +9,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/ipack/ipack.h"
 #include "hw/pci/pci.h"
 #include "qemu/bitops.h"
@@ -597,9 +598,9 @@ static void tpci200_realize(PCIDevice *pci_dev, Error **errp)
     memory_region_init_io(&s->las1, OBJECT(s), &tpci200_las1_ops,
                           s, "tpci200_las1", 1024);
     memory_region_init_io(&s->las2, OBJECT(s), &tpci200_las2_ops,
-                          s, "tpci200_las2", 1024*1024*32);
+                          s, "tpci200_las2", 32 * MiB);
     memory_region_init_io(&s->las3, OBJECT(s), &tpci200_las3_ops,
-                          s, "tpci200_las3", 1024*1024*16);
+                          s, "tpci200_las3", 16 * MiB);
     pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
     pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_IO,     &s->io);
     pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->las0);
diff --git a/hw/lm32/lm32_boards.c b/hw/lm32/lm32_boards.c
index 167058348e..fd8eccca14 100644
--- a/hw/lm32/lm32_boards.c
+++ b/hw/lm32/lm32_boards.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -87,10 +88,10 @@ static void lm32_evr_init(MachineState *machine)
 
     /* memory map */
     hwaddr flash_base  = 0x04000000;
-    size_t flash_sector_size       = 256 * 1024;
-    size_t flash_size              = 32 * 1024 * 1024;
+    size_t flash_sector_size       = 256 * KiB;
+    size_t flash_size              = 32 * MiB;
     hwaddr ram_base    = 0x08000000;
-    size_t ram_size                = 64 * 1024 * 1024;
+    size_t ram_size                = 64 * MiB;
     hwaddr timer0_base = 0x80002000;
     hwaddr uart0_base  = 0x80006000;
     hwaddr timer1_base = 0x8000a000;
@@ -173,10 +174,10 @@ static void lm32_uclinux_init(MachineState *machine)
 
     /* memory map */
     hwaddr flash_base   = 0x04000000;
-    size_t flash_sector_size        = 256 * 1024;
-    size_t flash_size               = 32 * 1024 * 1024;
+    size_t flash_sector_size        = 256 * KiB;
+    size_t flash_size               = 32 * MiB;
     hwaddr ram_base     = 0x08000000;
-    size_t ram_size                 = 64 * 1024 * 1024;
+    size_t ram_size                 = 64 * MiB;
     hwaddr uart0_base   = 0x80000000;
     hwaddr timer0_base  = 0x80002000;
     hwaddr timer1_base  = 0x80010000;
diff --git a/hw/lm32/milkymist.c b/hw/lm32/milkymist.c
index c36bbc4ae2..321f184595 100644
--- a/hw/lm32/milkymist.c
+++ b/hw/lm32/milkymist.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -33,11 +34,10 @@
 #include "milkymist-hw.h"
 #include "lm32.h"
 #include "exec/address-spaces.h"
-#include "qemu/cutils.h"
 
 #define BIOS_FILENAME    "mmone-bios.bin"
 #define BIOS_OFFSET      0x00860000
-#define BIOS_SIZE        (512*1024)
+#define BIOS_SIZE        (512 * KiB)
 #define KERNEL_LOAD_ADDR 0x40000000
 
 typedef struct {
@@ -96,10 +96,10 @@ milkymist_init(MachineState *machine)
 
     /* memory map */
     hwaddr flash_base   = 0x00000000;
-    size_t flash_sector_size        = 128 * 1024;
-    size_t flash_size               = 32 * 1024 * 1024;
+    size_t flash_sector_size        = 128 * KiB;
+    size_t flash_size               = 32 * MiB;
     hwaddr sdram_base   = 0x40000000;
-    size_t sdram_size               = 128 * 1024 * 1024;
+    size_t sdram_size               = 128 * MiB;
 
     hwaddr initrd_base  = sdram_base + 0x1002000;
     hwaddr cmdline_base = sdram_base + 0x1000000;
diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c
index ae3dcc98c3..0f2245dd81 100644
--- a/hw/m68k/mcf5208.c
+++ b/hw/m68k/mcf5208.c
@@ -6,6 +6,7 @@
  * This code is licensed under the GPL
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
@@ -241,7 +242,7 @@ static void mcf5208evb_init(MachineState *machine)
     memory_region_add_subregion(address_space_mem, 0x40000000, ram);
 
     /* Internal SRAM.  */
-    memory_region_init_ram(sram, NULL, "mcf5208.sram", 16384, &error_fatal);
+    memory_region_init_ram(sram, NULL, "mcf5208.sram", 16 * KiB, &error_fatal);
     memory_region_add_subregion(address_space_mem, 0x80000000, sram);
 
     /* Internal peripherals.  */
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index 6c4a544eac..c730878d25 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -44,8 +45,8 @@
 
 #include "hw/stream.h"
 
-#define LMB_BRAM_SIZE  (128 * 1024)
-#define FLASH_SIZE     (32 * 1024 * 1024)
+#define LMB_BRAM_SIZE  (128 * KiB)
+#define FLASH_SIZE     (32 * MiB)
 
 #define BINARY_DEVICE_TREE_FILE "petalogix-ml605.dtb"
 
@@ -109,7 +110,7 @@ petalogix_ml605_init(MachineState *machine)
     pflash_cfi01_register(FLASH_BASEADDR,
                           NULL, "petalogix_ml605.flash", FLASH_SIZE,
                           dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
-                          (64 * 1024), FLASH_SIZE >> 16,
+                          64 * KiB, FLASH_SIZE >> 16,
                           2, 0x89, 0x18, 0x0000, 0x0, 0);
 
 
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 0da3e62102..5cf7b84c79 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -39,8 +40,8 @@
 
 #include "boot.h"
 
-#define LMB_BRAM_SIZE  (128 * 1024)
-#define FLASH_SIZE     (16 * 1024 * 1024)
+#define LMB_BRAM_SIZE  (128 * KiB)
+#define FLASH_SIZE     (16 * MiB)
 
 #define BINARY_DEVICE_TREE_FILE "petalogix-s3adsp1800.dtb"
 
@@ -87,7 +88,7 @@ petalogix_s3adsp1800_init(MachineState *machine)
     pflash_cfi01_register(FLASH_BASEADDR,
                           NULL, "petalogix_s3adsp1800.flash", FLASH_SIZE,
                           dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
-                          (64 * 1024), FLASH_SIZE >> 16,
+                          64 * KiB, FLASH_SIZE >> 16,
                           1, 0x89, 0x18, 0x0000, 0x0, 1);
 
     dev = qdev_create(NULL, "xlnx.xps-intc");
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 14e6f955d2..6c9c20a93e 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 
 #include "exec/address-spaces.h"
@@ -32,7 +33,6 @@
 #include "hw/mips/cpudevs.h"
 #include "hw/pci-host/xilinx-pcie.h"
 #include "qapi/error.h"
-#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
 #include "chardev/char.h"
@@ -200,7 +200,7 @@ static uint64_t boston_platreg_read(void *opaque, hwaddr addr,
         val |= PLAT_BUILD_CFG_PCIE2_EN;
         return val;
     case PLAT_DDR_CFG:
-        val = s->mach->ram_size / G_BYTE;
+        val = s->mach->ram_size / GiB;
         assert(!(val & ~PLAT_DDR_CFG_SIZE));
         val |= PLAT_DDR_CFG_MHZ;
         return val;
@@ -355,7 +355,7 @@ static const void *boston_fdt_filter(void *opaque, const void *fdt_orig,
         return NULL;
     }
 
-    ram_low_sz = MIN(256 * M_BYTE, machine->ram_size);
+    ram_low_sz = MIN(256 * MiB, machine->ram_size);
     ram_high_sz = machine->ram_size - ram_low_sz;
     qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg",
                                  1, 0x00000000, 1, ram_low_sz,
@@ -436,8 +436,8 @@ static void boston_mach_init(MachineState *machine)
     int fw_size, fit_err;
     bool is_64b;
 
-    if ((machine->ram_size % G_BYTE) ||
-        (machine->ram_size > (2 * G_BYTE))) {
+    if ((machine->ram_size % GiB) ||
+        (machine->ram_size > (2 * GiB))) {
         error_report("Memory size must be 1GB or 2GB");
         exit(1);
     }
@@ -471,7 +471,7 @@ static void boston_mach_init(MachineState *machine)
     sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
 
     flash =  g_new(MemoryRegion, 1);
-    memory_region_init_rom(flash, NULL, "boston.flash", 128 * M_BYTE, &err);
+    memory_region_init_rom(flash, NULL, "boston.flash", 128 * MiB, &err);
     memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0);
 
     ddr = g_new(MemoryRegion, 1);
@@ -481,22 +481,22 @@ static void boston_mach_init(MachineState *machine)
 
     ddr_low_alias = g_new(MemoryRegion, 1);
     memory_region_init_alias(ddr_low_alias, NULL, "boston_low.ddr",
-                             ddr, 0, MIN(machine->ram_size, (256 * M_BYTE)));
+                             ddr, 0, MIN(machine->ram_size, (256 * MiB)));
     memory_region_add_subregion_overlap(sys_mem, 0, ddr_low_alias, 0);
 
     xilinx_pcie_init(sys_mem, 0,
-                     0x10000000, 32 * M_BYTE,
-                     0x40000000, 1 * G_BYTE,
+                     0x10000000, 32 * MiB,
+                     0x40000000, 1 * GiB,
                      get_cps_irq(s->cps, 2), false);
 
     xilinx_pcie_init(sys_mem, 1,
-                     0x12000000, 32 * M_BYTE,
-                     0x20000000, 512 * M_BYTE,
+                     0x12000000, 32 * MiB,
+                     0x20000000, 512 * MiB,
                      get_cps_irq(s->cps, 1), false);
 
     pcie2 = xilinx_pcie_init(sys_mem, 2,
-                             0x14000000, 32 * M_BYTE,
-                             0x16000000, 1 * M_BYTE,
+                             0x14000000, 32 * MiB,
+                             0x16000000, 1 * MiB,
                              get_cps_irq(s->cps, 0), true);
 
     platreg = g_new(MemoryRegion, 1);
@@ -526,7 +526,7 @@ static void boston_mach_init(MachineState *machine)
 
     if (machine->firmware) {
         fw_size = load_image_targphys(machine->firmware,
-                                      0x1fc00000, 4 * M_BYTE);
+                                      0x1fc00000, 4 * MiB);
         if (fw_size == -1) {
             error_printf("unable to load firmware image '%s'\n",
                           machine->firmware);
@@ -552,7 +552,7 @@ static void boston_mach_class_init(MachineClass *mc)
     mc->desc = "MIPS Boston";
     mc->init = boston_mach_init;
     mc->block_default_type = IF_IDE;
-    mc->default_ram_size = 1 * G_BYTE;
+    mc->default_ram_size = 1 * GiB;
     mc->max_cpus = 16;
     mc->default_cpu_type = MIPS_CPU_TYPE_NAME("I6400");
 }
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index 02fb2fdcc4..c1694c8254 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
@@ -159,7 +160,7 @@ static int64_t load_kernel (CPUMIPSState *env)
     /* Setup minimum environment variables */
     prom_set(prom_buf, index++, "busclock=33000000");
     prom_set(prom_buf, index++, "cpuclock=100000000");
-    prom_set(prom_buf, index++, "memsize=%i", loaderparams.ram_size/1024/1024);
+    prom_set(prom_buf, index++, "memsize=%"PRIi64, loaderparams.ram_size / MiB);
     prom_set(prom_buf, index++, "modetty0=38400n8r");
     prom_set(prom_buf, index++, NULL);
 
@@ -303,10 +304,10 @@ static void mips_fulong2e_init(MachineState *machine)
     qemu_register_reset(main_cpu_reset, cpu);
 
     /* fulong 2e has 256M ram. */
-    ram_size = 256 * 1024 * 1024;
+    ram_size = 256 * MiB;
 
     /* fulong 2e has a 1M flash.Winbond W39L040AP70Z */
-    bios_size = 1024 * 1024;
+    bios_size = 1 * MiB;
 
     /* allocate RAM */
     memory_region_allocate_system_memory(ram, NULL, "fulong2e.ram", ram_size);
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 1b4e32e58e..3467451482 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "hw/hw.h"
@@ -191,7 +192,7 @@ static void generate_eeprom_spd(uint8_t *eeprom, ram_addr_t ram_size)
     int i;
 
     /* work in terms of MB */
-    ram_size >>= 20;
+    ram_size /= MiB;
 
     while ((ram_size >= 4) && (nbanks <= 2)) {
         int sz_log2 = MIN(31 - clz32(ram_size), 14);
@@ -843,7 +844,8 @@ static int64_t load_kernel (void)
             /* The kernel allocates the bootmap memory in the low memory after
                the initrd.  It takes at most 128kiB for 2GB RAM and 4kiB
                pages.  */
-            initrd_offset = (loaderparams.ram_low_size - initrd_size - 131072
+            initrd_offset = (loaderparams.ram_low_size - initrd_size
+                             - (128 * KiB)
                              - ~INITRD_PAGE_MASK) & INITRD_PAGE_MASK;
             if (kernel_high >= initrd_offset) {
                 error_report("memory too small for initial ram disk '%s'",
@@ -1021,9 +1023,9 @@ void mips_malta_init(MachineState *machine)
     mips_create_cpu(s, machine->cpu_type, &cbus_irq, &i8259_irq);
 
     /* allocate RAM */
-    if (ram_size > (2048u << 20)) {
-        error_report("Too much memory for this machine: %dMB, maximum 2048MB",
-                     ((unsigned int)ram_size / (1 << 20)));
+    if (ram_size > 2 * GiB) {
+        error_report("Too much memory for this machine: %" PRId64 "MB,"
+                     " maximum 2048MB", ram_size / MiB);
         exit(1);
     }
 
@@ -1034,17 +1036,18 @@ void mips_malta_init(MachineState *machine)
 
     /* alias for pre IO hole access */
     memory_region_init_alias(ram_low_preio, NULL, "mips_malta_low_preio.ram",
-                             ram_high, 0, MIN(ram_size, (256 << 20)));
+                             ram_high, 0, MIN(ram_size, 256 * MiB));
     memory_region_add_subregion(system_memory, 0, ram_low_preio);
 
     /* alias for post IO hole access, if there is enough RAM */
-    if (ram_size > (512 << 20)) {
+    if (ram_size > 512 * MiB) {
         ram_low_postio = g_new(MemoryRegion, 1);
         memory_region_init_alias(ram_low_postio, NULL,
                                  "mips_malta_low_postio.ram",
-                                 ram_high, 512 << 20,
-                                 ram_size - (512 << 20));
-        memory_region_add_subregion(system_memory, 512 << 20, ram_low_postio);
+                                 ram_high, 512 * MiB,
+                                 ram_size - 512 * MiB);
+        memory_region_add_subregion(system_memory, 512 * MiB,
+                                    ram_low_postio);
     }
 
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -1076,7 +1079,7 @@ void mips_malta_init(MachineState *machine)
     bios = pflash_cfi01_get_memory(fl);
     fl_idx++;
     if (kernel_filename) {
-        ram_low_size = MIN(ram_size, 256 << 20);
+        ram_low_size = MIN(ram_size, 256 * MiB);
         /* For KVM we reserve 1MB of RAM for running bootloader */
         if (kvm_enabled()) {
             ram_low_size -= 0x100000;
diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c
index e5cf8ed1a3..d5725d0555 100644
--- a/hw/mips/mips_r4k.c
+++ b/hw/mips/mips_r4k.c
@@ -8,6 +8,7 @@
  * the standard PC ISA addresses.
 */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -79,8 +80,9 @@ typedef struct ResetData {
 
 static int64_t load_kernel(void)
 {
+    const size_t params_size = 264;
     int64_t entry, kernel_high;
-    long kernel_size, initrd_size, params_size;
+    long kernel_size, initrd_size;
     ram_addr_t initrd_offset;
     uint32_t *params_buf;
     int big_endian;
@@ -128,7 +130,6 @@ static int64_t load_kernel(void)
     }
 
     /* Store command line.  */
-    params_size = 264;
     params_buf = g_malloc(params_size);
 
     params_buf[0] = tswap32(ram_size);
@@ -143,7 +144,7 @@ static int64_t load_kernel(void)
     }
 
     rom_add_blob_fixed("params", params_buf, params_size,
-                       (16 << 20) - 264);
+                       16 * MiB - params_size);
 
     g_free(params_buf);
     return entry;
@@ -158,7 +159,7 @@ static void main_cpu_reset(void *opaque)
     env->active_tc.PC = s->vector;
 }
 
-static const int sector_len = 32 * 1024;
+static const int sector_len = 32 * KiB;
 static
 void mips_r4k_init(MachineState *machine)
 {
@@ -194,9 +195,9 @@ void mips_r4k_init(MachineState *machine)
     qemu_register_reset(main_cpu_reset, reset_info);
 
     /* allocate RAM */
-    if (ram_size > (256 << 20)) {
-        error_report("Too much memory for this machine: %dMB, maximum 256MB",
-                     ((unsigned int)ram_size / (1 << 20)));
+    if (ram_size > 256 * MiB) {
+        error_report("Too much memory for this machine: %" PRId64 "MB,"
+                     " maximum 256MB", ram_size / MiB);
         exit(1);
     }
     memory_region_allocate_system_memory(ram, NULL, "mips_r4k.ram", ram_size);
diff --git a/hw/misc/auxbus.c b/hw/misc/auxbus.c
index b4cacd664b..b8a8721201 100644
--- a/hw/misc/auxbus.c
+++ b/hw/misc/auxbus.c
@@ -27,6 +27,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/log.h"
 #include "hw/misc/auxbus.h"
 #include "hw/i2c/i2c.h"
@@ -68,7 +69,7 @@ AUXBus *aux_init_bus(DeviceState *parent, const char *name)
 
     /* Memory related. */
     bus->aux_io = g_malloc(sizeof(*bus->aux_io));
-    memory_region_init(bus->aux_io, OBJECT(bus), "aux-io", (1 << 20));
+    memory_region_init(bus->aux_io, OBJECT(bus), "aux-io", 1 * MiB);
     address_space_init(&bus->aux_addr_space, bus->aux_io, "aux-io");
     return bus;
 }
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index 34eb05d213..df26a4d046 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
 #include "qemu/timer.h"
@@ -357,7 +358,7 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
                        edu, QEMU_THREAD_JOINABLE);
 
     memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
-                    "edu-mmio", 1 << 20);
+                    "edu-mmio", 1 * MiB);
     pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
 }
 
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index ee01c5e66b..6febbabcaa 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -17,6 +17,7 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "hw/hw.h"
@@ -1301,7 +1302,7 @@ static void ivshmem_realize(PCIDevice *dev, Error **errp)
     }
 
     if (s->sizearg == NULL) {
-        s->legacy_size = 4 << 20; /* 4 MB default */
+        s->legacy_size = 4 * MiB; /* 4 MB default */
     } else {
         int ret;
         uint64_t size;
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index 1b2a69b3ef..87ae246d37 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -71,18 +71,19 @@ static DBDMAState *dbdma_from_ch(DBDMA_channel *ch)
 }
 
 #if DEBUG_DBDMA
-static void dump_dbdma_cmd(dbdma_cmd *cmd)
+static void dump_dbdma_cmd(DBDMA_channel *ch, dbdma_cmd *cmd)
 {
-    printf("dbdma_cmd %p\n", cmd);
-    printf("    req_count 0x%04x\n", le16_to_cpu(cmd->req_count));
-    printf("    command 0x%04x\n", le16_to_cpu(cmd->command));
-    printf("    phy_addr 0x%08x\n", le32_to_cpu(cmd->phy_addr));
-    printf("    cmd_dep 0x%08x\n", le32_to_cpu(cmd->cmd_dep));
-    printf("    res_count 0x%04x\n", le16_to_cpu(cmd->res_count));
-    printf("    xfer_status 0x%04x\n", le16_to_cpu(cmd->xfer_status));
+    DBDMA_DPRINTFCH(ch, "dbdma_cmd %p\n", cmd);
+    DBDMA_DPRINTFCH(ch, "    req_count 0x%04x\n", le16_to_cpu(cmd->req_count));
+    DBDMA_DPRINTFCH(ch, "    command 0x%04x\n", le16_to_cpu(cmd->command));
+    DBDMA_DPRINTFCH(ch, "    phy_addr 0x%08x\n", le32_to_cpu(cmd->phy_addr));
+    DBDMA_DPRINTFCH(ch, "    cmd_dep 0x%08x\n", le32_to_cpu(cmd->cmd_dep));
+    DBDMA_DPRINTFCH(ch, "    res_count 0x%04x\n", le16_to_cpu(cmd->res_count));
+    DBDMA_DPRINTFCH(ch, "    xfer_status 0x%04x\n",
+                    le16_to_cpu(cmd->xfer_status));
 }
 #else
-static void dump_dbdma_cmd(dbdma_cmd *cmd)
+static void dump_dbdma_cmd(DBDMA_channel *ch, dbdma_cmd *cmd)
 {
 }
 #endif
@@ -448,7 +449,7 @@ static void channel_run(DBDMA_channel *ch)
     uint32_t phy_addr;
 
     DBDMA_DPRINTFCH(ch, "channel_run\n");
-    dump_dbdma_cmd(current);
+    dump_dbdma_cmd(ch, current);
 
     /* clear WAKE flag at command fetch */
 
diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c
index ccc4c7d98a..43bbec46cf 100644
--- a/hw/misc/mips_itu.c
+++ b/hw/misc/mips_itu.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/log.h"
 #include "qapi/error.h"
 #include "cpu.h"
@@ -80,7 +81,7 @@ static void itc_reconfigure(MIPSITUState *tag)
     uint64_t *am = &tag->ITCAddressMap[0];
     MemoryRegion *mr = &tag->storage_io;
     hwaddr address = am[0] & ITC_AM0_BASE_ADDRESS_MASK;
-    uint64_t size = (1 << 10) + (am[1] & ITC_AM1_ADDR_MASK_MASK);
+    uint64_t size = (1 * KiB) + (am[1] & ITC_AM1_ADDR_MASK_MASK);
     bool is_enabled = (am[0] & ITC_AM0_EN_MASK) != 0;
 
     memory_region_transaction_begin();
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index cda8d48333..510ddb3897 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -34,6 +34,7 @@
 */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "net/net.h"
 #include "net/tap.h"
 #include "qemu/range.h"
@@ -81,10 +82,10 @@ typedef struct E1000EState {
 #define E1000E_IO_IDX       2
 #define E1000E_MSIX_IDX     3
 
-#define E1000E_MMIO_SIZE    (128 * 1024)
-#define E1000E_FLASH_SIZE   (128 * 1024)
+#define E1000E_MMIO_SIZE    (128 * KiB)
+#define E1000E_FLASH_SIZE   (128 * KiB)
 #define E1000E_IO_SIZE      (32)
-#define E1000E_MSIX_SIZE    (16 * 1024)
+#define E1000E_MSIX_SIZE    (16 * KiB)
 
 #define E1000E_MSIX_TABLE   (0x0000)
 #define E1000E_MSIX_PBA     (0x2000)
diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c
index eb0e097137..09047806f2 100644
--- a/hw/net/e1000x_common.c
+++ b/hw/net/e1000x_common.c
@@ -23,6 +23,7 @@
 */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
 #include "net/net.h"
@@ -111,7 +112,7 @@ bool e1000x_is_oversized(uint32_t *mac, size_t size)
     static const int maximum_ethernet_vlan_size = 1522;
     /* this is the size past which hardware will
        drop packets when setting LPE=1 */
-    static const int maximum_ethernet_lpe_size = 16384;
+    static const int maximum_ethernet_lpe_size = 16 * KiB;
 
     if ((size > maximum_ethernet_lpe_size ||
         (size > maximum_ethernet_vlan_size
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index a07a63247e..e761daf551 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -41,6 +41,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
 #include "net/net.h"
@@ -60,8 +61,6 @@
  * changed to pad short packets itself. */
 #define CONFIG_PAD_RECEIVED_FRAMES
 
-#define KiB 1024
-
 /* Debug EEPRO100 card. */
 #if 0
 # define DEBUG_EEPRO100
diff --git a/hw/net/ne2000.h b/hw/net/ne2000.h
index adb8021bd1..2cd193e4c6 100644
--- a/hw/net/ne2000.h
+++ b/hw/net/ne2000.h
@@ -1,11 +1,12 @@
 #ifndef HW_NE2000_H
 #define HW_NE2000_H
 
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "net/net.h"
 
-#define NE2000_PMEM_SIZE    (32*1024)
-#define NE2000_PMEM_START   (16*1024)
+#define NE2000_PMEM_SIZE    (32 * KiB)
+#define NE2000_PMEM_START   (16 * KiB)
 #define NE2000_PMEM_END     (NE2000_PMEM_SIZE+NE2000_PMEM_START)
 #define NE2000_MEM_SIZE     NE2000_PMEM_END
 
diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c
index 94f436e7fb..4bb5b601d3 100644
--- a/hw/nios2/boot.c
+++ b/hw/nios2/boot.c
@@ -29,6 +29,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "qemu/option.h"
@@ -38,7 +39,6 @@
 #include "sysemu/sysemu.h"
 #include "hw/loader.h"
 #include "elf.h"
-#include "qemu/cutils.h"
 
 #include "boot.h"
 
@@ -177,7 +177,7 @@ void nios2_load_kernel(Nios2CPU *cpu, hwaddr ddr_base,
             high = ddr_base + kernel_size;
         }
 
-        high = ROUND_UP(high, 1024 * 1024);
+        high = ROUND_UP(high, 1 * MiB);
 
         /* If initrd is available, it goes after the kernel, aligned to 1M. */
         if (initrd_filename) {
@@ -213,7 +213,7 @@ void nios2_load_kernel(Nios2CPU *cpu, hwaddr ddr_base,
         high += fdt_size;
 
         /* Kernel command is at the end, 4k aligned. */
-        boot_info.cmdline = ROUND_UP(high, 4096);
+        boot_info.cmdline = ROUND_UP(high, 4 * KiB);
         if (kernel_cmdline && strlen(kernel_cmdline)) {
             pstrcpy_targphys("cmdline", boot_info.cmdline, 256, kernel_cmdline);
         }
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 4a0aec8e1d..bed1557d83 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -47,9 +48,9 @@ typedef struct sPAPRNVRAM {
 #define VIO_SPAPR_NVRAM(obj) \
      OBJECT_CHECK(sPAPRNVRAM, (obj), TYPE_VIO_SPAPR_NVRAM)
 
-#define MIN_NVRAM_SIZE 8192
-#define DEFAULT_NVRAM_SIZE 65536
-#define MAX_NVRAM_SIZE 1048576
+#define MIN_NVRAM_SIZE      (8 * KiB)
+#define DEFAULT_NVRAM_SIZE  (64 * KiB)
+#define MAX_NVRAM_SIZE      (1 * MiB)
 
 static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              uint32_t token, uint32_t nargs,
@@ -167,7 +168,9 @@ static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
     nvram->buf = g_malloc0(nvram->size);
 
     if ((nvram->size < MIN_NVRAM_SIZE) || (nvram->size > MAX_NVRAM_SIZE)) {
-        error_setg(errp, "spapr-nvram must be between %d and %d bytes in size",
+        error_setg(errp,
+                   "spapr-nvram must be between %" PRId64
+                   " and %" PRId64 " bytes in size",
                    MIN_NVRAM_SIZE, MAX_NVRAM_SIZE);
         return;
     }
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 01f67f9db1..88f035c20b 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
@@ -70,7 +71,7 @@ typedef struct PRePPCIState {
     int contiguous_map;
 } PREPPCIState;
 
-#define BIOS_SIZE (1024 * 1024)
+#define BIOS_SIZE (1 * MiB)
 
 static inline uint32_t raven_pci_io_config(hwaddr addr)
 {
diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c
index b0a31b917d..60309afe9e 100644
--- a/hw/pci-host/xilinx-pcie.c
+++ b/hw/pci-host/xilinx-pcie.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci-host/xilinx-pcie.h"
@@ -157,9 +158,9 @@ static void xilinx_pcie_host_init(Object *obj)
 static Property xilinx_pcie_host_props[] = {
     DEFINE_PROP_UINT32("bus_nr", XilinxPCIEHost, bus_nr, 0),
     DEFINE_PROP_SIZE("cfg_base", XilinxPCIEHost, cfg_base, 0),
-    DEFINE_PROP_SIZE("cfg_size", XilinxPCIEHost, cfg_size, 32 << 20),
+    DEFINE_PROP_SIZE("cfg_size", XilinxPCIEHost, cfg_size, 32 * MiB),
     DEFINE_PROP_SIZE("mmio_base", XilinxPCIEHost, mmio_base, 0),
-    DEFINE_PROP_SIZE("mmio_size", XilinxPCIEHost, mmio_size, 1 << 20),
+    DEFINE_PROP_SIZE("mmio_size", XilinxPCIEHost, mmio_size, 1 * MiB),
     DEFINE_PROP_BOOL("link_up", XilinxPCIEHost, link_up, true),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 86d82a6ec3..bcab6323b7 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -14,7 +14,8 @@ obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
 # PowerPC 4xx boards
 obj-y += ppc4xx_devs.o ppc405_uc.o
 obj-$(CONFIG_PPC4XX) += ppc4xx_pci.o ppc405_boards.o
-obj-$(CONFIG_PPC4XX) += ppc440_bamboo.o ppc440_pcix.o ppc440_uc.o sam460ex.o
+obj-$(CONFIG_PPC4XX) += ppc440_bamboo.o ppc440_pcix.o ppc440_uc.o
+obj-$(CONFIG_SAM460EX) += sam460ex.o
 # PReP
 obj-$(CONFIG_PREP) += prep.o
 obj-$(CONFIG_PREP) += prep_systemio.o
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 826053edc8..7d19b1498c 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -15,6 +15,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "e500.h"
 #include "e500-ccsr.h"
@@ -46,11 +47,11 @@
 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
 #define DTC_LOAD_PAD               0x1800000
 #define DTC_PAD_MASK               0xFFFFF
-#define DTB_MAX_SIZE               (8 * 1024 * 1024)
+#define DTB_MAX_SIZE               (8 * MiB)
 #define INITRD_LOAD_PAD            0x2000000
 #define INITRD_PAD_MASK            0xFFFFFF
 
-#define RAM_SIZES_ALIGN            (64UL << 20)
+#define RAM_SIZES_ALIGN            (64 * MiB)
 
 /* TODO: parameterize */
 #define MPC8544_CCSRBAR_SIZE       0x00100000ULL
@@ -603,7 +604,7 @@ static int ppce500_prep_device_tree(PPCE500MachineState *machine,
 /* Create -kernel TLB entries for BookE.  */
 hwaddr booke206_page_size_to_tlb(uint64_t size)
 {
-    return 63 - clz64(size >> 10);
+    return 63 - clz64(size / KiB);
 }
 
 static int booke206_initial_map_tsize(CPUPPCState *env)
@@ -671,7 +672,7 @@ static void ppce500_cpu_reset(void *opaque)
 
     /* Set initial guest state. */
     cs->halted = 0;
-    env->gpr[1] = (16<<20) - 8;
+    env->gpr[1] = (16 * MiB) - 8;
     env->gpr[3] = bi->dt_base;
     env->gpr[4] = 0;
     env->gpr[5] = 0;
@@ -1012,9 +1013,9 @@ void ppce500_init(MachineState *machine)
     }
 
     cur_base = loadaddr + payload_size;
-    if (cur_base < (32 * 1024 * 1024)) {
+    if (cur_base < 32 * MiB) {
         /* u-boot occupies memory up to 32MB, so load blobs above */
-        cur_base = (32 * 1024 * 1024);
+        cur_base = 32 * MiB;
     }
 
     /* Load bare kernel only if no bios/u-boot has been provided */
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index d8e3f2066e..963d429cc8 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 #include "e500.h"
 #include "hw/net/fsl_etsec/etsec.h"
@@ -85,7 +86,7 @@ static void e500plat_machine_class_init(ObjectClass *oc, void *data)
     pmc->has_mpc8xxx_gpio = true;
     pmc->has_platform_bus = true;
     pmc->platform_bus_base = 0xf00000000ULL;
-    pmc->platform_bus_size = (128ULL * 1024 * 1024);
+    pmc->platform_bus_size = 128 * MiB;
     pmc->platform_bus_first_irq = 5;
     pmc->platform_bus_num_irqs = 10;
     pmc->ccsrbar_base = 0xFE0000000ULL;
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
index c0217e66f2..41fd289e81 100644
--- a/hw/ppc/mac.h
+++ b/hw/ppc/mac.h
@@ -26,6 +26,7 @@
 #ifndef PPC_MAC_H
 #define PPC_MAC_H
 
+#include "qemu/units.h"
 #include "exec/memory.h"
 #include "hw/boards.h"
 #include "hw/sysbus.h"
@@ -38,7 +39,7 @@
 /* SMP is not enabled, for now */
 #define MAX_CPUS 1
 
-#define BIOS_SIZE     (1024 * 1024)
+#define BIOS_SIZE        (1 * MiB)
 #define NVRAM_SIZE        0x2000
 #define PROM_FILENAME    "openbios-ppc"
 #define PROM_ADDR         0xfff00000
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index ff715ffffd..d11980166f 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -71,7 +71,6 @@
 #include "hw/usb.h"
 #include "exec/address-spaces.h"
 #include "hw/sysbus.h"
-#include "qemu/cutils.h"
 #include "trace.h"
 
 #define MAX_IDE_BUS 2
@@ -407,11 +406,11 @@ static void ppc_core99_init(MachineState *machine)
 
         adb_bus = qdev_get_child_bus(dev, "adb.0");
         dev = qdev_create(adb_bus, TYPE_ADB_KEYBOARD);
-        qdev_prop_set_bit(dev, "disable-direct-reg3-writes", has_pmu);
+        qdev_prop_set_bit(dev, "disable-direct-reg3-writes", true);
         qdev_init_nofail(dev);
 
         dev = qdev_create(adb_bus, TYPE_ADB_MOUSE);
-        qdev_prop_set_bit(dev, "disable-direct-reg3-writes", has_pmu);
+        qdev_prop_set_bit(dev, "disable-direct-reg3-writes", true);
         qdev_init_nofail(dev);
     }
 
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 4608bab014..06ed6f660e 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -24,6 +24,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
 #include "hw/ppc/ppc.h"
@@ -46,7 +47,6 @@
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
 #include "exec/address-spaces.h"
-#include "qemu/cutils.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
@@ -118,10 +118,9 @@ static void ppc_heathrow_init(MachineState *machine)
     }
 
     /* allocate RAM */
-    if (ram_size > (2047 << 20)) {
-        fprintf(stderr,
-                "qemu: Too much memory for this machine: %d MB, maximum 2047 MB\n",
-                ((unsigned int)ram_size / (1 << 20)));
+    if (ram_size > 2047 * MiB) {
+        error_report("Too much memory for this machine: %" PRId64 " MB, "
+                     "maximum 2047 MB", ram_size / MiB);
         exit(1);
     }
 
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 7401ffe5b0..346f5e7aed 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/numa.h"
@@ -31,7 +32,6 @@
 #include "hw/ppc/pnv_core.h"
 #include "hw/loader.h"
 #include "exec/address-spaces.h"
-#include "qemu/cutils.h"
 #include "qapi/visitor.h"
 #include "monitor/monitor.h"
 #include "hw/intc/intc.h"
@@ -556,7 +556,7 @@ static void pnv_init(MachineState *machine)
     char *chip_typename;
 
     /* allocate RAM */
-    if (machine->ram_size < (1 * G_BYTE)) {
+    if (machine->ram_size < (1 * GiB)) {
         warn_report("skiboot may not work with < 1GB of RAM");
     }
 
@@ -1174,7 +1174,7 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data)
                                       * storage */
     mc->no_parallel = 1;
     mc->default_boot_order = NULL;
-    mc->default_ram_size = 1 * G_BYTE;
+    mc->default_ram_size = 1 * GiB;
     xic->icp_get = pnv_icp_get;
     xic->ics_get = pnv_ics_get;
     xic->ics_resend = pnv_ics_resend;
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index a9f129fc2c..9750464bf4 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -150,6 +150,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
     if (!chip) {
         error_propagate(errp, local_err);
         error_prepend(errp, "required link 'chip' not found: ");
+        return;
     }
 
     pc->threads = g_new(PowerPCCPU *, cc->nr_threads);
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
index d301067d3b..70111075b3 100644
--- a/hw/ppc/ppc405_boards.c
+++ b/hw/ppc/ppc405_boards.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -40,7 +41,7 @@
 #include "exec/address-spaces.h"
 
 #define BIOS_FILENAME "ppc405_rom.bin"
-#define BIOS_SIZE (2048 * 1024)
+#define BIOS_SIZE (2 * MiB)
 
 #define KERNEL_LOAD_ADDR 0x00000000
 #define INITRD_LOAD_ADDR 0x01800000
@@ -216,14 +217,14 @@ static void ref405ep_init(MachineState *machine)
     memory_region_init(&ram_memories[1], NULL, "ef405ep.ram1", 0);
     ram_bases[1] = 0x00000000;
     ram_sizes[1] = 0x00000000;
-    ram_size = 128 * 1024 * 1024;
+    ram_size = 128 * MiB;
 #ifdef DEBUG_BOARD_INIT
     printf("%s: register cpu\n", __func__);
 #endif
     env = ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes,
                         33333333, &pic, kernel_filename == NULL ? 0 : 1);
     /* allocate SRAM */
-    sram_size = 512 * 1024;
+    sram_size = 512 * KiB;
     memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size,
                            &error_fatal);
     memory_region_add_subregion(sysmem, 0xFFF00000, sram);
@@ -589,7 +590,7 @@ static void taihu_405ep_init(MachineState *machine)
 
         bios_size = blk_getlength(blk);
         /* XXX: should check that size is 32MB */
-        bios_size = 32 * 1024 * 1024;
+        bios_size = 32 * MiB;
         fl_sectors = (bios_size + 65535) >> 16;
 #ifdef DEBUG_BOARD_INIT
         printf("Register parallel flash %d size %lx"
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index 34f8d57b07..4bd9fbcc1e 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -983,10 +984,10 @@ static void ppc405_ocm_init(CPUPPCState *env)
 
     ocm = g_malloc0(sizeof(ppc405_ocm_t));
     /* XXX: Size is 4096 or 0x04000000 */
-    memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4096,
+    memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4 * KiB,
                            &error_fatal);
-    memory_region_init_alias(&ocm->dsarc_ram, NULL, "ppc405.dsarc", &ocm->isarc_ram,
-                             0, 4096);
+    memory_region_init_alias(&ocm->dsarc_ram, NULL, "ppc405.dsarc",
+                             &ocm->isarc_ram, 0, 4 * KiB);
     qemu_register_reset(&ocm_reset, ocm);
     ppc_dcr_register(env, OCM0_ISARC,
                      ocm, &dcr_read_ocm, &dcr_write_ocm);
diff --git a/hw/ppc/ppc440.h b/hw/ppc/ppc440.h
index ad27db12e4..7cef936125 100644
--- a/hw/ppc/ppc440.h
+++ b/hw/ppc/ppc440.h
@@ -21,6 +21,7 @@ void ppc440_sdram_init(CPUPPCState *env, int nbanks,
                        hwaddr *ram_bases, hwaddr *ram_sizes,
                        int do_init);
 void ppc4xx_ahb_init(CPUPPCState *env);
+void ppc4xx_dma_init(CPUPPCState *env, int dcr_base);
 void ppc460ex_pcie_init(CPUPPCState *env);
 
 #endif /* PPC440_H */
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index 44e6a0c21b..3d4c43b8cc 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
@@ -49,7 +50,7 @@
 #define PPC440EP_SDRAM_NR_BANKS 4
 
 static const unsigned int ppc440ep_sdram_bank_sizes[] = {
-    256<<20, 128<<20, 64<<20, 32<<20, 16<<20, 8<<20, 0
+    256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 0
 };
 
 static hwaddr entry;
@@ -151,7 +152,7 @@ static void main_cpu_reset(void *opaque)
     CPUPPCState *env = &cpu->env;
 
     cpu_reset(CPU(cpu));
-    env->gpr[1] = (16<<20) - 8;
+    env->gpr[1] = (16 * MiB) - 8;
     env->gpr[3] = FDT_ADDR;
     env->nip = entry;
 
diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
index 123f4ac09d..0bbaa6844a 100644
--- a/hw/ppc/ppc440_uc.c
+++ b/hw/ppc/ppc440_uc.c
@@ -9,10 +9,11 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
-#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "qemu/log.h"
 #include "cpu.h"
 #include "hw/hw.h"
 #include "exec/address-spaces.h"
@@ -215,13 +216,13 @@ void ppc4xx_l2sram_init(CPUPPCState *env)
     l2sram = g_malloc0(sizeof(*l2sram));
     /* XXX: Size is 4*64kB for 460ex, cf. U-Boot, ppc4xx-isram.h */
     memory_region_init_ram(&l2sram->bank[0], NULL, "ppc4xx.l2sram_bank0",
-                           64 * K_BYTE, &error_abort);
+                           64 * KiB, &error_abort);
     memory_region_init_ram(&l2sram->bank[1], NULL, "ppc4xx.l2sram_bank1",
-                           64 * K_BYTE, &error_abort);
+                           64 * KiB, &error_abort);
     memory_region_init_ram(&l2sram->bank[2], NULL, "ppc4xx.l2sram_bank2",
-                           64 * K_BYTE, &error_abort);
+                           64 * KiB, &error_abort);
     memory_region_init_ram(&l2sram->bank[3], NULL, "ppc4xx.l2sram_bank3",
-                           64 * K_BYTE, &error_abort);
+                           64 * KiB, &error_abort);
     qemu_register_reset(&l2sram_reset, l2sram);
     ppc_dcr_register(env, DCR_L2CACHE_CFG,
                      l2sram, &dcr_read_l2sram, &dcr_write_l2sram);
@@ -513,28 +514,28 @@ static uint32_t sdram_bcr(hwaddr ram_base, hwaddr ram_size)
     uint32_t bcr;
 
     switch (ram_size) {
-    case (8 * M_BYTE):
+    case (8 * MiB):
         bcr = 0xffc0;
         break;
-    case (16 * M_BYTE):
+    case (16 * MiB):
         bcr = 0xff80;
         break;
-    case (32 * M_BYTE):
+    case (32 * MiB):
         bcr = 0xff00;
         break;
-    case (64 * M_BYTE):
+    case (64 * MiB):
         bcr = 0xfe00;
         break;
-    case (128 * M_BYTE):
+    case (128 * MiB):
         bcr = 0xfc00;
         break;
-    case (256 * M_BYTE):
+    case (256 * MiB):
         bcr = 0xf800;
         break;
-    case (512 * M_BYTE):
+    case (512 * MiB):
         bcr = 0xf000;
         break;
-    case (1 * G_BYTE):
+    case (1 * GiB):
         bcr = 0xe000;
         break;
     default:
@@ -561,7 +562,7 @@ static target_ulong sdram_size(uint32_t bcr)
     if (sh == 0) {
         size = -1;
     } else {
-        size = 8 * M_BYTE * sh;
+        size = 8 * MiB * sh;
     }
 
     return size;
@@ -803,6 +804,227 @@ void ppc4xx_ahb_init(CPUPPCState *env)
 }
 
 /*****************************************************************************/
+/* DMA controller */
+
+#define DMA0_CR_CE  (1 << 31)
+#define DMA0_CR_PW  (1 << 26 | 1 << 25)
+#define DMA0_CR_DAI (1 << 24)
+#define DMA0_CR_SAI (1 << 23)
+#define DMA0_CR_DEC (1 << 2)
+
+enum {
+    DMA0_CR  = 0x00,
+    DMA0_CT,
+    DMA0_SAH,
+    DMA0_SAL,
+    DMA0_DAH,
+    DMA0_DAL,
+    DMA0_SGH,
+    DMA0_SGL,
+
+    DMA0_SR  = 0x20,
+    DMA0_SGC = 0x23,
+    DMA0_SLP = 0x25,
+    DMA0_POL = 0x26,
+};
+
+typedef struct {
+    uint32_t cr;
+    uint32_t ct;
+    uint64_t sa;
+    uint64_t da;
+    uint64_t sg;
+} PPC4xxDmaChnl;
+
+typedef struct {
+    int base;
+    PPC4xxDmaChnl ch[4];
+    uint32_t sr;
+} PPC4xxDmaState;
+
+static uint32_t dcr_read_dma(void *opaque, int dcrn)
+{
+    PPC4xxDmaState *dma = opaque;
+    uint32_t val = 0;
+    int addr = dcrn - dma->base;
+    int chnl = addr / 8;
+
+    switch (addr) {
+    case 0x00 ... 0x1f:
+        switch (addr % 8) {
+        case DMA0_CR:
+            val = dma->ch[chnl].cr;
+            break;
+        case DMA0_CT:
+            val = dma->ch[chnl].ct;
+            break;
+        case DMA0_SAH:
+            val = dma->ch[chnl].sa >> 32;
+            break;
+        case DMA0_SAL:
+            val = dma->ch[chnl].sa;
+            break;
+        case DMA0_DAH:
+            val = dma->ch[chnl].da >> 32;
+            break;
+        case DMA0_DAL:
+            val = dma->ch[chnl].da;
+            break;
+        case DMA0_SGH:
+            val = dma->ch[chnl].sg >> 32;
+            break;
+        case DMA0_SGL:
+            val = dma->ch[chnl].sg;
+            break;
+        }
+        break;
+    case DMA0_SR:
+        val = dma->sr;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n",
+                      __func__, dcrn, chnl, addr);
+    }
+
+    return val;
+}
+
+static void dcr_write_dma(void *opaque, int dcrn, uint32_t val)
+{
+    PPC4xxDmaState *dma = opaque;
+    int addr = dcrn - dma->base;
+    int chnl = addr / 8;
+
+    switch (addr) {
+    case 0x00 ... 0x1f:
+        switch (addr % 8) {
+        case DMA0_CR:
+            dma->ch[chnl].cr = val;
+            if (val & DMA0_CR_CE) {
+                int count = dma->ch[chnl].ct & 0xffff;
+
+                if (count) {
+                    int width, i, sidx, didx;
+                    uint8_t *rptr, *wptr;
+                    hwaddr rlen, wlen;
+
+                    sidx = didx = 0;
+                    width = 1 << ((val & DMA0_CR_PW) >> 25);
+                    rptr = cpu_physical_memory_map(dma->ch[chnl].sa, &rlen, 0);
+                    wptr = cpu_physical_memory_map(dma->ch[chnl].da, &wlen, 1);
+                    if (rptr && wptr) {
+                        if (!(val & DMA0_CR_DEC) &&
+                            val & DMA0_CR_SAI && val & DMA0_CR_DAI) {
+                            /* optimise common case */
+                            memmove(wptr, rptr, count * width);
+                            sidx = didx = count * width;
+                        } else {
+                            /* do it the slow way */
+                            for (sidx = didx = i = 0; i < count; i++) {
+                                uint64_t v = ldn_le_p(rptr + sidx, width);
+                                stn_le_p(wptr + didx, width, v);
+                                if (val & DMA0_CR_SAI) {
+                                    sidx += width;
+                                }
+                                if (val & DMA0_CR_DAI) {
+                                    didx += width;
+                                }
+                            }
+                        }
+                    }
+                    if (wptr) {
+                        cpu_physical_memory_unmap(wptr, wlen, 1, didx);
+                    }
+                    if (wptr) {
+                        cpu_physical_memory_unmap(rptr, rlen, 0, sidx);
+                    }
+                }
+            }
+            break;
+        case DMA0_CT:
+            dma->ch[chnl].ct = val;
+            break;
+        case DMA0_SAH:
+            dma->ch[chnl].sa &= 0xffffffffULL;
+            dma->ch[chnl].sa |= (uint64_t)val << 32;
+            break;
+        case DMA0_SAL:
+            dma->ch[chnl].sa &= 0xffffffff00000000ULL;
+            dma->ch[chnl].sa |= val;
+            break;
+        case DMA0_DAH:
+            dma->ch[chnl].da &= 0xffffffffULL;
+            dma->ch[chnl].da |= (uint64_t)val << 32;
+            break;
+        case DMA0_DAL:
+            dma->ch[chnl].da &= 0xffffffff00000000ULL;
+            dma->ch[chnl].da |= val;
+            break;
+        case DMA0_SGH:
+            dma->ch[chnl].sg &= 0xffffffffULL;
+            dma->ch[chnl].sg |= (uint64_t)val << 32;
+            break;
+        case DMA0_SGL:
+            dma->ch[chnl].sg &= 0xffffffff00000000ULL;
+            dma->ch[chnl].sg |= val;
+            break;
+        }
+        break;
+    case DMA0_SR:
+        dma->sr &= ~val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n",
+                      __func__, dcrn, chnl, addr);
+    }
+}
+
+static void ppc4xx_dma_reset(void *opaque)
+{
+    PPC4xxDmaState *dma = opaque;
+    int dma_base = dma->base;
+
+    memset(dma, 0, sizeof(*dma));
+    dma->base = dma_base;
+}
+
+void ppc4xx_dma_init(CPUPPCState *env, int dcr_base)
+{
+    PPC4xxDmaState *dma;
+    int i;
+
+    dma = g_malloc0(sizeof(*dma));
+    dma->base = dcr_base;
+    qemu_register_reset(&ppc4xx_dma_reset, dma);
+    for (i = 0; i < 4; i++) {
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_CR,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_CT,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SAH,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SAL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_DAH,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_DAL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SGH,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SGL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+    }
+    ppc_dcr_register(env, dcr_base + DMA0_SR,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_SGC,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_SLP,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_POL,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+}
+
+/*****************************************************************************/
 /* PCI Express controller */
 /* FIXME: This is not complete and does not work, only implemented partially
  * to allow firmware and guests to find an empty bus. Cards should use PCI.
diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
index 2e963894fe..8c6f3c9577 100644
--- a/hw/ppc/ppc4xx_devs.c
+++ b/hw/ppc/ppc4xx_devs.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "hw/hw.h"
 #include "hw/ppc/ppc.h"
@@ -29,6 +30,7 @@
 #include "hw/boards.h"
 #include "qemu/log.h"
 #include "exec/address-spaces.h"
+#include "qemu/error-report.h"
 
 #define DEBUG_UIC
 
@@ -353,25 +355,25 @@ static uint32_t sdram_bcr (hwaddr ram_base,
     uint32_t bcr;
 
     switch (ram_size) {
-    case (4 * 1024 * 1024):
+    case 4 * MiB:
         bcr = 0x00000000;
         break;
-    case (8 * 1024 * 1024):
+    case 8 * MiB:
         bcr = 0x00020000;
         break;
-    case (16 * 1024 * 1024):
+    case 16 * MiB:
         bcr = 0x00040000;
         break;
-    case (32 * 1024 * 1024):
+    case 32 * MiB:
         bcr = 0x00060000;
         break;
-    case (64 * 1024 * 1024):
+    case 64 * MiB:
         bcr = 0x00080000;
         break;
-    case (128 * 1024 * 1024):
+    case 128 * MiB:
         bcr = 0x000A0000;
         break;
-    case (256 * 1024 * 1024):
+    case 256 * MiB:
         bcr = 0x000C0000;
         break;
     default:
@@ -399,7 +401,7 @@ static target_ulong sdram_size (uint32_t bcr)
     if (sh == 7)
         size = -1;
     else
-        size = (4 * 1024 * 1024) << sh;
+        size = (4 * MiB) << sh;
 
     return size;
 }
@@ -702,8 +704,8 @@ ram_addr_t ppc4xx_sdram_adjust(ram_addr_t ram_size, int nr_banks,
 
     ram_size -= size_left;
     if (size_left) {
-        printf("Truncating memory to %d MiB to fit SDRAM controller limits.\n",
-               (int)(ram_size >> 20));
+        error_report("Truncating memory to %" PRId64 " MiB to fit SDRAM"
+                     " controller limits", ram_size / MiB);
     }
 
     memory_region_allocate_system_memory(ram, NULL, "ppc4xx.sdram", ram_size);
diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c
index 69ca2d0e42..c45fc858de 100644
--- a/hw/ppc/ppce500_spin.c
+++ b/hw/ppc/ppce500_spin.c
@@ -28,6 +28,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/hw.h"
 #include "hw/sysbus.h"
 #include "sysemu/hw_accel.h"
@@ -89,7 +90,7 @@ static void spin_kick(CPUState *cs, run_on_cpu_data data)
     PowerPCCPU *cpu = POWERPC_CPU(cs);
     CPUPPCState *env = &cpu->env;
     SpinInfo *curspin = data.host_ptr;
-    hwaddr map_size = 64 * 1024 * 1024;
+    hwaddr map_size = 64 * MiB;
     hwaddr map_start;
 
     cpu_synchronize_state(cs);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 5ed0bcd862..6689407b3d 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -50,7 +50,7 @@
 #include "exec/address-spaces.h"
 #include "trace.h"
 #include "elf.h"
-#include "qemu/cutils.h"
+#include "qemu/units.h"
 #include "kvm_ppc.h"
 
 /* SMP is not enabled, for now */
@@ -60,7 +60,7 @@
 
 #define CFG_ADDR 0xf0000510
 
-#define BIOS_SIZE (1024 * 1024)
+#define BIOS_SIZE (1 * MiB)
 #define BIOS_FILENAME "ppc_rom.bin"
 #define KERNEL_LOAD_ADDR 0x01000000
 #define INITRD_LOAD_ADDR 0x01800000
@@ -884,7 +884,7 @@ static void ibm_40p_machine_init(MachineClass *mc)
     mc->desc = "IBM RS/6000 7020 (40p)",
     mc->init = ibm_40p_init;
     mc->max_cpus = 1;
-    mc->default_ram_size = 128 * M_BYTE;
+    mc->default_ram_size = 128 * MiB;
     mc->block_default_type = IF_SCSI;
     mc->default_boot_order = "c";
     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("604");
diff --git a/hw/ppc/rs6000_mc.c b/hw/ppc/rs6000_mc.c
index b6135650bd..45cb95e08a 100644
--- a/hw/ppc/rs6000_mc.c
+++ b/hw/ppc/rs6000_mc.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/isa/isa.h"
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
@@ -109,7 +110,7 @@ static void rs6000mc_port0820_write(void *opaque, uint32_t addr, uint32_t val)
             size = end_address - start_address;
             memory_region_set_enabled(&s->simm[socket - 1], size != 0);
             memory_region_set_address(&s->simm[socket - 1],
-                                      start_address * 8 * 1024 * 1024);
+                                      start_address * 8 * MiB);
         }
     }
 }
@@ -140,7 +141,7 @@ static void rs6000mc_realize(DeviceState *dev, Error **errp)
 {
     RS6000MCState *s = RS6000MC_DEVICE(dev);
     int socket = 0;
-    unsigned int ram_size = s->ram_size / (1024 * 1024);
+    unsigned int ram_size = s->ram_size / MiB;
 
     while (socket < 6) {
         if (ram_size >= 64) {
@@ -163,8 +164,8 @@ static void rs6000mc_realize(DeviceState *dev, Error **errp)
             char name[] = "simm.?";
             name[5] = socket + '0';
             memory_region_allocate_system_memory(&s->simm[socket], OBJECT(dev),
-                                                 name, s->simm_size[socket]
-                                                 * 1024 * 1024);
+                                                 name,
+                                                 s->simm_size[socket] * MiB);
             memory_region_add_subregion_overlap(get_system_memory(), 0,
                                                 &s->simm[socket], socket);
         }
@@ -172,8 +173,8 @@ static void rs6000mc_realize(DeviceState *dev, Error **errp)
     if (ram_size) {
         /* unable to push all requested RAM in SIMMs */
         error_setg(errp, "RAM size incompatible with this board. "
-                   "Try again with something else, like %d MB",
-                   s->ram_size / 1024 / 1024 - ram_size);
+                   "Try again with something else, like %" PRId64 " MB",
+                   s->ram_size / MiB - ram_size);
         return;
     }
 
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index bdc53d2603..7eed2ec601 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -12,8 +12,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
-#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
@@ -37,6 +37,8 @@
 #include "hw/i2c/smbus.h"
 #include "hw/usb/hcd-ehci.h"
 
+#include <libfdt.h>
+
 #define BINARY_DEVICE_TREE_FILE "canyonlands.dtb"
 #define UBOOT_FILENAME "u-boot-sam460-20100605.bin"
 /* to extract the official U-Boot bin from the updater: */
@@ -46,7 +48,7 @@
 /* from Sam460 U-Boot include/configs/Sam460ex.h */
 #define FLASH_BASE             0xfff00000
 #define FLASH_BASE_H           0x4
-#define FLASH_SIZE             (1 << 20)
+#define FLASH_SIZE             (1 * MiB)
 #define UBOOT_LOAD_BASE        0xfff80000
 #define UBOOT_SIZE             0x00080000
 #define UBOOT_ENTRY            0xfffffffc
@@ -67,11 +69,15 @@
 */
 
 #define CPU_FREQ 1150000000
+#define PLB_FREQ 230000000
+#define OPB_FREQ 115000000
+#define EBC_FREQ 115000000
+#define UART_FREQ 11059200
 #define SDRAM_NR_BANKS 4
 
 /* FIXME: See u-boot.git 8ac41e, also fix in ppc440_uc.c */
 static const unsigned int ppc460ex_sdram_bank_sizes[] = {
-    1024 << 20, 512 << 20, 256 << 20, 128 << 20, 64 << 20, 32 << 20, 0
+    1 * GiB, 512 * MiB, 256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 0
 };
 
 struct boot_info {
@@ -126,7 +132,7 @@ static void generate_eeprom_spd(uint8_t *eeprom, ram_addr_t ram_size)
     int i;
 
     /* work in terms of MB */
-    ram_size >>= 20;
+    ram_size /= MiB;
 
     while ((ram_size >= 4) && (nbanks <= 2)) {
         int sz_log2 = MIN(31 - clz32(ram_size), 14);
@@ -225,7 +231,7 @@ static int sam460ex_load_uboot(void)
     fl_sectors = (bios_size + 65535) >> 16;
 
     if (!pflash_cfi01_register(base, NULL, "sam460ex.flash", bios_size,
-                               blk, (64 * 1024), fl_sectors,
+                               blk, 64 * KiB, fl_sectors,
                                1, 0x89, 0x18, 0x0000, 0x0, 1)) {
         error_report("qemu: Error registering flash memory.");
         /* XXX: return an error instead? */
@@ -255,6 +261,7 @@ static int sam460ex_load_device_tree(hwaddr addr,
     void *fdt;
     uint32_t tb_freq = CPU_FREQ;
     uint32_t clock_freq = CPU_FREQ;
+    int offset;
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE);
     if (!filename) {
@@ -308,6 +315,27 @@ static int sam460ex_load_device_tree(hwaddr addr,
     qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency",
                               tb_freq);
 
+    /* Remove cpm node if it exists (it is not emulated) */
+    offset = fdt_path_offset(fdt, "/cpm");
+    if (offset >= 0) {
+        fdt_nop_node(fdt, offset);
+    }
+
+    /* set serial port clocks */
+    offset = fdt_node_offset_by_compatible(fdt, -1, "ns16550");
+    while (offset >= 0) {
+        fdt_setprop_cell(fdt, offset, "clock-frequency", UART_FREQ);
+        offset = fdt_node_offset_by_compatible(fdt, offset, "ns16550");
+    }
+
+    /* some more clocks */
+    qemu_fdt_setprop_cell(fdt, "/plb", "clock-frequency",
+                              PLB_FREQ);
+    qemu_fdt_setprop_cell(fdt, "/plb/opb", "clock-frequency",
+                              OPB_FREQ);
+    qemu_fdt_setprop_cell(fdt, "/plb/opb/ebc", "clock-frequency",
+                              EBC_FREQ);
+
     rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
     ret = fdt_size;
@@ -359,14 +387,14 @@ static void main_cpu_reset(void *opaque)
 
     /* either we have a kernel to boot or we jump to U-Boot */
     if (bi->entry != UBOOT_ENTRY) {
-        env->gpr[1] = (16 << 20) - 8;
+        env->gpr[1] = (16 * MiB) - 8;
         env->gpr[3] = FDT_ADDR;
         env->nip = bi->entry;
 
         /* Create a mapping for the kernel.  */
         mmubooke_create_initial_mapping(env, 0, 0);
         env->gpr[6] = tswap32(EPAPR_MAGIC);
-        env->gpr[7] = (16 << 20) - 8; /*bi->ima_size;*/
+        env->gpr[7] = (16 * MiB) - 8; /* bi->ima_size; */
 
     } else {
         env->nip = UBOOT_ENTRY;
@@ -457,6 +485,7 @@ static void sam460ex_init(MachineState *machine)
     object_property_set_bool(OBJECT(dev), true, "realized", NULL);
     smbus_eeprom_init(i2c[0]->bus, 8, smbus_eeprom_buf, smbus_eeprom_size);
     g_free(smbus_eeprom_buf);
+    i2c_create_slave(i2c[0]->bus, "m41t80", 0x68);
 
     dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600800, uic[0][3]);
     i2c[1] = PPC4xx_I2C(dev);
@@ -476,10 +505,13 @@ static void sam460ex_init(MachineState *machine)
     /* MAL */
     ppc4xx_mal_init(env, 4, 16, &uic[2][3]);
 
+    /* DMA */
+    ppc4xx_dma_init(env, 0x200);
+
     /* 256K of L2 cache as memory */
     ppc4xx_l2sram_init(env);
     /* FIXME: remove this after fixing l2sram mapping in ppc440_uc.c? */
-    memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 << 10,
+    memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 * KiB,
                            &error_abort);
     memory_region_add_subregion(address_space_mem, 0x400000000LL, l2cache_ram);
 
@@ -597,7 +629,7 @@ static void sam460ex_machine_init(MachineClass *mc)
     mc->desc = "aCube Sam460ex";
     mc->init = sam460ex_init;
     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("460exb");
-    mc->default_ram_size = 512 * M_BYTE;
+    mc->default_ram_size = 512 * MiB;
 }
 
 DEFINE_MACHINE("sam460ex", sam460ex_machine_init)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b32b971a14..3f5e1d3ec2 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -137,7 +137,7 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
         goto error;
     }
 
-    return ICS_SIMPLE(obj);
+    return ICS_BASE(obj);
 
 error:
     error_propagate(errp, local_err);
@@ -2322,17 +2322,17 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp)
 
     if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
         error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
-                   " is not aligned to %llu MiB",
+                   " is not aligned to %" PRIu64 " MiB",
                    machine->ram_size,
-                   SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
+                   SPAPR_MEMORY_BLOCK_SIZE / MiB);
         return;
     }
 
     if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
         error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
-                   " is not aligned to %llu MiB",
+                   " is not aligned to %" PRIu64 " MiB",
                    machine->ram_size,
-                   SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
+                   SPAPR_MEMORY_BLOCK_SIZE / MiB);
         return;
     }
 
@@ -2340,9 +2340,9 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp)
         if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
             error_setg(errp,
                        "Node %d memory size 0x%" PRIx64
-                       " is not aligned to %llu MiB",
+                       " is not aligned to %" PRIu64 " MiB",
                        i, numa_info[i].node_mem,
-                       SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
+                       SPAPR_MEMORY_BLOCK_SIZE / MiB);
             return;
         }
     }
@@ -2763,7 +2763,7 @@ static void spapr_machine_init(MachineState *machine)
         }
     }
 
-    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
+    if (spapr->rma_size < (MIN_RMA_SLOF * MiB)) {
         error_report(
             "pSeries SLOF firmware requires >= %ldM guest RMA (Real Mode Area memory)",
             MIN_RMA_SLOF);
@@ -3209,7 +3209,7 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     if (size % SPAPR_MEMORY_BLOCK_SIZE) {
         error_setg(errp, "Hotplugged memory size must be a multiple of "
-                      "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
+                      "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
         return;
     }
 
@@ -3961,7 +3961,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = 1024;
     mc->no_parallel = 1;
     mc->default_boot_order = "";
-    mc->default_ram_size = 512 * M_BYTE;
+    mc->default_ram_size = 512 * MiB;
+    mc->default_display = "std";
     mc->kvm_type = spapr_kvm_type;
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
     mc->pci_allow_0_address = true;
@@ -4095,17 +4096,16 @@ static void spapr_machine_2_12_instance_options(MachineState *machine)
 static void spapr_machine_2_12_class_options(MachineClass *mc)
 {
     sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
-    uint8_t mps;
 
     spapr_machine_3_0_class_options(mc);
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_12);
 
-    if (kvmppc_hpt_needs_host_contiguous_pages()) {
-        mps = ctz64(qemu_getrampagesize());
-    } else {
-        mps = 34; /* allow everything up to 16GiB, i.e. everything */
-    }
-    smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
+    /* We depend on kvm_enabled() to choose a default value for the
+     * hpt-max-page-size capability. Of course we can't do it here
+     * because this is too early and the HW accelerator isn't initialzed
+     * yet. Postpone this to machine init (see default_caps_with_cpu()).
+     */
+    smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
 }
 
 DEFINE_SPAPR_MACHINE(2_12, "2.12", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 62663ebdf5..aa605cea91 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -465,6 +465,19 @@ static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr,
         caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
     }
 
+    /* This is for pseries-2.12 and older */
+    if (smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] == 0) {
+        uint8_t mps;
+
+        if (kvmppc_hpt_needs_host_contiguous_pages()) {
+            mps = ctz64(qemu_getrampagesize());
+        } else {
+            mps = 34; /* allow everything up to 16GiB, i.e. everything */
+        }
+
+        caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
+    }
+
     return caps;
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 7f9738daed..4ac96bc94b 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -237,11 +237,11 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
     switch (parameter) {
     case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
         char *param_val = g_strdup_printf("MaxEntCap=%d,"
-                                          "DesMem=%llu,"
+                                          "DesMem=%" PRIu64 ","
                                           "DesProcs=%d,"
                                           "MaxPlatProcs=%d",
                                           max_cpus,
-                                          current_machine->ram_size / M_BYTE,
+                                          current_machine->ram_size / MiB,
                                           smp_cpus,
                                           max_cpus);
         ret = sysparm_st(buffer, length, param_val, strlen(param_val) + 1);
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index b4bb90d50b..7891464cd9 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "hw/sysbus.h"
 #include "hw/hw.h"
@@ -45,7 +46,7 @@
 #include "ppc405.h"
 
 #define EPAPR_MAGIC    (0x45504150)
-#define FLASH_SIZE     (16 * 1024 * 1024)
+#define FLASH_SIZE     (16 * MiB)
 
 #define INTC_BASEADDR       0x81800000
 #define UART16550_BASEADDR  0x83e01003
@@ -127,7 +128,7 @@ static void main_cpu_reset(void *opaque)
        *   r8: 0
        *   r9: 0
     */
-    env->gpr[1] = (16<<20) - 8;
+    env->gpr[1] = (16 * MiB) - 8;
     /* Provide a device-tree.  */
     env->gpr[3] = bi->fdt;
     env->nip = bi->bootstrap_pc;
@@ -235,7 +236,7 @@ static void virtex_init(MachineState *machine)
     dinfo = drive_get(IF_PFLASH, 0, 0);
     pflash_cfi01_register(PFLASH_BASEADDR, NULL, "virtex.flash", FLASH_SIZE,
                           dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
-                          (64 * 1024), FLASH_SIZE >> 16,
+                          64 * KiB, FLASH_SIZE >> 16,
                           1, 0x89, 0x18, 0x0000, 0x0, 1);
 
     cpu_irq = (qemu_irq *) &env->irq_inputs[PPC40x_INPUT_INT];
diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index 0b46dc5a9b..81e0e0e99c 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -16,6 +16,7 @@
 #ifndef PVRDMA_PVRDMA_H
 #define PVRDMA_PVRDMA_H
 
+#include "qemu/units.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msix.h"
 
@@ -30,7 +31,7 @@
 #define RDMA_MSIX_BAR_IDX    0
 #define RDMA_REG_BAR_IDX     1
 #define RDMA_UAR_BAR_IDX     2
-#define RDMA_BAR0_MSIX_SIZE  (16 * 1024)
+#define RDMA_BAR0_MSIX_SIZE  (16 * KiB)
 #define RDMA_BAR1_REGS_SIZE  64
 #define RDMA_BAR2_UAR_SIZE   (0x1000 * MAX_UCS) /* each uc gets page */
 
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index ad03113e0f..34d48993a2 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/log.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -84,7 +85,7 @@ static hwaddr load_initrd(const char *filename, uint64_t mem_size,
      * halfway into RAM, and for boards with 256MB of RAM or more we put
      * the initrd at 128MB.
      */
-    *start = kernel_entry + MIN(mem_size / 2, 128 * 1024 * 1024);
+    *start = kernel_entry + MIN(mem_size / 2, 128 * MiB);
 
     size = load_ramdisk(filename, *start, mem_size - *start);
     if (size == -1) {
diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c
index 76241c240e..15f7ab0e53 100644
--- a/hw/s390x/s390-skeys.c
+++ b/hw/s390x/s390-skeys.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/boards.h"
 #include "hw/s390x/storage-keys.h"
 #include "qapi/error.h"
@@ -19,7 +20,7 @@
 #include "sysemu/kvm.h"
 #include "migration/register.h"
 
-#define S390_SKEYS_BUFFER_SIZE 131072  /* Room for 128k storage keys */
+#define S390_SKEYS_BUFFER_SIZE (128 * KiB)  /* Room for 128k storage keys */
 #define S390_SKEYS_SAVE_FLAG_EOS 0x01
 #define S390_SKEYS_SAVE_FLAG_SKEYS 0x02
 #define S390_SKEYS_SAVE_FLAG_ERROR 0x04
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 70b95550a8..5161a1659b 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/boards.h"
 #include "cpu.h"
 #include "migration/qemu-file.h"
@@ -20,7 +21,7 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 
-#define CMMA_BLOCK_SIZE  (1 << 10)
+#define CMMA_BLOCK_SIZE  (1 * KiB)
 
 #define STATTR_FLAG_EOS     0x01ULL
 #define STATTR_FLAG_MORE    0x02ULL
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 047d577313..bd2a024efd 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -13,6 +13,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "sysemu/sysemu.h"
@@ -289,7 +290,7 @@ static void sclp_realize(DeviceState *dev, Error **errp)
     ret = s390_set_memory_limit(machine->maxram_size, &hw_limit);
     if (ret == -E2BIG) {
         error_setg(&err, "host supports a maximum of %" PRIu64 " GB",
-                   hw_limit >> 30);
+                   hw_limit / GiB);
     } else if (ret) {
         error_setg(&err, "setting the guest size failed");
     }
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 55a34b3895..32f3f96ff8 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -29,6 +29,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #endif
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "hw/scsi/scsi.h"
@@ -44,13 +45,13 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #include <scsi/sg.h>
 #endif
 
-#define SCSI_WRITE_SAME_MAX         524288
-#define SCSI_DMA_BUF_SIZE           131072
+#define SCSI_WRITE_SAME_MAX         (512 * KiB)
+#define SCSI_DMA_BUF_SIZE           (128 * KiB)
 #define SCSI_MAX_INQUIRY_LEN        256
 #define SCSI_MAX_MODE_LEN           256
 
-#define DEFAULT_DISCARD_GRANULARITY 4096
-#define DEFAULT_MAX_UNMAP_SIZE      (1 << 30)   /* 1 GB */
+#define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
+#define DEFAULT_MAX_UNMAP_SIZE      (1 * GiB)
 #define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
 
 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 540bccb8d1..d4356e9b73 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -31,6 +31,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/qdev.h"
 #include "hw/hw.h"
 #include "hw/registerfields.h"
@@ -38,7 +39,6 @@
 #include "hw/sd/sd.h"
 #include "qapi/error.h"
 #include "qemu/bitmap.h"
-#include "qemu/cutils.h"
 #include "hw/qdev-properties.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
@@ -305,7 +305,7 @@ static void sd_ocr_powerup(void *opaque)
     /* card power-up OK */
     sd->ocr = FIELD_DP32(sd->ocr, OCR, CARD_POWER_UP, 1);
 
-    if (sd->size > 1 * G_BYTE) {
+    if (sd->size > 1 * GiB) {
         sd->ocr = FIELD_DP32(sd->ocr, OCR, CARD_CAPACITY, 1);
     }
 }
@@ -377,7 +377,7 @@ static void sd_set_csd(SDState *sd, uint64_t size)
     uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
     uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
 
-    if (size <= 1 * G_BYTE) { /* Standard Capacity SD */
+    if (size <= 1 * GiB) { /* Standard Capacity SD */
         sd->csd[0] = 0x00;	/* CSD structure */
         sd->csd[1] = 0x26;	/* Data read access-time-1 */
         sd->csd[2] = 0x00;	/* Data read access-time-2 */
@@ -403,7 +403,7 @@ static void sd_set_csd(SDState *sd, uint64_t size)
             ((HWBLOCK_SHIFT << 6) & 0xc0);
         sd->csd[14] = 0x00;	/* File format group */
     } else {			/* SDHC */
-        size /= 512 * 1024;
+        size /= 512 * KiB;
         size -= 1;
         sd->csd[0] = 0x40;
         sd->csd[1] = 0x0e;
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 321d02d75a..8f58c31265 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
@@ -32,7 +33,6 @@
 #include "hw/sd/sdhci.h"
 #include "sdhci-internal.h"
 #include "qemu/log.h"
-#include "qemu/cutils.h"
 #include "trace.h"
 
 #define TYPE_SDHCI_BUS "sdhci-bus"
@@ -409,7 +409,7 @@ static void sdhci_end_transfer(SDHCIState *s)
 /*
  * Programmed i/o data transfer
  */
-#define BLOCK_SIZE_MASK (4 * K_BYTE - 1)
+#define BLOCK_SIZE_MASK (4 * KiB - 1)
 
 /* Fill host controller's read buffer with BLKSIZE bytes of data from card */
 static void sdhci_read_block_from_card(SDHCIState *s)
@@ -737,7 +737,7 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         if ((dscr->attr & SDHC_ADMA_ATTR_ACT_MASK) == SDHC_ADMA_ATTR_SET_LEN) {
             dscr->length = (uint16_t)extract32(adma1, 12, 16);
         } else {
-            dscr->length = 4096;
+            dscr->length = 4 * KiB;
         }
         break;
     case SDHC_CTRL_ADMA2_64:
@@ -785,7 +785,7 @@ static void sdhci_do_adma(SDHCIState *s)
             return;
         }
 
-        length = dscr.length ? dscr.length : 65536;
+        length = dscr.length ? dscr.length : 64 * KiB;
 
         switch (dscr.attr & SDHC_ADMA_ATTR_ACT_MASK) {
         case SDHC_ADMA_ATTR_ACT_TRAN:  /* data transfer */
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c
index 8fe8766eb9..6a5fc46a47 100644
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -291,7 +292,7 @@ static void r2d_init(MachineState *machine)
     dinfo = drive_get(IF_PFLASH, 0, 0);
     pflash_cfi02_register(0x0, NULL, "r2d.flash", FLASH_SIZE,
                           dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
-                          (16 * 1024), FLASH_SIZE >> 16,
+                          16 * KiB, FLASH_SIZE >> 16,
                           1, 4, 0x0000, 0x0000, 0x0000, 0x0000,
                           0x555, 0x2aa, 0);
 
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 27a07e96f4..a27e54b2fa 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -16,6 +16,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
@@ -625,10 +626,6 @@ static void smbios_build_type_11_table(void)
     SMBIOS_BUILD_TABLE_POST;
 }
 
-#define ONE_KB ((ram_addr_t)1 << 10)
-#define ONE_MB ((ram_addr_t)1 << 20)
-#define ONE_GB ((ram_addr_t)1 << 30)
-
 #define MAX_T16_STD_SZ 0x80000000 /* 2T in Kilobytes */
 
 static void smbios_build_type_16_table(unsigned dimm_cnt)
@@ -640,7 +637,7 @@ static void smbios_build_type_16_table(unsigned dimm_cnt)
     t->location = 0x01; /* Other */
     t->use = 0x03; /* System memory */
     t->error_correction = 0x06; /* Multi-bit ECC (for Microsoft, per SeaBIOS) */
-    size_kb = QEMU_ALIGN_UP(ram_size, ONE_KB) / ONE_KB;
+    size_kb = QEMU_ALIGN_UP(ram_size, KiB) / KiB;
     if (size_kb < MAX_T16_STD_SZ) {
         t->maximum_capacity = cpu_to_le32(size_kb);
         t->extended_maximum_capacity = cpu_to_le64(0);
@@ -668,7 +665,7 @@ static void smbios_build_type_17_table(unsigned instance, uint64_t size)
     t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */
     t->total_width = cpu_to_le16(0xFFFF); /* Unknown */
     t->data_width = cpu_to_le16(0xFFFF); /* Unknown */
-    size_mb = QEMU_ALIGN_UP(size, ONE_MB) / ONE_MB;
+    size_mb = QEMU_ALIGN_UP(size, MiB) / MiB;
     if (size_mb < MAX_T17_STD_SZ) {
         t->size = cpu_to_le16(size_mb);
         t->extended_size = cpu_to_le32(0);
@@ -707,8 +704,8 @@ static void smbios_build_type_19_table(unsigned instance,
 
     end = start + size - 1;
     assert(end > start);
-    start_kb = start / ONE_KB;
-    end_kb = end / ONE_KB;
+    start_kb = start / KiB;
+    end_kb = end / KiB;
     if (start_kb < UINT32_MAX && end_kb < UINT32_MAX) {
         t->starting_address = cpu_to_le32(start_kb);
         t->ending_address = cpu_to_le32(end_kb);
@@ -869,7 +866,7 @@ void smbios_get_tables(const struct smbios_phys_mem_area *mem_array,
 
         smbios_build_type_11_table();
 
-#define MAX_DIMM_SZ (16ll * ONE_GB)
+#define MAX_DIMM_SZ (16 * GiB)
 #define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \
                                         : ((ram_size - 1) % MAX_DIMM_SZ) + 1)
 
diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c
index 98fa6adae0..fa98ab8177 100644
--- a/hw/sparc/leon3.c
+++ b/hw/sparc/leon3.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
@@ -139,9 +140,10 @@ static void leon3_generic_hw_init(MachineState *machine)
     env->qemu_irq_ack = leon3_irq_manager;
 
     /* Allocate RAM */
-    if ((uint64_t)ram_size > (1UL << 30)) {
-        error_report("Too much memory for this machine: %d, maximum 1G",
-                     (unsigned int)(ram_size / (1024 * 1024)));
+    if (ram_size > 1 * GiB) {
+        error_report("Too much memory for this machine: %" PRId64 "MB,"
+                     " maximum 1G",
+                     ram_size / MiB);
         exit(1);
     }
 
@@ -149,7 +151,7 @@ static void leon3_generic_hw_init(MachineState *machine)
     memory_region_add_subregion(address_space_mem, 0x40000000, ram);
 
     /* Allocate BIOS */
-    prom_size = 8 * 1024 * 1024; /* 8Mb */
+    prom_size = 8 * MiB;
     memory_region_init_ram(prom, NULL, "Leon3.bios", prom_size, &error_fatal);
     memory_region_set_readonly(prom, true);
     memory_region_add_subregion(address_space_mem, 0x00000000, prom);
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 21078cc121..d981de1841 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -45,7 +46,6 @@
 #include "hw/loader.h"
 #include "elf.h"
 #include "trace.h"
-#include "qemu/cutils.h"
 
 /*
  * Sun4m architecture was used in the following machines:
@@ -66,7 +66,7 @@
 #define KERNEL_LOAD_ADDR     0x00004000
 #define CMDLINE_ADDR         0x007ff000
 #define INITRD_LOAD_ADDR     0x00800000
-#define PROM_SIZE_MAX        (1024 * 1024)
+#define PROM_SIZE_MAX        (1 * MiB)
 #define PROM_VADDR           0xffd00000
 #define PROM_FILENAME        "openbios-sparc32"
 #define CFG_ADDR             0xd00000510ULL
@@ -774,9 +774,9 @@ static void ram_init(hwaddr addr, ram_addr_t RAM_size,
 
     /* allocate RAM */
     if ((uint64_t)RAM_size > max_mem) {
-        error_report("Too much memory for this machine: %d, maximum %d",
-                     (unsigned int)(RAM_size / (1024 * 1024)),
-                     (unsigned int)(max_mem / (1024 * 1024)));
+        error_report("Too much memory for this machine: %" PRId64 ","
+                     " maximum %" PRId64,
+                     RAM_size / MiB, max_mem / MiB);
         exit(1);
     }
     dev = qdev_create(NULL, "memory");
diff --git a/hw/sparc64/niagara.c b/hw/sparc64/niagara.c
index 22c4655fde..4fa8cb2904 100644
--- a/hw/sparc64/niagara.c
+++ b/hw/sparc64/niagara.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "hw/hw.h"
@@ -84,7 +85,7 @@ typedef struct NiagaraBoardState {
 #define NIAGARA_PROM_BASE   0xfff0000000ULL
 #define NIAGARA_Q_OFFSET    0x10000ULL
 #define NIAGARA_OBP_OFFSET  0x80000ULL
-#define PROM_SIZE_MAX       (4 * 1024 * 1024)
+#define PROM_SIZE_MAX       (4 * MiB)
 
 static void add_rom_or_fail(const char *file, const hwaddr addr)
 {
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 334dd7008e..74b748497e 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
@@ -52,11 +53,10 @@
 #include "hw/loader.h"
 #include "elf.h"
 #include "trace.h"
-#include "qemu/cutils.h"
 
 #define KERNEL_LOAD_ADDR     0x00404000
 #define CMDLINE_ADDR         0x003ff000
-#define PROM_SIZE_MAX        (4 * 1024 * 1024)
+#define PROM_SIZE_MAX        (4 * MiB)
 #define PROM_VADDR           0x000ffd00000ULL
 #define PBM_SPECIAL_BASE     0x1fe00000000ULL
 #define PBM_MEM_BASE         0x1ff00000000ULL
diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
index 8b27a4b7ef..e16b2b913c 100644
--- a/hw/timer/Makefile.objs
+++ b/hw/timer/Makefile.objs
@@ -6,6 +6,7 @@ common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
 common-obj-$(CONFIG_DS1338) += ds1338.o
 common-obj-$(CONFIG_HPET) += hpet.o
 common-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
+common-obj-$(CONFIG_M41T80) += m41t80.o
 common-obj-$(CONFIG_M48T59) += m48t59.o
 ifeq ($(CONFIG_ISA_BUS),y)
 common-obj-$(CONFIG_M48T59) += m48t59-isa.o
diff --git a/hw/timer/m41t80.c b/hw/timer/m41t80.c
new file mode 100644
index 0000000000..734d7d95fc
--- /dev/null
+++ b/hw/timer/m41t80.c
@@ -0,0 +1,117 @@
+/*
+ * M41T80 serial rtc emulation
+ *
+ * Copyright (c) 2018 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qemu/bcd.h"
+#include "hw/i2c/i2c.h"
+
+#define TYPE_M41T80 "m41t80"
+#define M41T80(obj) OBJECT_CHECK(M41t80State, (obj), TYPE_M41T80)
+
+typedef struct M41t80State {
+    I2CSlave parent_obj;
+    int8_t addr;
+} M41t80State;
+
+static void m41t80_realize(DeviceState *dev, Error **errp)
+{
+    M41t80State *s = M41T80(dev);
+
+    s->addr = -1;
+}
+
+static int m41t80_send(I2CSlave *i2c, uint8_t data)
+{
+    M41t80State *s = M41T80(i2c);
+
+    if (s->addr < 0) {
+        s->addr = data;
+    } else {
+        s->addr++;
+    }
+    return 0;
+}
+
+static int m41t80_recv(I2CSlave *i2c)
+{
+    M41t80State *s = M41T80(i2c);
+    struct tm now;
+    qemu_timeval tv;
+
+    if (s->addr < 0) {
+        s->addr = 0;
+    }
+    if (s->addr >= 1 && s->addr <= 7) {
+        qemu_get_timedate(&now, -1);
+    }
+    switch (s->addr++) {
+    case 0:
+        qemu_gettimeofday(&tv);
+        return to_bcd(tv.tv_usec / 10000);
+    case 1:
+        return to_bcd(now.tm_sec);
+    case 2:
+        return to_bcd(now.tm_min);
+    case 3:
+        return to_bcd(now.tm_hour);
+    case 4:
+        return to_bcd(now.tm_wday);
+    case 5:
+        return to_bcd(now.tm_mday);
+    case 6:
+        return to_bcd(now.tm_mon + 1);
+    case 7:
+        return to_bcd(now.tm_year % 100);
+    case 8 ... 19:
+        qemu_log_mask(LOG_UNIMP, "%s: unimplemented register: %d\n",
+                      __func__, s->addr - 1);
+        return 0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid register: %d\n",
+                      __func__, s->addr - 1);
+        return 0;
+    }
+}
+
+static int m41t80_event(I2CSlave *i2c, enum i2c_event event)
+{
+    M41t80State *s = M41T80(i2c);
+
+    if (event == I2C_START_SEND) {
+        s->addr = -1;
+    }
+    return 0;
+}
+
+static void m41t80_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    I2CSlaveClass *sc = I2C_SLAVE_CLASS(klass);
+
+    dc->realize = m41t80_realize;
+    sc->send = m41t80_send;
+    sc->recv = m41t80_recv;
+    sc->event = m41t80_event;
+}
+
+static const TypeInfo m41t80_info = {
+    .name          = TYPE_M41T80,
+    .parent        = TYPE_I2C_SLAVE,
+    .instance_size = sizeof(M41t80State),
+    .class_init    = m41t80_class_init,
+};
+
+static void m41t80_register_types(void)
+{
+    type_register_static(&m41t80_info);
+}
+
+type_init(m41t80_register_types)
diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c
index 8e61dfc3e6..a58096f05e 100644
--- a/hw/tricore/tricore_testboard.c
+++ b/hw/tricore/tricore_testboard.c
@@ -19,6 +19,7 @@
 
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -72,17 +73,17 @@ static void tricore_testboard_init(MachineState *machine, int board_id)
     cpu = TRICORE_CPU(cpu_create(machine->cpu_type));
     env = &cpu->env;
     memory_region_init_ram(ext_cram, NULL, "powerlink_ext_c.ram",
-                           2 * 1024 * 1024, &error_fatal);
+                           2 * MiB, &error_fatal);
     memory_region_init_ram(ext_dram, NULL, "powerlink_ext_d.ram",
-                           4 * 1024 * 1024, &error_fatal);
-    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48 * 1024,
+                           4 * MiB, &error_fatal);
+    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48 * KiB,
                            &error_fatal);
-    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48 * 1024,
+    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48 * KiB,
                            &error_fatal);
     memory_region_init_ram(pcp_data, NULL, "powerlink_pcp_data.ram",
-                           16 * 1024, &error_fatal);
+                           16 * KiB, &error_fatal);
     memory_region_init_ram(pcp_text, NULL, "powerlink_pcp_text.ram",
-                           32 * 1024, &error_fatal);
+                           32 * KiB, &error_fatal);
 
     memory_region_add_subregion(sysmem, 0x80000000, ext_cram);
     memory_region_add_subregion(sysmem, 0xa1000000, ext_dram);
diff --git a/hw/usb/ccid-card-passthru.c b/hw/usb/ccid-card-passthru.c
index 25fb19b0d7..0a6c657228 100644
--- a/hw/usb/ccid-card-passthru.c
+++ b/hw/usb/ccid-card-passthru.c
@@ -9,6 +9,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include <libcacard.h>
 #include "chardev/char-fe.h"
 #include "qemu/error-report.h"
@@ -40,7 +41,7 @@ static const uint8_t DEFAULT_ATR[] = {
  0x13, 0x08
 };
 
-#define VSCARD_IN_SIZE 65536
+#define VSCARD_IN_SIZE      (64 * KiB)
 
 /* maximum size of ATR - from 7816-3 */
 #define MAX_ATR_SIZE        40
@@ -275,9 +276,9 @@ static void ccid_card_vscard_read(void *opaque, const uint8_t *buf, int size)
     VSCMsgHeader *hdr;
 
     if (card->vscard_in_pos + size > VSCARD_IN_SIZE) {
-        error_report(
-            "no room for data: pos %d +  size %d > %d. dropping connection.",
-            card->vscard_in_pos, size, VSCARD_IN_SIZE);
+        error_report("no room for data: pos %u +  size %d > %" PRId64 "."
+                     " dropping connection.",
+                     card->vscard_in_pos, size, VSCARD_IN_SIZE);
         ccid_card_vscard_drop_connection(card);
         return;
     }
diff --git a/hw/usb/combined-packet.c b/hw/usb/combined-packet.c
index 48cac87f6a..01a7ed0848 100644
--- a/hw/usb/combined-packet.c
+++ b/hw/usb/combined-packet.c
@@ -20,6 +20,7 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-common.h"
 #include "hw/usb.h"
 #include "qemu/iov.h"
@@ -171,7 +172,7 @@ void usb_ep_combine_input_packets(USBEndpoint *ep)
         if ((p->iov.size % ep->max_packet_size) != 0 || !p->short_not_ok ||
                 next == NULL ||
                 /* Work around for Linux usbfs bulk splitting + migration */
-                (totalsize == 16348 && p->int_req)) {
+                (totalsize == (16 * KiB - 36) && p->int_req)) {
             usb_device_handle_data(ep->dev, first);
             assert(first->status == USB_RET_ASYNC);
             if (first->combined) {
diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c
index 13d0befd9c..8f716fc165 100644
--- a/hw/usb/dev-smartcard-reader.c
+++ b/hw/usb/dev-smartcard-reader.c
@@ -35,6 +35,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
@@ -63,7 +64,7 @@ do { \
  * or handle the migration complexity - VMState doesn't handle this case.
  * sizes are expected never to be exceeded, unless guest misbehaves.
  */
-#define BULK_OUT_DATA_SIZE 65536
+#define BULK_OUT_DATA_SIZE  (64 * KiB)
 #define PENDING_ANSWERS_NUM 128
 
 #define BULK_IN_BUF_SIZE 384
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 0134232627..e5acfc5ba5 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1672,7 +1672,8 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
         ehci_set_state(ehci, async, EST_HORIZONTALQH);
 
     } else if ((q->qh.token & QTD_TOKEN_ACTIVE) &&
-               (NLPTR_TBIT(q->qh.current_qtd) == 0)) {
+               (NLPTR_TBIT(q->qh.current_qtd) == 0) &&
+               (q->qh.current_qtd != 0)) {
         q->qtdaddr = q->qh.current_qtd;
         ehci_set_state(ehci, async, EST_FETCHQTD);
 
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 721beb5486..8f1a01a405 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1954,7 +1954,12 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid)
         for (i = 0; i < length; i++) {
             TRBType type;
             type = xhci_ring_fetch(xhci, ring, &xfer->trbs[i], NULL);
-            assert(type);
+            if (!type) {
+                xhci_die(xhci);
+                xhci_ep_free_xfer(xfer);
+                epctx->kick_active--;
+                return;
+            }
         }
         xfer->streamid = streamid;
 
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 58e8f7f5bd..99094a721e 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "qemu/timer.h"
@@ -1298,7 +1299,7 @@ static int usbredir_chardev_can_read(void *opaque)
     }
 
     /* usbredir_parser_do_read will consume *all* data we give it */
-    return 1024 * 1024;
+    return 1 * MiB;
 }
 
 static void usbredir_chardev_read(void *opaque, const uint8_t *buf, int size)
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 061259b86b..481fd08df7 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/range.h"
@@ -1448,9 +1449,9 @@ static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
         ggms = 1 << ggms;
     }
 
-    ggms *= 1024 * 1024;
+    ggms *= MiB;
 
-    return (ggms / (4 * 1024)) * (gen < 8 ? 4 : 8);
+    return (ggms / (4 * KiB)) * (gen < 8 ? 4 : 8);
 }
 
 /*
@@ -1705,7 +1706,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     igd->vdev = vdev;
     igd->index = ~0;
     igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
-    igd->bdsm &= ~((1 << 20) - 1); /* 1MB aligned */
+    igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */
 
     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
                           igd, "vfio-igd-index-quirk", 4);
@@ -1752,7 +1753,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
      * config offset 0x5C.
      */
     bdsm_size = g_malloc(sizeof(*bdsm_size));
-    *bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * 1024 * 1024);
+    *bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * MiB);
     fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
                     bdsm_size, sizeof(*bdsm_size));
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 18c493b49e..a1577dea7f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -28,6 +28,7 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "qemu/range.h"
+#include "qemu/units.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
 #include "pci.h"
@@ -1417,7 +1418,7 @@ static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp)
     }
 
     /* 2GB max size for 32-bit BARs, cannot double if already > 1G */
-    if (vdev->bars[target_bar].size > (1 * 1024 * 1024 * 1024) &&
+    if (vdev->bars[target_bar].size > 1 * GiB &&
         !vdev->bars[target_bar].mem64) {
         error_setg(errp, "Invalid MSI-X relocation BAR %d, "
                    "no space to extend 32-bit BAR", target_bar);
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index 289bbcac03..855f1b41d1 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -156,6 +156,19 @@ static void check_rate_limit(void *opaque)
     vrng->activate_timer = true;
 }
 
+static void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VirtIORNG *vrng = VIRTIO_RNG(vdev);
+
+    if (!vdev->vm_running) {
+        return;
+    }
+    vdev->status = status;
+
+    /* Something changed, try to process buffers */
+    virtio_rng_process(vrng);
+}
+
 static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -261,6 +274,7 @@ static void virtio_rng_class_init(ObjectClass *klass, void *data)
     vdc->realize = virtio_rng_device_realize;
     vdc->unrealize = virtio_rng_device_unrealize;
     vdc->get_features = get_features;
+    vdc->set_status = virtio_rng_set_status;
 }
 
 static const TypeInfo virtio_rng_info = {
diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c
index 027f76fad1..188acaca16 100644
--- a/hw/xenpv/xen_domainbuild.c
+++ b/hw/xenpv/xen_domainbuild.c
@@ -1,4 +1,5 @@
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "hw/xen/xen_backend.h"
 #include "xen_domainbuild.h"
 #include "qemu/timer.h"
@@ -75,9 +76,9 @@ int xenstore_domain_init1(const char *kernel, const char *ramdisk,
     xenstore_write_str(dom, "vm",     vm);
 
     /* memory */
-    xenstore_write_int(dom, "memory/target", ram_size >> 10);  // kB
-    xenstore_write_int(vm, "memory",         ram_size >> 20);  // MB
-    xenstore_write_int(vm, "maxmem",         ram_size >> 20);  // MB
+    xenstore_write_int(dom, "memory/target", ram_size / KiB);
+    xenstore_write_int(vm, "memory",         ram_size / MiB);
+    xenstore_write_int(vm, "maxmem",         ram_size / MiB);
 
     /* cpus */
     for (i = 0; i < smp_cpus; i++) {
@@ -113,7 +114,7 @@ int xenstore_domain_init2(int xenstore_port, int xenstore_mfn,
 
     /* console */
     xenstore_write_str(dom, "console/type",     "ioemu");
-    xenstore_write_int(dom, "console/limit",    128 * 1024);
+    xenstore_write_int(dom, "console/limit",    128 * KiB);
     xenstore_write_int(dom, "console/ring-ref", console_mfn);
     xenstore_write_int(dom, "console/port",     console_port);
     xen_config_dev_console(0);
@@ -260,7 +261,7 @@ int xen_domain_build_pv(const char *kernel, const char *ramdisk,
     }
 #endif
 
-    rc = xc_domain_setmaxmem(xen_xc, xen_domid, ram_size >> 10);
+    rc = xc_domain_setmaxmem(xen_xc, xen_domid, ram_size / KiB);
     if (rc < 0) {
         fprintf(stderr, "xen: xc_domain_setmaxmem() failed\n");
         goto err;
@@ -269,7 +270,7 @@ int xen_domain_build_pv(const char *kernel, const char *ramdisk,
     xenstore_port = xc_evtchn_alloc_unbound(xen_xc, xen_domid, 0);
     console_port = xc_evtchn_alloc_unbound(xen_xc, xen_domid, 0);
 
-    rc = xc_linux_build(xen_xc, xen_domid, ram_size >> 20,
+    rc = xc_linux_build(xen_xc, xen_domid, ram_size / MiB,
                         kernel, ramdisk, cmdline,
                         0, flags,
                         xenstore_port, &xenstore_mfn,
diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
index 5dc13034f9..b3161de320 100644
--- a/hw/xtensa/xtfpga.c
+++ b/hw/xtensa/xtfpga.c
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "sysemu/sysemu.h"
@@ -152,7 +153,7 @@ static void xtfpga_net_init(MemoryRegion *address_space,
             sysbus_mmio_get_region(s, 1));
 
     ram = g_malloc(sizeof(*ram));
-    memory_region_init_ram_nomigrate(ram, OBJECT(s), "open_eth.ram", 16384,
+    memory_region_init_ram_nomigrate(ram, OBJECT(s), "open_eth.ram", 16 * KiB,
                            &error_fatal);
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space, buffers, ram);
@@ -229,7 +230,7 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
     const char *kernel_cmdline = qemu_opt_get(machine_opts, "append");
     const char *dtb_filename = qemu_opt_get(machine_opts, "dtb");
     const char *initrd_filename = qemu_opt_get(machine_opts, "initrd");
-    const unsigned system_io_size = 224 * 1024 * 1024;
+    const unsigned system_io_size = 224 * MiB;
     int n;
 
     for (n = 0; n < smp_cpus; n++) {
@@ -342,7 +343,7 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
             cpu_physical_memory_write(cur_lowmem, fdt, fdt_size);
             cur_tagptr = put_tag(cur_tagptr, BP_TAG_FDT,
                                  sizeof(dtb_addr), &dtb_addr);
-            cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + fdt_size, 4096);
+            cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + fdt_size, 4 * KiB);
         }
 #else
         if (dtb_filename) {
@@ -370,7 +371,7 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
             initrd_location.end = tswap32(cur_lowmem + initrd_size);
             cur_tagptr = put_tag(cur_tagptr, BP_TAG_INITRD,
                                  sizeof(initrd_location), &initrd_location);
-            cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + initrd_size, 4096);
+            cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + initrd_size, 4 * KiB);
         }
         cur_tagptr = put_tag(cur_tagptr, BP_TAG_LAST, 0, NULL);
         env->regs[2] = tagptr;
diff --git a/include/block/block.h b/include/block/block.h
index 2ffc1c64c6..e5c7759a0c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -659,13 +659,14 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
  * @dst: Destination child to copy data to
  * @dst_offset: offset in @dst image to write data
  * @bytes: number of bytes to copy
- * @flags: request flags. Must be one of:
- *         0 - actually read data from src;
+ * @flags: request flags. Supported flags:
  *         BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
  *                               write on @dst as if bdrv_co_pwrite_zeroes is
  *                               called. Used to simplify caller code, or
  *                               during BlockDriver.bdrv_co_copy_range_from()
  *                               recursion.
+ *         BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ *                                   requests currently in flight.
  *
  * Returns: 0 if succeeded; negative error code if failed.
  **/
diff --git a/include/block/nbd.h b/include/block/nbd.h
index daaeae61bf..4638c839f5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -259,6 +259,7 @@ static inline bool nbd_reply_type_is_error(int type)
 struct NBDExportInfo {
     /* Set by client before nbd_receive_negotiate() */
     bool request_sizes;
+    char *x_dirty_bitmap;
 
     /* In-out fields, set by client before nbd_receive_negotiate() and
      * updated by server results during nbd_receive_negotiate() */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index cb497dee0b..da73e3bfed 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -412,13 +412,11 @@ static inline uint32_t curr_cflags(void)
 }
 
 /* TranslationBlock invalidate API */
-#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
-#else
-void tb_invalidate_phys_addr(target_ulong addr);
-#endif
 #if defined(CONFIG_USER_ONLY)
+void tb_invalidate_phys_addr(target_ulong addr);
 void tb_invalidate_phys_range(target_ulong start, target_ulong end);
+#else
+void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
 #endif
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
diff --git a/include/hw/acpi/tpm.h b/include/hw/acpi/tpm.h
index 46ac4dc581..3580ffd50c 100644
--- a/include/hw/acpi/tpm.h
+++ b/include/hw/acpi/tpm.h
@@ -16,6 +16,7 @@
 #ifndef HW_ACPI_TPM_H
 #define HW_ACPI_TPM_H
 
+#include "qemu/units.h"
 #include "hw/registerfields.h"
 
 #define TPM_TIS_ADDR_BASE           0xFED40000
@@ -176,7 +177,7 @@ REG32(CRB_DATA_BUFFER, 0x80)
 #define TPM_CRB_ADDR_CTRL           (TPM_CRB_ADDR_BASE + A_CRB_CTRL_REQ)
 #define TPM_CRB_R_MAX               R_CRB_DATA_BUFFER
 
-#define TPM_LOG_AREA_MINIMUM_SIZE   (64 * 1024)
+#define TPM_LOG_AREA_MINIMUM_SIZE   (64 * KiB)
 
 #define TPM_TCPA_ACPI_CLASS_CLIENT  0
 #define TPM_TCPA_ACPI_CLASS_SERVER  1
diff --git a/include/hw/display/xlnx_dp.h b/include/hw/display/xlnx_dp.h
index ee046a5fac..26b759cd44 100644
--- a/include/hw/display/xlnx_dp.h
+++ b/include/hw/display/xlnx_dp.h
@@ -29,14 +29,15 @@
 #include "hw/display/dpcd.h"
 #include "hw/i2c/i2c-ddc.h"
 #include "qemu/fifo8.h"
+#include "qemu/units.h"
 #include "hw/dma/xlnx_dpdma.h"
 #include "audio/audio.h"
 
 #ifndef XLNX_DP_H
 #define XLNX_DP_H
 
-#define AUD_CHBUF_MAX_DEPTH                 32768
-#define MAX_QEMU_BUFFER_SIZE                4096
+#define AUD_CHBUF_MAX_DEPTH                 (32 * KiB)
+#define MAX_QEMU_BUFFER_SIZE                (4 * KiB)
 
 #define DP_CORE_REG_ARRAY_SIZE              (0x3AF >> 2)
 #define DP_AVBUF_REG_ARRAY_SIZE             (0x238 >> 2)
diff --git a/include/hw/i2c/ppc4xx_i2c.h b/include/hw/i2c/ppc4xx_i2c.h
index ea6c8e1a58..0891a9c948 100644
--- a/include/hw/i2c/ppc4xx_i2c.h
+++ b/include/hw/i2c/ppc4xx_i2c.h
@@ -46,7 +46,8 @@ typedef struct PPC4xxI2CState {
     qemu_irq irq;
     MemoryRegion iomem;
     bitbang_i2c_interface *bitbang;
-    uint8_t mdata;
+    int mdidx;
+    uint8_t mdata[4];
     uint8_t lmadr;
     uint8_t hmadr;
     uint8_t cntl;
diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h
index b98d50094a..902a12b178 100644
--- a/include/hw/intc/mips_gic.h
+++ b/include/hw/intc/mips_gic.h
@@ -11,6 +11,7 @@
 #ifndef MIPS_GIC_H
 #define MIPS_GIC_H
 
+#include "qemu/units.h"
 #include "hw/timer/mips_gictimer.h"
 #include "cpu.h"
 /*
@@ -19,7 +20,7 @@
 
 /* The MIPS default location */
 #define GIC_BASE_ADDR           0x1bdc0000ULL
-#define GIC_ADDRSPACE_SZ        (128 * 1024)
+#define GIC_ADDRSPACE_SZ        (128 * KiB)
 
 /* Constants */
 #define GIC_POL_POS     1
diff --git a/include/hw/mips/bios.h b/include/hw/mips/bios.h
index b4b88ac43d..d67ef33e83 100644
--- a/include/hw/mips/bios.h
+++ b/include/hw/mips/bios.h
@@ -1,6 +1,7 @@
+#include "qemu/units.h"
 #include "cpu.h"
 
-#define BIOS_SIZE (4 * 1024 * 1024)
+#define BIOS_SIZE (4 * MiB)
 #ifdef TARGET_WORDS_BIGENDIAN
 #define BIOS_FILENAME "mips_bios.bin"
 #else
diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h
index 4cc8aab7ec..905a43deb4 100644
--- a/include/hw/net/allwinner_emac.h
+++ b/include/hw/net/allwinner_emac.h
@@ -23,6 +23,7 @@
 #ifndef ALLWINNER_EMAC_H
 #define ALLWINNER_EMAC_H
 
+#include "qemu/units.h"
 #include "net/net.h"
 #include "qemu/fifo8.h"
 #include "hw/net/mii.h"
@@ -125,8 +126,8 @@
 #define EMAC_INT_RX         (1 << 8)
 
 /* Due to lack of specifications, size of fifos is chosen arbitrarily */
-#define TX_FIFO_SIZE        (4 * 1024)
-#define RX_FIFO_SIZE        (32 * 1024)
+#define TX_FIFO_SIZE        (4 * KiB)
+#define RX_FIFO_SIZE        (32 * KiB)
 
 #define NUM_TX_FIFOS        2
 #define RX_HDR_SIZE         8
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7e028164ba..7e5de1a6fd 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -1,6 +1,7 @@
 #ifndef HW_SPAPR_H
 #define HW_SPAPR_H
 
+#include "qemu/units.h"
 #include "sysemu/dma.h"
 #include "hw/boards.h"
 #include "hw/ppc/xics.h"
@@ -749,8 +750,8 @@ int spapr_rng_populate_dt(void *fdt);
  */
 #define SPAPR_MAX_RAM_SLOTS     32
 
-/* 1GB alignment for device memory region */
-#define SPAPR_DEVICE_MEM_ALIGN (1ULL << 30)
+/* 1GB alignment for hotplug memory region */
+#define SPAPR_DEVICE_MEM_ALIGN (1 * GiB)
 
 /*
  * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 6cebff47a7..6ac8a9392d 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -65,10 +65,11 @@ typedef struct XICSFabric XICSFabric;
 struct ICPStateClass {
     DeviceClass parent_class;
 
-    void (*realize)(ICPState *icp, Error **errp);
+    DeviceRealize parent_realize;
+    DeviceReset parent_reset;
+
     void (*pre_save)(ICPState *icp);
     int (*post_load)(ICPState *icp, int version_id);
-    void (*reset)(ICPState *icp);
     void (*synchronize_state)(ICPState *icp);
 };
 
@@ -114,7 +115,9 @@ struct PnvICPState {
 struct ICSStateClass {
     DeviceClass parent_class;
 
-    void (*realize)(ICSState *s, Error **errp);
+    DeviceRealize parent_realize;
+    DeviceReset parent_reset;
+
     void (*pre_save)(ICSState *s);
     int (*post_load)(ICSState *s, int version_id);
     void (*reject)(ICSState *s, uint32_t irq);
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 02484dc94c..4d7f3c82ca 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -14,6 +14,7 @@
 #ifndef QEMU_VIRTIO_NET_H
 #define QEMU_VIRTIO_NET_H
 
+#include "qemu/units.h"
 #include "standard-headers/linux/virtio_net.h"
 #include "hw/virtio/virtio.h"
 
@@ -44,7 +45,7 @@ typedef struct virtio_net_conf
 } virtio_net_conf;
 
 /* Maximum packet size we can receive from tap device: header + 64k */
-#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
+#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 * KiB))
 
 typedef struct VirtIONetQueue {
     VirtQueue *rx_vq;
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 274d419bb7..47aaa3b0b9 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -155,13 +155,6 @@ int qemu_strtosz(const char *nptr, char **end, uint64_t *result);
 int qemu_strtosz_MiB(const char *nptr, char **end, uint64_t *result);
 int qemu_strtosz_metric(const char *nptr, char **end, uint64_t *result);
 
-#define K_BYTE     (1ULL << 10)
-#define M_BYTE     (1ULL << 20)
-#define G_BYTE     (1ULL << 30)
-#define T_BYTE     (1ULL << 40)
-#define P_BYTE     (1ULL << 50)
-#define E_BYTE     (1ULL << 60)
-
 /* used to print char* safely */
 #define STR_OR_NULL(str) ((str) ? (str) : "null")
 
diff --git a/include/qemu/units.h b/include/qemu/units.h
new file mode 100644
index 0000000000..692db3fbb2
--- /dev/null
+++ b/include/qemu/units.h
@@ -0,0 +1,20 @@
+/*
+ * IEC binary prefixes definitions
+ *
+ * Copyright (C) 2015 Nikunj A Dadhania, IBM Corporation
+ * Copyright (C) 2018 Philippe Mathieu-Daudé <f4bug@amsat.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef QEMU_UNITS_H
+#define QEMU_UNITS_H
+
+#define KiB     (INT64_C(1) << 10)
+#define MiB     (INT64_C(1) << 20)
+#define GiB     (INT64_C(1) << 30)
+#define TiB     (INT64_C(1) << 40)
+#define PiB     (INT64_C(1) << 50)
+#define EiB     (INT64_C(1) << 60)
+
+#endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 84e9ec9335..52b5a618fe 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -17,6 +17,7 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu-version.h"
 #include <sys/syscall.h>
 #include <sys/resource.h>
@@ -274,9 +275,9 @@ static void handle_arg_stack_size(const char *arg)
     }
 
     if (*p == 'M') {
-        guest_stack_size *= 1024 * 1024;
+        guest_stack_size *= MiB;
     } else if (*p == 'k' || *p == 'K') {
-        guest_stack_size *= 1024;
+        guest_stack_size *= KiB;
     }
 }
 
diff --git a/linux-user/openrisc/signal.c b/linux-user/openrisc/signal.c
index 8be0b74001..232ad82b98 100644
--- a/linux-user/openrisc/signal.c
+++ b/linux-user/openrisc/signal.c
@@ -21,124 +21,69 @@
 #include "signal-common.h"
 #include "linux-user/trace.h"
 
-struct target_sigcontext {
+typedef struct target_sigcontext {
     struct target_pt_regs regs;
     abi_ulong oldmask;
-    abi_ulong usp;
-};
+} target_sigcontext;
 
-struct target_ucontext {
+typedef struct target_ucontext {
     abi_ulong tuc_flags;
     abi_ulong tuc_link;
     target_stack_t tuc_stack;
-    struct target_sigcontext tuc_mcontext;
+    target_sigcontext tuc_mcontext;
     target_sigset_t tuc_sigmask;   /* mask last for extensibility */
-};
+} target_ucontext;
 
-struct target_rt_sigframe {
-    abi_ulong pinfo;
-    uint64_t puc;
+typedef struct target_rt_sigframe {
     struct target_siginfo info;
-    struct target_sigcontext sc;
-    struct target_ucontext uc;
-    unsigned char retcode[16];  /* trampoline code */
-};
-
-/* This is the asm-generic/ucontext.h version */
-#if 0
-static int restore_sigcontext(CPUOpenRISCState *regs,
-                              struct target_sigcontext *sc)
-{
-    unsigned int err = 0;
-    unsigned long old_usp;
-
-    /* Alwys make any pending restarted system call return -EINTR */
-    current_thread_info()->restart_block.fn = do_no_restart_syscall;
+    target_ucontext uc;
+    uint32_t retcode[4];  /* trampoline code */
+} target_rt_sigframe;
 
-    /* restore the regs from &sc->regs (same as sc, since regs is first)
-     * (sc is already checked for VERIFY_READ since the sigframe was
-     *  checked in sys_sigreturn previously)
-     */
+static void restore_sigcontext(CPUOpenRISCState *env, target_sigcontext *sc)
+{
+    int i;
+    abi_ulong v;
 
-    if (copy_from_user(regs, &sc, sizeof(struct target_pt_regs))) {
-        goto badframe;
+    for (i = 0; i < 32; ++i) {
+        __get_user(v, &sc->regs.gpr[i]);
+        cpu_set_gpr(env, i, v);
     }
+    __get_user(env->pc, &sc->regs.pc);
 
-    /* make sure the U-flag is set so user-mode cannot fool us */
-
-    regs->sr &= ~SR_SM;
-
-    /* restore the old USP as it was before we stacked the sc etc.
-     * (we cannot just pop the sigcontext since we aligned the sp and
-     *  stuff after pushing it)
-     */
-
-    __get_user(old_usp, &sc->usp);
-    phx_signal("old_usp 0x%lx", old_usp);
-
-    __PHX__ REALLY           /* ??? */
-    wrusp(old_usp);
-    regs->gpr[1] = old_usp;
-
-    /* TODO: the other ports use regs->orig_XX to disable syscall checks
-     * after this completes, but we don't use that mechanism. maybe we can
-     * use it now ?
-     */
-
-    return err;
-
-badframe:
-    return 1;
+    /* Make sure the supervisor flag is clear.  */
+    __get_user(v, &sc->regs.sr);
+    cpu_set_sr(env, v & ~SR_SM);
 }
-#endif
 
 /* Set up a signal frame.  */
 
-static void setup_sigcontext(struct target_sigcontext *sc,
-                             CPUOpenRISCState *regs,
-                             unsigned long mask)
+static void setup_sigcontext(target_sigcontext *sc, CPUOpenRISCState *env)
 {
-    unsigned long usp = cpu_get_gpr(regs, 1);
-
-    /* copy the regs. they are first in sc so we can use sc directly */
+    int i;
 
-    /*copy_to_user(&sc, regs, sizeof(struct target_pt_regs));*/
-
-    /* Set the frametype to CRIS_FRAME_NORMAL for the execution of
-       the signal handler. The frametype will be restored to its previous
-       value in restore_sigcontext. */
-    /*regs->frametype = CRIS_FRAME_NORMAL;*/
-
-    /* then some other stuff */
-    __put_user(mask, &sc->oldmask);
-    __put_user(usp, &sc->usp);
-}
+    for (i = 0; i < 32; ++i) {
+        __put_user(cpu_get_gpr(env, i), &sc->regs.gpr[i]);
+    }
 
-static inline unsigned long align_sigframe(unsigned long sp)
-{
-    return sp & ~3UL;
+    __put_user(env->pc, &sc->regs.pc);
+    __put_user(cpu_get_sr(env), &sc->regs.sr);
 }
 
 static inline abi_ulong get_sigframe(struct target_sigaction *ka,
-                                     CPUOpenRISCState *regs,
+                                     CPUOpenRISCState *env,
                                      size_t frame_size)
 {
-    unsigned long sp = get_sp_from_cpustate(regs);
-    int onsigstack = on_sig_stack(sp);
-
-    /* redzone */
-    sp = target_sigsp(sp, ka);
-
-    sp = align_sigframe(sp - frame_size);
+    target_ulong sp = get_sp_from_cpustate(env);
 
-    /*
-     * If we are on the alternate signal stack and would overflow it, don't.
-     * Return an always-bogus address instead so we will die with SIGSEGV.
+    /* Honor redzone now.  If we swap to signal stack, no need to waste
+     * the 128 bytes by subtracting afterward.
      */
+    sp -= 128;
 
-    if (onsigstack && !likely(on_sig_stack(sp))) {
-        return -1L;
-    }
+    sp = target_sigsp(sp, ka);
+    sp -= frame_size;
+    sp = QEMU_ALIGN_DOWN(sp, 4);
 
     return sp;
 }
@@ -147,11 +92,9 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
                     target_siginfo_t *info,
                     target_sigset_t *set, CPUOpenRISCState *env)
 {
-    int err = 0;
     abi_ulong frame_addr;
-    unsigned long return_ip;
-    struct target_rt_sigframe *frame;
-    abi_ulong info_addr, uc_addr;
+    target_rt_sigframe *frame;
+    int i;
 
     frame_addr = get_sigframe(ka, env, sizeof(*frame));
     trace_user_setup_rt_frame(env, frame_addr);
@@ -159,47 +102,37 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
         goto give_sigsegv;
     }
 
-    info_addr = frame_addr + offsetof(struct target_rt_sigframe, info);
-    __put_user(info_addr, &frame->pinfo);
-    uc_addr = frame_addr + offsetof(struct target_rt_sigframe, uc);
-    __put_user(uc_addr, &frame->puc);
-
     if (ka->sa_flags & SA_SIGINFO) {
         tswap_siginfo(&frame->info, info);
     }
 
-    /*err |= __clear_user(&frame->uc, offsetof(ucontext_t, uc_mcontext));*/
     __put_user(0, &frame->uc.tuc_flags);
     __put_user(0, &frame->uc.tuc_link);
-    target_save_altstack(&frame->uc.tuc_stack, env);
-    setup_sigcontext(&frame->sc, env, set->sig[0]);
 
-    /*err |= copy_to_user(frame->uc.tuc_sigmask, set, sizeof(*set));*/
-
-    /* trampoline - the desired return ip is the retcode itself */
-    return_ip = (unsigned long)&frame->retcode;
-    /* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */
-    __put_user(0xa960, (short *)(frame->retcode + 0));
-    __put_user(TARGET_NR_rt_sigreturn, (short *)(frame->retcode + 2));
-    __put_user(0x20000001, (unsigned long *)(frame->retcode + 4));
-    __put_user(0x15000000, (unsigned long *)(frame->retcode + 8));
-
-    if (err) {
-        goto give_sigsegv;
+    target_save_altstack(&frame->uc.tuc_stack, env);
+    setup_sigcontext(&frame->uc.tuc_mcontext, env);
+    for (i = 0; i < TARGET_NSIG_WORDS; ++i) {
+        __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]);
     }
 
-    /* TODO what is the current->exec_domain stuff and invmap ? */
+    /* This is l.ori r11,r0,__NR_sigreturn; l.sys 1; l.nop; l.nop */
+    __put_user(0xa9600000 | TARGET_NR_rt_sigreturn, frame->retcode + 0);
+    __put_user(0x20000001, frame->retcode + 1);
+    __put_user(0x15000000, frame->retcode + 2);
+    __put_user(0x15000000, frame->retcode + 3);
 
     /* Set up registers for signal handler */
-    env->pc = (unsigned long)ka->_sa_handler; /* what we enter NOW */
-    cpu_set_gpr(env, 9, (unsigned long)return_ip);     /* what we enter LATER */
-    cpu_set_gpr(env, 3, (unsigned long)sig);           /* arg 1: signo */
-    cpu_set_gpr(env, 4, (unsigned long)&frame->info);  /* arg 2: (siginfo_t*) */
-    cpu_set_gpr(env, 5, (unsigned long)&frame->uc);    /* arg 3: ucontext */
-
-    /* actually move the usp to reflect the stacked frame */
-    cpu_set_gpr(env, 1, (unsigned long)frame);
-
+    cpu_set_gpr(env, 9, frame_addr + offsetof(target_rt_sigframe, retcode));
+    cpu_set_gpr(env, 3, sig);
+    cpu_set_gpr(env, 4, frame_addr + offsetof(target_rt_sigframe, info));
+    cpu_set_gpr(env, 5, frame_addr + offsetof(target_rt_sigframe, uc));
+    cpu_set_gpr(env, 1, frame_addr);
+
+    /* For debugging convenience, set ppc to the insn that faulted.  */
+    env->ppc = env->pc;
+    /* When setting the PC for the signal handler, exit delay slot.  */
+    env->pc = ka->_sa_handler;
+    env->dflag = 0;
     return;
 
 give_sigsegv:
@@ -207,16 +140,34 @@ give_sigsegv:
     force_sigsegv(sig);
 }
 
-long do_sigreturn(CPUOpenRISCState *env)
-{
-    trace_user_do_sigreturn(env, 0);
-    fprintf(stderr, "do_sigreturn: not implemented\n");
-    return -TARGET_ENOSYS;
-}
-
 long do_rt_sigreturn(CPUOpenRISCState *env)
 {
+    abi_ulong frame_addr = get_sp_from_cpustate(env);
+    target_rt_sigframe *frame;
+    sigset_t set;
+
     trace_user_do_rt_sigreturn(env, 0);
-    fprintf(stderr, "do_rt_sigreturn: not implemented\n");
-    return -TARGET_ENOSYS;
+    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+        goto badframe;
+    }
+    if (frame_addr & 3) {
+        goto badframe;
+    }
+
+    target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
+    set_sigmask(&set);
+
+    restore_sigcontext(env, &frame->uc.tuc_mcontext);
+    if (do_sigaltstack(frame_addr + offsetof(target_rt_sigframe, uc.tuc_stack),
+                       0, frame_addr) == -EFAULT) {
+        goto badframe;
+    }
+
+    unlock_user_struct(frame, frame_addr, 0);
+    return cpu_get_gpr(env, 11);
+
+ badframe:
+    unlock_user_struct(frame, frame_addr, 0);
+    force_sig(TARGET_SIGSEGV);
+    return 0;
 }
diff --git a/linux-user/openrisc/target_signal.h b/linux-user/openrisc/target_signal.h
index c352a8b333..8283eaf544 100644
--- a/linux-user/openrisc/target_signal.h
+++ b/linux-user/openrisc/target_signal.h
@@ -5,8 +5,8 @@
 
 typedef struct target_sigaltstack {
     abi_long ss_sp;
+    abi_int ss_flags;
     abi_ulong ss_size;
-    abi_long ss_flags;
 } target_stack_t;
 
 /* sigaltstack controls  */
diff --git a/linux-user/openrisc/target_syscall.h b/linux-user/openrisc/target_syscall.h
index 03104f80af..d586d2a018 100644
--- a/linux-user/openrisc/target_syscall.h
+++ b/linux-user/openrisc/target_syscall.h
@@ -1,27 +1,15 @@
 #ifndef OPENRISC_TARGET_SYSCALL_H
 #define OPENRISC_TARGET_SYSCALL_H
 
+/* Note that in linux/arch/openrisc/include/uapi/asm/ptrace.h,
+ * this is called user_regs_struct.  Given that this is what
+ * is used within struct sigcontext we need this definition.
+ * However, elfload.c wants this name.
+ */
 struct target_pt_regs {
-    union {
-        struct {
-            /* Named registers */
-            uint32_t sr;       /* Stored in place of r0 */
-            target_ulong sp;   /* r1 */
-        };
-        struct {
-            /* Old style */
-            target_ulong offset[2];
-            target_ulong gprs[30];
-        };
-        struct {
-            /* New style */
-            target_ulong gpr[32];
-        };
-    };
-    target_ulong pc;
-    target_ulong orig_gpr11;   /* For restarting system calls */
-    uint32_t syscallno;        /* Syscall number (used by strace) */
-    target_ulong dummy;     /* Cheap alignment fix */
+    abi_ulong gpr[32];
+    abi_ulong pc;
+    abi_ulong sr;
 };
 
 #define UNAME_MACHINE "openrisc"
diff --git a/linux-user/ppc/cpu_loop.c b/linux-user/ppc/cpu_loop.c
index 2fb516cb00..133a87f349 100644
--- a/linux-user/ppc/cpu_loop.c
+++ b/linux-user/ppc/cpu_loop.c
@@ -65,99 +65,23 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val)
     return -1;
 }
 
-static int do_store_exclusive(CPUPPCState *env)
-{
-    target_ulong addr;
-    target_ulong page_addr;
-    target_ulong val, val2 __attribute__((unused)) = 0;
-    int flags;
-    int segv = 0;
-
-    addr = env->reserve_ea;
-    page_addr = addr & TARGET_PAGE_MASK;
-    start_exclusive();
-    mmap_lock();
-    flags = page_get_flags(page_addr);
-    if ((flags & PAGE_READ) == 0) {
-        segv = 1;
-    } else {
-        int reg = env->reserve_info & 0x1f;
-        int size = env->reserve_info >> 5;
-        int stored = 0;
-
-        if (addr == env->reserve_addr) {
-            switch (size) {
-            case 1: segv = get_user_u8(val, addr); break;
-            case 2: segv = get_user_u16(val, addr); break;
-            case 4: segv = get_user_u32(val, addr); break;
-#if defined(TARGET_PPC64)
-            case 8: segv = get_user_u64(val, addr); break;
-            case 16: {
-                segv = get_user_u64(val, addr);
-                if (!segv) {
-                    segv = get_user_u64(val2, addr + 8);
-                }
-                break;
-            }
-#endif
-            default: abort();
-            }
-            if (!segv && val == env->reserve_val) {
-                val = env->gpr[reg];
-                switch (size) {
-                case 1: segv = put_user_u8(val, addr); break;
-                case 2: segv = put_user_u16(val, addr); break;
-                case 4: segv = put_user_u32(val, addr); break;
-#if defined(TARGET_PPC64)
-                case 8: segv = put_user_u64(val, addr); break;
-                case 16: {
-                    if (val2 == env->reserve_val2) {
-                        if (msr_le) {
-                            val2 = val;
-                            val = env->gpr[reg+1];
-                        } else {
-                            val2 = env->gpr[reg+1];
-                        }
-                        segv = put_user_u64(val, addr);
-                        if (!segv) {
-                            segv = put_user_u64(val2, addr + 8);
-                        }
-                    }
-                    break;
-                }
-#endif
-                default: abort();
-                }
-                if (!segv) {
-                    stored = 1;
-                }
-            }
-        }
-        env->crf[0] = (stored << 1) | xer_so;
-        env->reserve_addr = (target_ulong)-1;
-    }
-    if (!segv) {
-        env->nip += 4;
-    }
-    mmap_unlock();
-    end_exclusive();
-    return segv;
-}
-
 void cpu_loop(CPUPPCState *env)
 {
     CPUState *cs = CPU(ppc_env_get_cpu(env));
     target_siginfo_t info;
-    int trapnr;
+    int trapnr, sig;
     target_ulong ret;
 
     for(;;) {
+        bool arch_interrupt;
+
         cpu_exec_start(cs);
         trapnr = cpu_exec(cs);
         cpu_exec_end(cs);
         process_queued_cpu_work(cs);
 
-        switch(trapnr) {
+        arch_interrupt = true;
+        switch (trapnr) {
         case POWERPC_EXCP_NONE:
             /* Just go on */
             break;
@@ -524,26 +448,15 @@ void cpu_loop(CPUPPCState *env)
             }
             env->gpr[3] = ret;
             break;
-        case POWERPC_EXCP_STCX:
-            if (do_store_exclusive(env)) {
-                info.si_signo = TARGET_SIGSEGV;
+        case EXCP_DEBUG:
+            sig = gdb_handlesig(cs, TARGET_SIGTRAP);
+            if (sig) {
+                info.si_signo = sig;
                 info.si_errno = 0;
-                info.si_code = TARGET_SEGV_MAPERR;
-                info._sifields._sigfault._addr = env->nip;
+                info.si_code = TARGET_TRAP_BRKPT;
                 queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-            }
-            break;
-        case EXCP_DEBUG:
-            {
-                int sig;
-
-                sig = gdb_handlesig(cs, TARGET_SIGTRAP);
-                if (sig) {
-                    info.si_signo = sig;
-                    info.si_errno = 0;
-                    info.si_code = TARGET_TRAP_BRKPT;
-                    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-                  }
+            } else {
+                arch_interrupt = false;
             }
             break;
         case EXCP_INTERRUPT:
@@ -551,12 +464,22 @@ void cpu_loop(CPUPPCState *env)
             break;
         case EXCP_ATOMIC:
             cpu_exec_step_atomic(cs);
+            arch_interrupt = false;
             break;
         default:
             cpu_abort(cs, "Unknown exception 0x%x. Aborting\n", trapnr);
             break;
         }
         process_pending_signals(env);
+
+        /* Most of the traps imply a transition through kernel mode,
+         * which implies an REI instruction has been executed.  Which
+         * means that RX and LOCK_ADDR should be cleared.  But there
+         * are a few exceptions for traps internal to QEMU.
+         */
+        if (arch_interrupt) {
+            env->reserve_addr = -1;
+        }
     }
 }
 
diff --git a/linux-user/signal.c b/linux-user/signal.c
index be2815b45d..602b631b92 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -236,7 +236,7 @@ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset)
     return 0;
 }
 
-#if !defined(TARGET_OPENRISC) && !defined(TARGET_NIOS2)
+#if !defined(TARGET_NIOS2)
 /* Just set the guest's signal mask to the specified value; the
  * caller is assumed to have called block_signals() already.
  */
diff --git a/monitor.c b/monitor.c
index 567668a0e7..3a0ea0c602 100644
--- a/monitor.c
+++ b/monitor.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include <dirent.h>
 #include "cpu.h"
 #include "hw/hw.h"
@@ -3303,7 +3304,7 @@ static QDict *monitor_parse_arguments(Monitor *mon,
                         monitor_printf(mon, "enter a positive value\n");
                         goto fail;
                     }
-                    val <<= 20;
+                    val *= MiB;
                 }
                 qdict_put_int(qdict, key, val);
             }
diff --git a/nbd/client.c b/nbd/client.c
index 232ff4f46d..40b74d9761 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2017 Red Hat, Inc.
+ *  Copyright (C) 2016-2018 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device Client Side
@@ -831,7 +831,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
 
             if (info->structured_reply && base_allocation) {
                 result = nbd_negotiate_simple_meta_context(
-                        ioc, name, "base:allocation",
+                        ioc, name, info->x_dirty_bitmap ?: "base:allocation",
                         &info->meta_base_allocation_id, errp);
                 if (result < 0) {
                     goto fail;
diff --git a/nbd/server.c b/nbd/server.c
index 50ac8bfafc..e52b76bd1a 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1977,7 +1977,7 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset,
 
     bdrv_dirty_bitmap_unlock(bitmap);
 
-    assert(offset > end);
+    assert(offset < end);
     *length = end - offset;
     return i;
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 577ce5e999..90e554ed0f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3471,12 +3471,17 @@
 #
 # @tls-creds:   TLS credentials ID
 #
+# @x-dirty-bitmap: A "qemu:dirty-bitmap:NAME" string to query in place of
+#                  traditional "base:allocation" block status (see
+#                  NBD_OPT_LIST_META_CONTEXT in the NBD protocol) (since 3.0)
+#
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsNbd',
   'data': { 'server': 'SocketAddress',
             '*export': 'str',
-            '*tls-creds': 'str' } }
+            '*tls-creds': 'str',
+            '*x-dirty-bitmap': 'str' } }
 
 ##
 # @BlockdevOptionsRaw:
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e3d8c2cdfc..223681bfd0 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -242,6 +242,7 @@ our $UTF8	= qr{
 # There are still some false positives, but this catches most
 # common cases.
 our $typeTypedefs = qr{(?x:
+        (?![KMGTPE]iB)                      # IEC binary prefix (do not match)
         [A-Z][A-Z\d_]*[a-z][A-Za-z\d_]*     # camelcase
         | [A-Z][A-Z\d_]*AIOCB               # all uppercase
         | [A-Z][A-Z\d_]*CPU                 # all uppercase
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index d7eefda0b8..a5cb96d79a 100755
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -1,10 +1,10 @@
 #!/bin/sh
-# enable automatic i386/ARM/M68K/MIPS/SPARC/PPC/s390/HPPA/Xtensa/microblaze
-# program execution by the kernel
+# Enable automatic program execution by the kernel.
 
 qemu_target_list="i386 i486 alpha arm armeb sparc32plus ppc ppc64 ppc64le m68k \
 mips mipsel mipsn32 mipsn32el mips64 mips64el \
-sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb microblaze microblazeel"
+sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb \
+microblaze microblazeel or1k"
 
 i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00'
 i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
@@ -124,6 +124,10 @@ microblazeel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\
 microblazeel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
 microblazeel_family=microblazeel
 
+or1k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5c'
+or1k_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+or1k_family=or1k
+
 qemu_get_family() {
     cpu=${HOST_ARCH:-$(uname -m)}
     case "$cpu" in
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1e6a7d0a75..b0b87c3d81 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qemu/cutils.h"
 #include "qemu/bitops.h"
 
@@ -65,9 +66,6 @@ struct CPUID2CacheDescriptorInfo {
     int associativity;
 };
 
-#define KiB 1024
-#define MiB (1024 * 1024)
-
 /*
  * Known CPUID 2 cache descriptors.
  * From Intel SDM Volume 2A, CPUID instruction
@@ -751,7 +749,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_EXT4_FEATURES 0
-#define TCG_SVM_FEATURES 0
+#define TCG_SVM_FEATURES CPUID_SVM_NPT
 #define TCG_KVM_FEATURES 0
 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \
           CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \
@@ -5415,6 +5413,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false),
     DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
     DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
+    DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8eaefeee3e..2c5a0d90a6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -211,6 +211,7 @@ typedef enum X86Seg {
 #define HF2_VINTR_SHIFT          3 /* value of V_INTR_MASKING bit */
 #define HF2_SMM_INSIDE_NMI_SHIFT 4 /* CPU serving SMI nested inside NMI */
 #define HF2_MPX_PR_SHIFT         5 /* BNDCFGx.BNDPRESERVE */
+#define HF2_NPT_SHIFT            6 /* Nested Paging enabled */
 
 #define HF2_GIF_MASK            (1 << HF2_GIF_SHIFT)
 #define HF2_HIF_MASK            (1 << HF2_HIF_SHIFT)
@@ -218,6 +219,7 @@ typedef enum X86Seg {
 #define HF2_VINTR_MASK          (1 << HF2_VINTR_SHIFT)
 #define HF2_SMM_INSIDE_NMI_MASK (1 << HF2_SMM_INSIDE_NMI_SHIFT)
 #define HF2_MPX_PR_MASK         (1 << HF2_MPX_PR_SHIFT)
+#define HF2_NPT_MASK            (1 << HF2_NPT_SHIFT)
 
 #define CR0_PE_SHIFT 0
 #define CR0_MP_SHIFT 1
@@ -1265,12 +1267,16 @@ typedef struct CPUX86State {
     uint16_t intercept_dr_read;
     uint16_t intercept_dr_write;
     uint32_t intercept_exceptions;
+    uint64_t nested_cr3;
+    uint32_t nested_pg_mode;
     uint8_t v_tpr;
 
     /* KVM states, automatically cleared on reset */
     uint8_t nmi_injected;
     uint8_t nmi_pending;
 
+    uintptr_t retaddr;
+
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
 
@@ -1367,6 +1373,7 @@ struct X86CPU {
     bool hyperv_stimer;
     bool hyperv_frequencies;
     bool hyperv_reenlightenment;
+    bool hyperv_tlbflush;
     bool check_cpuid;
     bool enforce_cpuid;
     bool expose_kvm;
diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
index cb4d1b7d33..37a33d5ae0 100644
--- a/target/i386/excp_helper.c
+++ b/target/i386/excp_helper.c
@@ -157,6 +157,209 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
 
 #else
 
+static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
+                        int *prot)
+{
+    CPUX86State *env = &X86_CPU(cs)->env;
+    uint64_t rsvd_mask = PG_HI_RSVD_MASK;
+    uint64_t ptep, pte;
+    uint64_t exit_info_1 = 0;
+    target_ulong pde_addr, pte_addr;
+    uint32_t page_offset;
+    int page_size;
+
+    if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
+        return gphys;
+    }
+
+    if (!(env->nested_pg_mode & SVM_NPT_NXE)) {
+        rsvd_mask |= PG_NX_MASK;
+    }
+
+    if (env->nested_pg_mode & SVM_NPT_PAE) {
+        uint64_t pde, pdpe;
+        target_ulong pdpe_addr;
+
+#ifdef TARGET_X86_64
+        if (env->nested_pg_mode & SVM_NPT_LMA) {
+            uint64_t pml5e;
+            uint64_t pml4e_addr, pml4e;
+
+            pml5e = env->nested_cr3;
+            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+
+            pml4e_addr = (pml5e & PG_ADDRESS_MASK) +
+                    (((gphys >> 39) & 0x1ff) << 3);
+            pml4e = x86_ldq_phys(cs, pml4e_addr);
+            if (!(pml4e & PG_PRESENT_MASK)) {
+                goto do_fault;
+            }
+            if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
+                goto do_fault_rsvd;
+            }
+            if (!(pml4e & PG_ACCESSED_MASK)) {
+                pml4e |= PG_ACCESSED_MASK;
+                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
+            }
+            ptep &= pml4e ^ PG_NX_MASK;
+            pdpe_addr = (pml4e & PG_ADDRESS_MASK) +
+                    (((gphys >> 30) & 0x1ff) << 3);
+            pdpe = x86_ldq_phys(cs, pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                goto do_fault;
+            }
+            if (pdpe & rsvd_mask) {
+                goto do_fault_rsvd;
+            }
+            ptep &= pdpe ^ PG_NX_MASK;
+            if (!(pdpe & PG_ACCESSED_MASK)) {
+                pdpe |= PG_ACCESSED_MASK;
+                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
+            }
+            if (pdpe & PG_PSE_MASK) {
+                /* 1 GB page */
+                page_size = 1024 * 1024 * 1024;
+                pte_addr = pdpe_addr;
+                pte = pdpe;
+                goto do_check_protect;
+            }
+        } else
+#endif
+        {
+            pdpe_addr = (env->nested_cr3 & ~0x1f) + ((gphys >> 27) & 0x18);
+            pdpe = x86_ldq_phys(cs, pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                goto do_fault;
+            }
+            rsvd_mask |= PG_HI_USER_MASK;
+            if (pdpe & (rsvd_mask | PG_NX_MASK)) {
+                goto do_fault_rsvd;
+            }
+            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+        }
+
+        pde_addr = (pdpe & PG_ADDRESS_MASK) + (((gphys >> 21) & 0x1ff) << 3);
+        pde = x86_ldq_phys(cs, pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            goto do_fault;
+        }
+        if (pde & rsvd_mask) {
+            goto do_fault_rsvd;
+        }
+        ptep &= pde ^ PG_NX_MASK;
+        if (pde & PG_PSE_MASK) {
+            /* 2 MB page */
+            page_size = 2048 * 1024;
+            pte_addr = pde_addr;
+            pte = pde;
+            goto do_check_protect;
+        }
+        /* 4 KB page */
+        if (!(pde & PG_ACCESSED_MASK)) {
+            pde |= PG_ACCESSED_MASK;
+            x86_stl_phys_notdirty(cs, pde_addr, pde);
+        }
+        pte_addr = (pde & PG_ADDRESS_MASK) + (((gphys >> 12) & 0x1ff) << 3);
+        pte = x86_ldq_phys(cs, pte_addr);
+        if (!(pte & PG_PRESENT_MASK)) {
+            goto do_fault;
+        }
+        if (pte & rsvd_mask) {
+            goto do_fault_rsvd;
+        }
+        /* combine pde and pte nx, user and rw protections */
+        ptep &= pte ^ PG_NX_MASK;
+        page_size = 4096;
+    } else {
+        uint32_t pde;
+
+        /* page directory entry */
+        pde_addr = (env->nested_cr3 & ~0xfff) + ((gphys >> 20) & 0xffc);
+        pde = x86_ldl_phys(cs, pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            goto do_fault;
+        }
+        ptep = pde | PG_NX_MASK;
+
+        /* if PSE bit is set, then we use a 4MB page */
+        if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+            page_size = 4096 * 1024;
+            pte_addr = pde_addr;
+
+            /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
+             * Leave bits 20-13 in place for setting accessed/dirty bits below.
+             */
+            pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
+            rsvd_mask = 0x200000;
+            goto do_check_protect_pse36;
+        }
+
+        if (!(pde & PG_ACCESSED_MASK)) {
+            pde |= PG_ACCESSED_MASK;
+            x86_stl_phys_notdirty(cs, pde_addr, pde);
+        }
+
+        /* page directory entry */
+        pte_addr = (pde & ~0xfff) + ((gphys >> 10) & 0xffc);
+        pte = x86_ldl_phys(cs, pte_addr);
+        if (!(pte & PG_PRESENT_MASK)) {
+            goto do_fault;
+        }
+        /* combine pde and pte user and rw protections */
+        ptep &= pte | PG_NX_MASK;
+        page_size = 4096;
+        rsvd_mask = 0;
+    }
+
+ do_check_protect:
+    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
+ do_check_protect_pse36:
+    if (pte & rsvd_mask) {
+        goto do_fault_rsvd;
+    }
+    ptep ^= PG_NX_MASK;
+
+    if (!(ptep & PG_USER_MASK)) {
+        goto do_fault_protect;
+    }
+    if (ptep & PG_NX_MASK) {
+        if (access_type == MMU_INST_FETCH) {
+            goto do_fault_protect;
+        }
+        *prot &= ~PAGE_EXEC;
+    }
+    if (!(ptep & PG_RW_MASK)) {
+        if (access_type == MMU_DATA_STORE) {
+            goto do_fault_protect;
+        }
+        *prot &= ~PAGE_WRITE;
+    }
+
+    pte &= PG_ADDRESS_MASK & ~(page_size - 1);
+    page_offset = gphys & (page_size - 1);
+    return pte + page_offset;
+
+ do_fault_rsvd:
+    exit_info_1 |= SVM_NPTEXIT_RSVD;
+ do_fault_protect:
+    exit_info_1 |= SVM_NPTEXIT_P;
+ do_fault:
+    x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+                 gphys);
+    exit_info_1 |= SVM_NPTEXIT_US;
+    if (access_type == MMU_DATA_STORE) {
+        exit_info_1 |= SVM_NPTEXIT_RW;
+    } else if (access_type == MMU_INST_FETCH) {
+        exit_info_1 |= SVM_NPTEXIT_ID;
+    }
+    if (prot) {
+        exit_info_1 |= SVM_NPTEXIT_GPA;
+    } else { /* page table access */
+        exit_info_1 |= SVM_NPTEXIT_GPT;
+    }
+    cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
+}
+
 /* return value:
  * -1 = cannot handle fault
  * 0  = nothing more to do
@@ -224,6 +427,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
             if (la57) {
                 pml5e_addr = ((env->cr[3] & ~0xfff) +
                         (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
+                pml5e_addr = get_hphys(cs, pml5e_addr, MMU_DATA_STORE, NULL);
                 pml5e = x86_ldq_phys(cs, pml5e_addr);
                 if (!(pml5e & PG_PRESENT_MASK)) {
                     goto do_fault;
@@ -243,6 +447,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
 
             pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
                     (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
+            pml4e_addr = get_hphys(cs, pml4e_addr, MMU_DATA_STORE, false);
             pml4e = x86_ldq_phys(cs, pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 goto do_fault;
@@ -257,6 +462,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
             ptep &= pml4e ^ PG_NX_MASK;
             pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                 a20_mask;
+            pdpe_addr = get_hphys(cs, pdpe_addr, MMU_DATA_STORE, NULL);
             pdpe = x86_ldq_phys(cs, pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK)) {
                 goto do_fault;
@@ -282,6 +488,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
             /* XXX: load them when cr3 is loaded ? */
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 a20_mask;
+            pdpe_addr = get_hphys(cs, pdpe_addr, MMU_DATA_STORE, false);
             pdpe = x86_ldq_phys(cs, pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK)) {
                 goto do_fault;
@@ -295,6 +502,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
 
         pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
             a20_mask;
+        pde_addr = get_hphys(cs, pde_addr, MMU_DATA_STORE, NULL);
         pde = x86_ldq_phys(cs, pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             goto do_fault;
@@ -317,6 +525,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
         }
         pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
             a20_mask;
+        pte_addr = get_hphys(cs, pte_addr, MMU_DATA_STORE, NULL);
         pte = x86_ldq_phys(cs, pte_addr);
         if (!(pte & PG_PRESENT_MASK)) {
             goto do_fault;
@@ -333,6 +542,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             a20_mask;
+        pde_addr = get_hphys(cs, pde_addr, MMU_DATA_STORE, NULL);
         pde = x86_ldl_phys(cs, pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             goto do_fault;
@@ -360,6 +570,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
         /* page directory entry */
         pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
             a20_mask;
+        pte_addr = get_hphys(cs, pte_addr, MMU_DATA_STORE, NULL);
         pte = x86_ldl_phys(cs, pte_addr);
         if (!(pte & PG_PRESENT_MASK)) {
             goto do_fault;
@@ -442,12 +653,13 @@ do_check_protect_pse36:
 
     /* align to page_size */
     pte &= PG_ADDRESS_MASK & ~(page_size - 1);
+    page_offset = addr & (page_size - 1);
+    paddr = get_hphys(cs, pte + page_offset, is_write1, &prot);
 
     /* Even if 4MB pages, we map only one 4KB page in the cache to
        avoid filling it too fast */
     vaddr = addr & TARGET_PAGE_MASK;
-    page_offset = vaddr & (page_size - 1);
-    paddr = pte + page_offset;
+    paddr &= TARGET_PAGE_MASK;
 
     assert(prot & (1 << is_write1));
     tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h
index 93352ebd2a..d6d5a79293 100644
--- a/target/i386/hyperv-proto.h
+++ b/target/i386/hyperv-proto.h
@@ -58,6 +58,7 @@
 #define HV_APIC_ACCESS_RECOMMENDED          (1u << 3)
 #define HV_SYSTEM_RESET_RECOMMENDED         (1u << 4)
 #define HV_RELAXED_TIMING_RECOMMENDED       (1u << 5)
+#define HV_EX_PROCESSOR_MASKS_RECOMMENDED   (1u << 11)
 
 /*
  * Basic virtualized MSRs
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 032f0ad2fc..ebb2d23aa4 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -601,7 +601,8 @@ static bool hyperv_enabled(X86CPU *cpu)
             cpu->hyperv_runtime ||
             cpu->hyperv_synic ||
             cpu->hyperv_stimer ||
-            cpu->hyperv_reenlightenment);
+            cpu->hyperv_reenlightenment ||
+            cpu->hyperv_tlbflush);
 }
 
 static int kvm_arch_set_tsc_khz(CPUState *cs)
@@ -839,6 +840,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
         if (cpu->hyperv_vapic) {
             c->eax |= HV_APIC_ACCESS_RECOMMENDED;
         }
+        if (cpu->hyperv_tlbflush) {
+            if (kvm_check_extension(cs->kvm_state,
+                                    KVM_CAP_HYPERV_TLBFLUSH) <= 0) {
+                fprintf(stderr, "Hyper-V TLB flush support "
+                        "(requested by 'hv-tlbflush' cpu flag) "
+                        " is not supported by kernel\n");
+                return -ENOSYS;
+            }
+            c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED;
+            c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
+        }
+
         c->ebx = cpu->hyperv_spinlock_attempts;
 
         c = &cpuid_data.entries[cpuid_i++];
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 4d98d367c1..8b64dff487 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -935,6 +935,26 @@ static const VMStateDescription vmstate_msr_virt_ssbd = {
     }
 };
 
+static bool svm_npt_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->hflags2 & HF2_NPT_MASK);
+}
+
+static const VMStateDescription vmstate_svm_npt = {
+    .name = "cpu/svn_npt",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = svm_npt_needed,
+    .fields = (VMStateField[]){
+        VMSTATE_UINT64(env.nested_cr3, X86CPU),
+        VMSTATE_UINT32(env.nested_pg_mode, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 VMStateDescription vmstate_x86_cpu = {
     .name = "cpu",
     .version_id = 12,
@@ -1059,6 +1079,7 @@ VMStateDescription vmstate_x86_cpu = {
         &vmstate_mcg_ext_ctl,
         &vmstate_msr_intel_pt,
         &vmstate_msr_virt_ssbd,
+        &vmstate_svm_npt,
         NULL
     }
 };
diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
index a8ae694a9c..30c26b9d9c 100644
--- a/target/i386/mem_helper.c
+++ b/target/i386/mem_helper.c
@@ -202,13 +202,13 @@ void helper_boundl(CPUX86State *env, target_ulong a0, int v)
 void tlb_fill(CPUState *cs, target_ulong addr, int size,
               MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
     int ret;
 
+    env->retaddr = retaddr;
     ret = x86_cpu_handle_mmu_fault(cs, addr, size, access_type, mmu_idx);
     if (ret) {
-        X86CPU *cpu = X86_CPU(cs);
-        CPUX86State *env = &cpu->env;
-
         raise_exception_err_ra(env, cs->exception_index, env->error_code, retaddr);
     }
 }
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 6bbb3a96cd..74a13c571b 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -35,21 +35,28 @@
 #include "sev_i386.h"
 #include "qapi/qapi-commands-misc.h"
 
-
-static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
-                      hwaddr pte, hwaddr mask)
+/* Perform linear address sign extension */
+static hwaddr addr_canonical(CPUArchState *env, hwaddr addr)
 {
 #ifdef TARGET_X86_64
     if (env->cr[4] & CR4_LA57_MASK) {
         if (addr & (1ULL << 56)) {
-            addr |= -1LL << 57;
+            addr |= (hwaddr)-(1LL << 57);
         }
     } else {
         if (addr & (1ULL << 47)) {
-            addr |= -1LL << 48;
+            addr |= (hwaddr)-(1LL << 48);
         }
     }
 #endif
+    return addr;
+}
+
+static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
+                      hwaddr pte, hwaddr mask)
+{
+    addr = addr_canonical(env, addr);
+
     monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
                    " %c%c%c%c%c%c%c%c%c\n",
                    addr,
@@ -243,8 +250,8 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict)
     }
 }
 
-static void mem_print(Monitor *mon, hwaddr *pstart,
-                      int *plast_prot,
+static void mem_print(Monitor *mon, CPUArchState *env,
+                      hwaddr *pstart, int *plast_prot,
                       hwaddr end, int prot)
 {
     int prot1;
@@ -253,7 +260,9 @@ static void mem_print(Monitor *mon, hwaddr *pstart,
         if (*pstart != -1) {
             monitor_printf(mon, TARGET_FMT_plx "-" TARGET_FMT_plx " "
                            TARGET_FMT_plx " %c%c%c\n",
-                           *pstart, end, end - *pstart,
+                           addr_canonical(env, *pstart),
+                           addr_canonical(env, end),
+                           addr_canonical(env, end - *pstart),
                            prot1 & PG_USER_MASK ? 'u' : '-',
                            'r',
                            prot1 & PG_RW_MASK ? 'w' : '-');
@@ -283,7 +292,7 @@ static void mem_info_32(Monitor *mon, CPUArchState *env)
         if (pde & PG_PRESENT_MASK) {
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
                 prot = pde & (PG_USER_MASK | PG_RW_MASK | PG_PRESENT_MASK);
-                mem_print(mon, &start, &last_prot, end, prot);
+                mem_print(mon, env, &start, &last_prot, end, prot);
             } else {
                 for(l2 = 0; l2 < 1024; l2++) {
                     cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
@@ -295,16 +304,16 @@ static void mem_info_32(Monitor *mon, CPUArchState *env)
                     } else {
                         prot = 0;
                     }
-                    mem_print(mon, &start, &last_prot, end, prot);
+                    mem_print(mon, env, &start, &last_prot, end, prot);
                 }
             }
         } else {
             prot = 0;
-            mem_print(mon, &start, &last_prot, end, prot);
+            mem_print(mon, env, &start, &last_prot, end, prot);
         }
     }
     /* Flush last range */
-    mem_print(mon, &start, &last_prot, (hwaddr)1 << 32, 0);
+    mem_print(mon, env, &start, &last_prot, (hwaddr)1 << 32, 0);
 }
 
 static void mem_info_pae32(Monitor *mon, CPUArchState *env)
@@ -332,7 +341,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)
                     if (pde & PG_PSE_MASK) {
                         prot = pde & (PG_USER_MASK | PG_RW_MASK |
                                       PG_PRESENT_MASK);
-                        mem_print(mon, &start, &last_prot, end, prot);
+                        mem_print(mon, env, &start, &last_prot, end, prot);
                     } else {
                         pt_addr = pde & 0x3fffffffff000ULL;
                         for (l3 = 0; l3 < 512; l3++) {
@@ -345,21 +354,21 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)
                             } else {
                                 prot = 0;
                             }
-                            mem_print(mon, &start, &last_prot, end, prot);
+                            mem_print(mon, env, &start, &last_prot, end, prot);
                         }
                     }
                 } else {
                     prot = 0;
-                    mem_print(mon, &start, &last_prot, end, prot);
+                    mem_print(mon, env, &start, &last_prot, end, prot);
                 }
             }
         } else {
             prot = 0;
-            mem_print(mon, &start, &last_prot, end, prot);
+            mem_print(mon, env, &start, &last_prot, end, prot);
         }
     }
     /* Flush last range */
-    mem_print(mon, &start, &last_prot, (hwaddr)1 << 32, 0);
+    mem_print(mon, env, &start, &last_prot, (hwaddr)1 << 32, 0);
 }
 
 
@@ -389,7 +398,7 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env)
                         prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
                                        PG_PRESENT_MASK);
                         prot &= pml4e;
-                        mem_print(mon, &start, &last_prot, end, prot);
+                        mem_print(mon, env, &start, &last_prot, end, prot);
                     } else {
                         pd_addr = pdpe & 0x3fffffffff000ULL;
                         for (l3 = 0; l3 < 512; l3++) {
@@ -401,7 +410,8 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env)
                                     prot = pde & (PG_USER_MASK | PG_RW_MASK |
                                                   PG_PRESENT_MASK);
                                     prot &= pml4e & pdpe;
-                                    mem_print(mon, &start, &last_prot, end, prot);
+                                    mem_print(mon, env, &start,
+                                              &last_prot, end, prot);
                                 } else {
                                     pt_addr = pde & 0x3fffffffff000ULL;
                                     for (l4 = 0; l4 < 512; l4++) {
@@ -418,27 +428,29 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env)
                                         } else {
                                             prot = 0;
                                         }
-                                        mem_print(mon, &start, &last_prot, end, prot);
+                                        mem_print(mon, env, &start,
+                                                  &last_prot, end, prot);
                                     }
                                 }
                             } else {
                                 prot = 0;
-                                mem_print(mon, &start, &last_prot, end, prot);
+                                mem_print(mon, env, &start,
+                                          &last_prot, end, prot);
                             }
                         }
                     }
                 } else {
                     prot = 0;
-                    mem_print(mon, &start, &last_prot, end, prot);
+                    mem_print(mon, env, &start, &last_prot, end, prot);
                 }
             }
         } else {
             prot = 0;
-            mem_print(mon, &start, &last_prot, end, prot);
+            mem_print(mon, env, &start, &last_prot, end, prot);
         }
     }
     /* Flush last range */
-    mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
+    mem_print(mon, env, &start, &last_prot, (hwaddr)1 << 48, 0);
 }
 
 static void mem_info_la57(Monitor *mon, CPUArchState *env)
@@ -457,7 +469,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
         end = l0 << 48;
         if (!(pml5e & PG_PRESENT_MASK)) {
             prot = 0;
-            mem_print(mon, &start, &last_prot, end, prot);
+            mem_print(mon, env, &start, &last_prot, end, prot);
             continue;
         }
 
@@ -468,7 +480,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
             end = (l0 << 48) + (l1 << 39);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 prot = 0;
-                mem_print(mon, &start, &last_prot, end, prot);
+                mem_print(mon, env, &start, &last_prot, end, prot);
                 continue;
             }
 
@@ -479,7 +491,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
                 end = (l0 << 48) + (l1 << 39) + (l2 << 30);
                 if (pdpe & PG_PRESENT_MASK) {
                     prot = 0;
-                    mem_print(mon, &start, &last_prot, end, prot);
+                    mem_print(mon, env, &start, &last_prot, end, prot);
                     continue;
                 }
 
@@ -487,7 +499,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
                     prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
                             PG_PRESENT_MASK);
                     prot &= pml5e & pml4e;
-                    mem_print(mon, &start, &last_prot, end, prot);
+                    mem_print(mon, env, &start, &last_prot, end, prot);
                     continue;
                 }
 
@@ -498,7 +510,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
                     end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
                     if (pde & PG_PRESENT_MASK) {
                         prot = 0;
-                        mem_print(mon, &start, &last_prot, end, prot);
+                        mem_print(mon, env, &start, &last_prot, end, prot);
                         continue;
                     }
 
@@ -506,7 +518,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
                         prot = pde & (PG_USER_MASK | PG_RW_MASK |
                                 PG_PRESENT_MASK);
                         prot &= pml5e & pml4e & pdpe;
-                        mem_print(mon, &start, &last_prot, end, prot);
+                        mem_print(mon, env, &start, &last_prot, end, prot);
                         continue;
                     }
 
@@ -523,14 +535,14 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env)
                         } else {
                             prot = 0;
                         }
-                        mem_print(mon, &start, &last_prot, end, prot);
+                        mem_print(mon, env, &start, &last_prot, end, prot);
                     }
                 }
             }
         }
     }
     /* Flush last range */
-    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
+    mem_print(mon, env, &start, &last_prot, (hwaddr)1 << 57, 0);
 }
 #endif /* TARGET_X86_64 */
 
diff --git a/target/i386/svm.h b/target/i386/svm.h
index 922c8fd39c..23a3a040b8 100644
--- a/target/i386/svm.h
+++ b/target/i386/svm.h
@@ -130,6 +130,20 @@
 
 #define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
 
+#define SVM_NPT_ENABLED     (1 << 0)
+
+#define SVM_NPT_PAE         (1 << 0)
+#define SVM_NPT_LMA         (1 << 1)
+#define SVM_NPT_NXE         (1 << 2)
+
+#define SVM_NPTEXIT_P       (1ULL << 0)
+#define SVM_NPTEXIT_RW      (1ULL << 1)
+#define SVM_NPTEXIT_US      (1ULL << 2)
+#define SVM_NPTEXIT_RSVD    (1ULL << 3)
+#define SVM_NPTEXIT_ID      (1ULL << 4)
+#define SVM_NPTEXIT_GPA     (1ULL << 32)
+#define SVM_NPTEXIT_GPT     (1ULL << 33)
+
 struct QEMU_PACKED vmcb_control_area {
 	uint16_t intercept_cr_read;
 	uint16_t intercept_cr_write;
diff --git a/target/i386/svm_helper.c b/target/i386/svm_helper.c
index f245aec310..342ece082f 100644
--- a/target/i386/svm_helper.c
+++ b/target/i386/svm_helper.c
@@ -124,6 +124,7 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
 {
     CPUState *cs = CPU(x86_env_get_cpu(env));
     target_ulong addr;
+    uint64_t nested_ctl;
     uint32_t event_inj;
     uint32_t int_ctl;
 
@@ -206,6 +207,26 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                                   control.intercept_exceptions
                                                   ));
 
+    nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
+                                                          control.nested_ctl));
+    if (nested_ctl & SVM_NPT_ENABLED) {
+        env->nested_cr3 = x86_ldq_phys(cs,
+                                env->vm_vmcb + offsetof(struct vmcb,
+                                                        control.nested_cr3));
+        env->hflags2 |= HF2_NPT_MASK;
+
+        env->nested_pg_mode = 0;
+        if (env->cr[4] & CR4_PAE_MASK) {
+            env->nested_pg_mode |= SVM_NPT_PAE;
+        }
+        if (env->hflags & HF_LMA_MASK) {
+            env->nested_pg_mode |= SVM_NPT_LMA;
+        }
+        if (env->efer & MSR_EFER_NXE) {
+            env->nested_pg_mode |= SVM_NPT_NXE;
+        }
+    }
+
     /* enable intercepts */
     env->hflags |= HF_SVMI_MASK;
 
@@ -616,6 +637,7 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
         x86_stl_phys(cs,
                  env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
     }
+    env->hflags2 &= ~HF2_NPT_MASK;
 
     /* Save the VM state in the vmcb */
     svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
diff --git a/target/openrisc/Makefile.objs b/target/openrisc/Makefile.objs
index 1b98a911ea..b5432f4684 100644
--- a/target/openrisc/Makefile.objs
+++ b/target/openrisc/Makefile.objs
@@ -1,7 +1,7 @@
 obj-$(CONFIG_SOFTMMU) += machine.o
-obj-y += cpu.o exception.o interrupt.o mmu.o translate.o
+obj-y += cpu.o exception.o interrupt.o mmu.o translate.o disas.o
 obj-y += exception_helper.o fpu_helper.o \
-         interrupt_helper.o mmu_helper.o sys_helper.o
+         interrupt_helper.o sys_helper.o
 obj-y += gdbstub.o
 
 DECODETREE = $(SRC_PATH)/scripts/decodetree.py
@@ -12,3 +12,4 @@ target/openrisc/decode.inc.c: \
 	  $(PYTHON) $(DECODETREE) -o $@ $<, "GEN", $(TARGET_DIR)$@)
 
 target/openrisc/translate.o: target/openrisc/decode.inc.c
+target/openrisc/disas.o: target/openrisc/decode.inc.c
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index a692a98ec0..fb7cb5c507 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -27,6 +27,7 @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
     OpenRISCCPU *cpu = OPENRISC_CPU(cs);
 
     cpu->env.pc = value;
+    cpu->env.dflag = 0;
 }
 
 static bool openrisc_cpu_has_work(CPUState *cs)
@@ -35,6 +36,11 @@ static bool openrisc_cpu_has_work(CPUState *cs)
                                     CPU_INTERRUPT_TIMER);
 }
 
+static void openrisc_disas_set_info(CPUState *cpu, disassemble_info *info)
+{
+    info->print_insn = print_insn_or1k;
+}
+
 /* CPUClass::reset() */
 static void openrisc_cpu_reset(CPUState *s)
 {
@@ -52,8 +58,10 @@ static void openrisc_cpu_reset(CPUState *s)
 
     cpu->env.upr = UPR_UP | UPR_DMP | UPR_IMP | UPR_PICP | UPR_TTP |
                    UPR_PMP;
-    cpu->env.dmmucfgr = (DMMUCFGR_NTW & (0 << 2)) | (DMMUCFGR_NTS & (6 << 2));
-    cpu->env.immucfgr = (IMMUCFGR_NTW & (0 << 2)) | (IMMUCFGR_NTS & (6 << 2));
+    cpu->env.dmmucfgr = (DMMUCFGR_NTW & (0 << 2))
+                      | (DMMUCFGR_NTS & (ctz32(TLB_SIZE) << 2));
+    cpu->env.immucfgr = (IMMUCFGR_NTW & (0 << 2))
+                      | (IMMUCFGR_NTS & (ctz32(TLB_SIZE) << 2));
 
 #ifndef CONFIG_USER_ONLY
     cpu->env.picmr = 0x00000000;
@@ -87,10 +95,6 @@ static void openrisc_cpu_initfn(Object *obj)
     OpenRISCCPU *cpu = OPENRISC_CPU(obj);
 
     cs->env_ptr = &cpu->env;
-
-#ifndef CONFIG_USER_ONLY
-    cpu_openrisc_mmu_init(cpu);
-#endif
 }
 
 /* CPU models */
@@ -152,6 +156,7 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
 #endif
     cc->gdb_num_core_regs = 32 + 3;
     cc->tcg_initialize = openrisc_translate_init;
+    cc->disas_set_info = openrisc_disas_set_info;
 }
 
 /* Sort alphabetically by type name, except for "any". */
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index 35cab65f11..f1b31bc24a 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -222,12 +222,8 @@ enum {
 
 /* TLB size */
 enum {
-    DTLB_WAYS = 1,
-    DTLB_SIZE = 64,
-    DTLB_MASK = (DTLB_SIZE-1),
-    ITLB_WAYS = 1,
-    ITLB_SIZE = 64,
-    ITLB_MASK = (ITLB_SIZE-1),
+    TLB_SIZE = 128,
+    TLB_MASK = TLB_SIZE - 1,
 };
 
 /* TLB prot */
@@ -241,14 +237,6 @@ enum {
     UXE = (1 << 7),
 };
 
-/* check if tlb available */
-enum {
-    TLBRET_INVALID = -3,
-    TLBRET_NOMATCH = -2,
-    TLBRET_BADADDR = -1,
-    TLBRET_MATCH = 0
-};
-
 typedef struct OpenRISCTLBEntry {
     uint32_t mr;
     uint32_t tr;
@@ -256,8 +244,8 @@ typedef struct OpenRISCTLBEntry {
 
 #ifndef CONFIG_USER_ONLY
 typedef struct CPUOpenRISCTLBContext {
-    OpenRISCTLBEntry itlb[ITLB_WAYS][ITLB_SIZE];
-    OpenRISCTLBEntry dtlb[DTLB_WAYS][DTLB_SIZE];
+    OpenRISCTLBEntry itlb[TLB_SIZE];
+    OpenRISCTLBEntry dtlb[TLB_SIZE];
 
     int (*cpu_openrisc_map_address_code)(struct OpenRISCCPU *cpu,
                                          hwaddr *physical,
@@ -301,6 +289,10 @@ typedef struct CPUOpenRISCState {
 
     uint32_t dflag;           /* In delay slot (boolean) */
 
+#ifndef CONFIG_USER_ONLY
+    CPUOpenRISCTLBContext tlb;
+#endif
+
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
 
@@ -310,8 +302,6 @@ typedef struct CPUOpenRISCState {
     uint32_t cpucfgr;         /* CPU configure register */
 
 #ifndef CONFIG_USER_ONLY
-    CPUOpenRISCTLBContext * tlb;
-
     QEMUTimer *timer;
     uint32_t ttmr;          /* Timer tick mode register */
     int is_counting;
@@ -358,6 +348,7 @@ void openrisc_translate_init(void);
 int openrisc_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int size,
                                   int rw, int mmu_idx);
 int cpu_openrisc_signal_handler(int host_signum, void *pinfo, void *puc);
+int print_insn_or1k(bfd_vma addr, disassemble_info *info);
 
 #define cpu_list cpu_openrisc_list
 #define cpu_signal_handler cpu_openrisc_signal_handler
@@ -376,17 +367,6 @@ void cpu_openrisc_count_update(OpenRISCCPU *cpu);
 void cpu_openrisc_timer_update(OpenRISCCPU *cpu);
 void cpu_openrisc_count_start(OpenRISCCPU *cpu);
 void cpu_openrisc_count_stop(OpenRISCCPU *cpu);
-
-void cpu_openrisc_mmu_init(OpenRISCCPU *cpu);
-int cpu_openrisc_get_phys_nommu(OpenRISCCPU *cpu,
-                                hwaddr *physical,
-                                int *prot, target_ulong address, int rw);
-int cpu_openrisc_get_phys_code(OpenRISCCPU *cpu,
-                               hwaddr *physical,
-                               int *prot, target_ulong address, int rw);
-int cpu_openrisc_get_phys_data(OpenRISCCPU *cpu,
-                               hwaddr *physical,
-                               int *prot, target_ulong address, int rw);
 #endif
 
 #define OPENRISC_CPU_TYPE_SUFFIX "-" TYPE_OPENRISC_CPU
@@ -395,9 +375,12 @@ int cpu_openrisc_get_phys_data(OpenRISCCPU *cpu,
 
 #include "exec/cpu-all.h"
 
-#define TB_FLAGS_DFLAG 1
-#define TB_FLAGS_R0_0  2
+#define TB_FLAGS_SM    SR_SM
+#define TB_FLAGS_DME   SR_DME
+#define TB_FLAGS_IME   SR_IME
 #define TB_FLAGS_OVE   SR_OVE
+#define TB_FLAGS_DFLAG 2      /* reuse SR_TEE */
+#define TB_FLAGS_R0_0  4      /* reuse SR_IEE */
 
 static inline uint32_t cpu_get_gpr(const CPUOpenRISCState *env, int i)
 {
@@ -415,17 +398,21 @@ static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env,
 {
     *pc = env->pc;
     *cs_base = 0;
-    *flags = (env->dflag
-              | (cpu_get_gpr(env, 0) == 0 ? TB_FLAGS_R0_0 : 0)
-              | (env->sr & SR_OVE));
+    *flags = (env->dflag ? TB_FLAGS_DFLAG : 0)
+           | (cpu_get_gpr(env, 0) ? 0 : TB_FLAGS_R0_0)
+           | (env->sr & (SR_SM | SR_DME | SR_IME | SR_OVE));
 }
 
 static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
 {
-    if (!(env->sr & SR_IME)) {
-        return MMU_NOMMU_IDX;
+    int ret = MMU_NOMMU_IDX;  /* mmu is disabled */
+
+    if (env->sr & (ifetch ? SR_IME : SR_DME)) {
+        /* The mmu is enabled; test supervisor state.  */
+        ret = env->sr & SR_SM ? MMU_SUPERVISOR_IDX : MMU_USER_IDX;
     }
-    return (env->sr & SR_SM) == 0 ? MMU_USER_IDX : MMU_SUPERVISOR_IDX;
+
+    return ret;
 }
 
 static inline uint32_t cpu_get_sr(const CPUOpenRISCState *env)
diff --git a/target/openrisc/disas.c b/target/openrisc/disas.c
new file mode 100644
index 0000000000..4bfd2dd8a6
--- /dev/null
+++ b/target/openrisc/disas.c
@@ -0,0 +1,170 @@
+/*
+ * OpenRISC disassembler
+ *
+ * Copyright (c) 2018 Richard Henderson <rth@twiddle.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "disas/bfd.h"
+#include "qemu/bitops.h"
+#include "cpu.h"
+
+typedef disassemble_info DisasContext;
+
+/* Include the auto-generated decoder.  */
+#include "decode.inc.c"
+
+#define output(mnemonic, format, ...) \
+    (info->fprintf_func(info->stream, "%-9s " format, \
+                        mnemonic, ##__VA_ARGS__))
+
+int print_insn_or1k(bfd_vma addr, disassemble_info *info)
+{
+    bfd_byte buffer[4];
+    uint32_t insn;
+    int status;
+
+    status = info->read_memory_func(addr, buffer, 4, info);
+    if (status != 0) {
+        info->memory_error_func(status, addr, info);
+        return -1;
+    }
+    insn = bfd_getb32(buffer);
+
+    if (!decode(info, insn)) {
+        output(".long", "%#08x", insn);
+    }
+    return 4;
+}
+
+#define INSN(opcode, format, ...) \
+static bool trans_l_##opcode(disassemble_info *info,    \
+    arg_l_##opcode *a, uint32_t insn)                   \
+{                                                       \
+    output("l." #opcode, format, ##__VA_ARGS__);        \
+    return true;                                        \
+}
+
+INSN(add,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(addc,   "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(sub,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(and,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(or,     "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(xor,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(sll,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(srl,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(sra,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(ror,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(exths,  "r%d, r%d", a->d, a->a)
+INSN(extbs,  "r%d, r%d", a->d, a->a)
+INSN(exthz,  "r%d, r%d", a->d, a->a)
+INSN(extbz,  "r%d, r%d", a->d, a->a)
+INSN(cmov,   "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(ff1,    "r%d, r%d", a->d, a->a)
+INSN(fl1,    "r%d, r%d", a->d, a->a)
+INSN(mul,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(mulu,   "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(div,    "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(divu,   "r%d, r%d, r%d", a->d, a->a, a->b)
+INSN(muld,   "r%d, r%d", a->a, a->b)
+INSN(muldu,  "r%d, r%d", a->a, a->b)
+INSN(j,      "%d", a->n)
+INSN(jal,    "%d", a->n)
+INSN(bf,     "%d", a->n)
+INSN(bnf,    "%d", a->n)
+INSN(jr,     "r%d", a->b)
+INSN(jalr,   "r%d", a->b)
+INSN(lwa,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(lwz,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(lws,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(lbz,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(lbs,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(lhz,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(lhs,    "r%d, %d(r%d)", a->d, a->i, a->a)
+INSN(swa,    "%d(r%d), r%d", a->i, a->a, a->b)
+INSN(sw,     "%d(r%d), r%d", a->i, a->a, a->b)
+INSN(sb,     "%d(r%d), r%d", a->i, a->a, a->b)
+INSN(sh,     "%d(r%d), r%d", a->i, a->a, a->b)
+INSN(nop,    "")
+INSN(addi,   "r%d, r%d, %d", a->d, a->a, a->i)
+INSN(addic,  "r%d, r%d, %d", a->d, a->a, a->i)
+INSN(muli,   "r%d, r%d, %d", a->d, a->a, a->i)
+INSN(maci,   "r%d, %d", a->a, a->i)
+INSN(andi,   "r%d, r%d, %d", a->d, a->a, a->k)
+INSN(ori,    "r%d, r%d, %d", a->d, a->a, a->k)
+INSN(xori,   "r%d, r%d, %d", a->d, a->a, a->i)
+INSN(mfspr,  "r%d, r%d, %d", a->d, a->a, a->k)
+INSN(mtspr,  "r%d, r%d, %d", a->a, a->b, a->k)
+INSN(mac,    "r%d, r%d", a->a, a->b)
+INSN(msb,    "r%d, r%d", a->a, a->b)
+INSN(macu,   "r%d, r%d", a->a, a->b)
+INSN(msbu,   "r%d, r%d", a->a, a->b)
+INSN(slli,   "r%d, r%d, %d", a->d, a->a, a->l)
+INSN(srli,   "r%d, r%d, %d", a->d, a->a, a->l)
+INSN(srai,   "r%d, r%d, %d", a->d, a->a, a->l)
+INSN(rori,   "r%d, r%d, %d", a->d, a->a, a->l)
+INSN(movhi,  "r%d, %d", a->d, a->k)
+INSN(macrc,  "r%d", a->d)
+INSN(sfeq,   "r%d, r%d", a->a, a->b)
+INSN(sfne,   "r%d, r%d", a->a, a->b)
+INSN(sfgtu,  "r%d, r%d", a->a, a->b)
+INSN(sfgeu,  "r%d, r%d", a->a, a->b)
+INSN(sfltu,  "r%d, r%d", a->a, a->b)
+INSN(sfleu,  "r%d, r%d", a->a, a->b)
+INSN(sfgts,  "r%d, r%d", a->a, a->b)
+INSN(sfges,  "r%d, r%d", a->a, a->b)
+INSN(sflts,  "r%d, r%d", a->a, a->b)
+INSN(sfles,  "r%d, r%d", a->a, a->b)
+INSN(sfeqi,  "r%d, %d", a->a, a->i)
+INSN(sfnei,  "r%d, %d", a->a, a->i)
+INSN(sfgtui, "r%d, %d", a->a, a->i)
+INSN(sfgeui, "r%d, %d", a->a, a->i)
+INSN(sfltui, "r%d, %d", a->a, a->i)
+INSN(sfleui, "r%d, %d", a->a, a->i)
+INSN(sfgtsi, "r%d, %d", a->a, a->i)
+INSN(sfgesi, "r%d, %d", a->a, a->i)
+INSN(sfltsi, "r%d, %d", a->a, a->i)
+INSN(sflesi, "r%d, %d", a->a, a->i)
+INSN(sys,    "%d", a->k)
+INSN(trap,   "%d", a->k)
+INSN(msync,  "")
+INSN(psync,  "")
+INSN(csync,  "")
+INSN(rfe,    "")
+
+#define FP_INSN(opcode, suffix, format, ...) \
+static bool trans_lf_##opcode##_##suffix(disassemble_info *info, \
+    arg_lf_##opcode##_##suffix *a, uint32_t insn)                \
+{                                                                \
+    output("lf." #opcode "." #suffix, format, ##__VA_ARGS__);    \
+    return true;                                                 \
+}
+
+FP_INSN(add, s,  "r%d, r%d, r%d", a->d, a->a, a->b)
+FP_INSN(sub, s,  "r%d, r%d, r%d", a->d, a->a, a->b)
+FP_INSN(mul, s,  "r%d, r%d, r%d", a->d, a->a, a->b)
+FP_INSN(div, s,  "r%d, r%d, r%d", a->d, a->a, a->b)
+FP_INSN(rem, s,  "r%d, r%d, r%d", a->d, a->a, a->b)
+FP_INSN(itof, s, "r%d, r%d", a->d, a->a)
+FP_INSN(ftoi, s, "r%d, r%d", a->d, a->a)
+FP_INSN(madd, s, "r%d, r%d, r%d", a->d, a->a, a->b)
+FP_INSN(sfeq, s, "r%d, r%d", a->a, a->b)
+FP_INSN(sfne, s, "r%d, r%d", a->a, a->b)
+FP_INSN(sfgt, s, "r%d, r%d", a->a, a->b)
+FP_INSN(sfge, s, "r%d, r%d", a->a, a->b)
+FP_INSN(sflt, s, "r%d, r%d", a->a, a->b)
+FP_INSN(sfle, s, "r%d, r%d", a->a, a->b)
diff --git a/target/openrisc/helper.h b/target/openrisc/helper.h
index e37dabc77a..9db9bf3963 100644
--- a/target/openrisc/helper.h
+++ b/target/openrisc/helper.h
@@ -56,5 +56,5 @@ FOP_CMP(le)
 DEF_HELPER_FLAGS_1(rfe, 0, void, env)
 
 /* sys */
-DEF_HELPER_FLAGS_4(mtspr, 0, void, env, tl, tl, tl)
-DEF_HELPER_FLAGS_4(mfspr, TCG_CALL_NO_WG, tl, env, tl, tl, tl)
+DEF_HELPER_FLAGS_3(mtspr, 0, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(mfspr, TCG_CALL_NO_WG, tl, env, tl, tl)
diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index 3959671c59..bbae956361 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -32,29 +32,22 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
 #ifndef CONFIG_USER_ONLY
     OpenRISCCPU *cpu = OPENRISC_CPU(cs);
     CPUOpenRISCState *env = &cpu->env;
+    int exception = cs->exception_index;
 
     env->epcr = env->pc;
-    if (env->dflag) {
-        env->dflag = 0;
-        env->sr |= SR_DSX;
-        env->epcr -= 4;
-    } else {
-        env->sr &= ~SR_DSX;
-    }
-    if (cs->exception_index == EXCP_SYSCALL) {
+    if (exception == EXCP_SYSCALL) {
         env->epcr += 4;
     }
     /* When we have an illegal instruction the error effective address
        shall be set to the illegal instruction address.  */
-    if (cs->exception_index == EXCP_ILLEGAL) {
+    if (exception == EXCP_ILLEGAL) {
         env->eear = env->pc;
     }
 
-    /* For machine-state changed between user-mode and supervisor mode,
-       we need flush TLB when we enter&exit EXCP.  */
-    tlb_flush(cs);
-
+    /* During exceptions esr is populared with the pre-exception sr.  */
     env->esr = cpu_get_sr(env);
+    /* In parallel sr is updated to disable mmu, interrupts, timers and
+       set the delay slot exception flag.  */
     env->sr &= ~SR_DME;
     env->sr &= ~SR_IME;
     env->sr |= SR_SM;
@@ -62,12 +55,38 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
     env->sr &= ~SR_TEE;
     env->pmr &= ~PMR_DME;
     env->pmr &= ~PMR_SME;
-    env->tlb->cpu_openrisc_map_address_data = &cpu_openrisc_get_phys_nommu;
-    env->tlb->cpu_openrisc_map_address_code = &cpu_openrisc_get_phys_nommu;
     env->lock_addr = -1;
 
-    if (cs->exception_index > 0 && cs->exception_index < EXCP_NR) {
-        hwaddr vect_pc = cs->exception_index << 8;
+    /* Set/clear dsx to indicate if we are in a delay slot exception.  */
+    if (env->dflag) {
+        env->dflag = 0;
+        env->sr |= SR_DSX;
+        env->epcr -= 4;
+    } else {
+        env->sr &= ~SR_DSX;
+    }
+
+    if (exception > 0 && exception < EXCP_NR) {
+        static const char * const int_name[EXCP_NR] = {
+            [EXCP_RESET]    = "RESET",
+            [EXCP_BUSERR]   = "BUSERR (bus error)",
+            [EXCP_DPF]      = "DFP (data protection fault)",
+            [EXCP_IPF]      = "IPF (code protection fault)",
+            [EXCP_TICK]     = "TICK (timer interrupt)",
+            [EXCP_ALIGN]    = "ALIGN",
+            [EXCP_ILLEGAL]  = "ILLEGAL",
+            [EXCP_INT]      = "INT (device interrupt)",
+            [EXCP_DTLBMISS] = "DTLBMISS (data tlb miss)",
+            [EXCP_ITLBMISS] = "ITLBMISS (code tlb miss)",
+            [EXCP_RANGE]    = "RANGE",
+            [EXCP_SYSCALL]  = "SYSCALL",
+            [EXCP_FPE]      = "FPE",
+            [EXCP_TRAP]     = "TRAP",
+        };
+
+        qemu_log_mask(CPU_LOG_INT, "INT: %s\n", int_name[exception]);
+
+        hwaddr vect_pc = exception << 8;
         if (env->cpucfgr & CPUCFGR_EVBARP) {
             vect_pc |= env->evbar;
         }
@@ -76,7 +95,7 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
         }
         env->pc = vect_pc;
     } else {
-        cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
+        cpu_abort(cs, "Unhandled exception 0x%x\n", exception);
     }
 #endif
 
diff --git a/target/openrisc/interrupt_helper.c b/target/openrisc/interrupt_helper.c
index 56620e0571..9c5489f5f7 100644
--- a/target/openrisc/interrupt_helper.c
+++ b/target/openrisc/interrupt_helper.c
@@ -25,36 +25,7 @@
 
 void HELPER(rfe)(CPUOpenRISCState *env)
 {
-    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
-    CPUState *cs = CPU(cpu);
-#ifndef CONFIG_USER_ONLY
-    int need_flush_tlb = (cpu->env.sr & (SR_SM | SR_IME | SR_DME)) ^
-                         (cpu->env.esr & (SR_SM | SR_IME | SR_DME));
-#endif
-    cpu->env.pc = cpu->env.epcr;
-    cpu_set_sr(&cpu->env, cpu->env.esr);
-    cpu->env.lock_addr = -1;
-
-#ifndef CONFIG_USER_ONLY
-    if (cpu->env.sr & SR_DME) {
-        cpu->env.tlb->cpu_openrisc_map_address_data =
-            &cpu_openrisc_get_phys_data;
-    } else {
-        cpu->env.tlb->cpu_openrisc_map_address_data =
-            &cpu_openrisc_get_phys_nommu;
-    }
-
-    if (cpu->env.sr & SR_IME) {
-        cpu->env.tlb->cpu_openrisc_map_address_code =
-            &cpu_openrisc_get_phys_code;
-    } else {
-        cpu->env.tlb->cpu_openrisc_map_address_code =
-            &cpu_openrisc_get_phys_nommu;
-    }
-
-    if (need_flush_tlb) {
-        tlb_flush(cs);
-    }
-#endif
-    cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+    env->pc = env->epcr;
+    env->lock_addr = -1;
+    cpu_set_sr(env, env->esr);
 }
diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c
index 0a793eb14d..1eedbf3dbe 100644
--- a/target/openrisc/machine.c
+++ b/target/openrisc/machine.c
@@ -24,31 +24,6 @@
 #include "hw/boards.h"
 #include "migration/cpu.h"
 
-static int env_post_load(void *opaque, int version_id)
-{
-    CPUOpenRISCState *env = opaque;
-
-    /* Restore MMU handlers */
-    if (env->sr & SR_DME) {
-        env->tlb->cpu_openrisc_map_address_data =
-            &cpu_openrisc_get_phys_data;
-    } else {
-        env->tlb->cpu_openrisc_map_address_data =
-            &cpu_openrisc_get_phys_nommu;
-    }
-
-    if (env->sr & SR_IME) {
-        env->tlb->cpu_openrisc_map_address_code =
-            &cpu_openrisc_get_phys_code;
-    } else {
-        env->tlb->cpu_openrisc_map_address_code =
-            &cpu_openrisc_get_phys_nommu;
-    }
-
-
-    return 0;
-}
-
 static const VMStateDescription vmstate_tlb_entry = {
     .name = "tlb_entry",
     .version_id = 1,
@@ -63,24 +38,17 @@ static const VMStateDescription vmstate_tlb_entry = {
 
 static const VMStateDescription vmstate_cpu_tlb = {
     .name = "cpu_tlb",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .fields = (VMStateField[]) {
-        VMSTATE_STRUCT_2DARRAY(itlb, CPUOpenRISCTLBContext,
-                             ITLB_WAYS, ITLB_SIZE, 0,
+        VMSTATE_STRUCT_ARRAY(itlb, CPUOpenRISCTLBContext, TLB_SIZE, 0,
                              vmstate_tlb_entry, OpenRISCTLBEntry),
-        VMSTATE_STRUCT_2DARRAY(dtlb, CPUOpenRISCTLBContext,
-                             DTLB_WAYS, DTLB_SIZE, 0,
+        VMSTATE_STRUCT_ARRAY(dtlb, CPUOpenRISCTLBContext, TLB_SIZE, 0,
                              vmstate_tlb_entry, OpenRISCTLBEntry),
         VMSTATE_END_OF_LIST()
     }
 };
 
-#define VMSTATE_CPU_TLB(_f, _s)                             \
-    VMSTATE_STRUCT_POINTER(_f, _s, vmstate_cpu_tlb, CPUOpenRISCTLBContext)
-
-
 static int get_sr(QEMUFile *f, void *opaque, size_t size, VMStateField *field)
 {
     CPUOpenRISCState *env = opaque;
@@ -106,7 +74,6 @@ static const VMStateDescription vmstate_env = {
     .name = "env",
     .version_id = 6,
     .minimum_version_id = 6,
-    .post_load = env_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL_2DARRAY(shadow_gpr, CPUOpenRISCState, 16, 32),
         VMSTATE_UINTTL(pc, CPUOpenRISCState),
@@ -143,7 +110,8 @@ static const VMStateDescription vmstate_env = {
         VMSTATE_UINT32(fpcsr, CPUOpenRISCState),
         VMSTATE_UINT64(mac, CPUOpenRISCState),
 
-        VMSTATE_CPU_TLB(tlb, CPUOpenRISCState),
+        VMSTATE_STRUCT(tlb, CPUOpenRISCState, 1,
+                       vmstate_cpu_tlb, CPUOpenRISCTLBContext),
 
         VMSTATE_TIMER_PTR(timer, CPUOpenRISCState),
         VMSTATE_UINT32(ttmr, CPUOpenRISCState),
diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index 2bd782f89b..e7d5219e11 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -29,227 +29,156 @@
 #endif
 
 #ifndef CONFIG_USER_ONLY
-int cpu_openrisc_get_phys_nommu(OpenRISCCPU *cpu,
-                                hwaddr *physical,
-                                int *prot, target_ulong address, int rw)
+static inline void get_phys_nommu(hwaddr *phys_addr, int *prot,
+                                  target_ulong address)
 {
-    *physical = address;
+    *phys_addr = address;
     *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
-    return TLBRET_MATCH;
 }
 
-int cpu_openrisc_get_phys_code(OpenRISCCPU *cpu,
-                               hwaddr *physical,
-                               int *prot, target_ulong address, int rw)
+static int get_phys_mmu(OpenRISCCPU *cpu, hwaddr *phys_addr, int *prot,
+                        target_ulong addr, int need, bool super)
 {
-    int vpn = address >> TARGET_PAGE_BITS;
-    int idx = vpn & ITLB_MASK;
-    int right = 0;
-
-    if ((cpu->env.tlb->itlb[0][idx].mr >> TARGET_PAGE_BITS) != vpn) {
-        return TLBRET_NOMATCH;
-    }
-    if (!(cpu->env.tlb->itlb[0][idx].mr & 1)) {
-        return TLBRET_INVALID;
-    }
-
-    if (cpu->env.sr & SR_SM) { /* supervisor mode */
-        if (cpu->env.tlb->itlb[0][idx].tr & SXE) {
-            right |= PAGE_EXEC;
-        }
-    } else {
-        if (cpu->env.tlb->itlb[0][idx].tr & UXE) {
-            right |= PAGE_EXEC;
+    int idx = (addr >> TARGET_PAGE_BITS) & TLB_MASK;
+    uint32_t imr = cpu->env.tlb.itlb[idx].mr;
+    uint32_t itr = cpu->env.tlb.itlb[idx].tr;
+    uint32_t dmr = cpu->env.tlb.dtlb[idx].mr;
+    uint32_t dtr = cpu->env.tlb.dtlb[idx].tr;
+    int right, match, valid;
+
+    /* If the ITLB and DTLB indexes map to the same page, we want to
+       load all permissions all at once.  If the destination pages do
+       not match, zap the one we don't need.  */
+    if (unlikely((itr ^ dtr) & TARGET_PAGE_MASK)) {
+        if (need & PAGE_EXEC) {
+            dmr = dtr = 0;
+        } else {
+            imr = itr = 0;
         }
     }
 
-    if ((rw & 2) && ((right & PAGE_EXEC) == 0)) {
-        return TLBRET_BADADDR;
-    }
-
-    *physical = (cpu->env.tlb->itlb[0][idx].tr & TARGET_PAGE_MASK) |
-                (address & (TARGET_PAGE_SIZE-1));
+    /* Check if either of the entries matches the source address.  */
+    match  = (imr ^ addr) & TARGET_PAGE_MASK ? 0 : PAGE_EXEC;
+    match |= (dmr ^ addr) & TARGET_PAGE_MASK ? 0 : PAGE_READ | PAGE_WRITE;
+
+    /* Check if either of the entries is valid.  */
+    valid  = imr & 1 ? PAGE_EXEC : 0;
+    valid |= dmr & 1 ? PAGE_READ | PAGE_WRITE : 0;
+    valid &= match;
+
+    /* Collect the permissions from the entries.  */
+    right  = itr & (super ? SXE : UXE) ? PAGE_EXEC : 0;
+    right |= dtr & (super ? SRE : URE) ? PAGE_READ : 0;
+    right |= dtr & (super ? SWE : UWE) ? PAGE_WRITE : 0;
+    right &= valid;
+
+    /* Note that above we validated that itr and dtr match on page.
+       So oring them together changes nothing without having to
+       check which one we needed.  We also want to store to these
+       variables even on failure, as it avoids compiler warnings.  */
+    *phys_addr = ((itr | dtr) & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);
     *prot = right;
-    return TLBRET_MATCH;
-}
 
-int cpu_openrisc_get_phys_data(OpenRISCCPU *cpu,
-                               hwaddr *physical,
-                               int *prot, target_ulong address, int rw)
-{
-    int vpn = address >> TARGET_PAGE_BITS;
-    int idx = vpn & DTLB_MASK;
-    int right = 0;
+    qemu_log_mask(CPU_LOG_MMU,
+                  "MMU lookup: need %d match %d valid %d right %d -> %s\n",
+                  need, match, valid, right, (need & right) ? "OK" : "FAIL");
 
-    if ((cpu->env.tlb->dtlb[0][idx].mr >> TARGET_PAGE_BITS) != vpn) {
-        return TLBRET_NOMATCH;
-    }
-    if (!(cpu->env.tlb->dtlb[0][idx].mr & 1)) {
-        return TLBRET_INVALID;
+    /* Check the collective permissions are present.  */
+    if (likely(need & right)) {
+        return 0;  /* success! */
     }
 
-    if (cpu->env.sr & SR_SM) { /* supervisor mode */
-        if (cpu->env.tlb->dtlb[0][idx].tr & SRE) {
-            right |= PAGE_READ;
-        }
-        if (cpu->env.tlb->dtlb[0][idx].tr & SWE) {
-            right |= PAGE_WRITE;
-        }
+    /* Determine what kind of failure we have.  */
+    if (need & valid) {
+        return need & PAGE_EXEC ? EXCP_IPF : EXCP_DPF;
     } else {
-        if (cpu->env.tlb->dtlb[0][idx].tr & URE) {
-            right |= PAGE_READ;
-        }
-        if (cpu->env.tlb->dtlb[0][idx].tr & UWE) {
-            right |= PAGE_WRITE;
-        }
+        return need & PAGE_EXEC ? EXCP_ITLBMISS : EXCP_DTLBMISS;
     }
-
-    if (!(rw & 1) && ((right & PAGE_READ) == 0)) {
-        return TLBRET_BADADDR;
-    }
-    if ((rw & 1) && ((right & PAGE_WRITE) == 0)) {
-        return TLBRET_BADADDR;
-    }
-
-    *physical = (cpu->env.tlb->dtlb[0][idx].tr & TARGET_PAGE_MASK) |
-                (address & (TARGET_PAGE_SIZE-1));
-    *prot = right;
-    return TLBRET_MATCH;
-}
-
-static int cpu_openrisc_get_phys_addr(OpenRISCCPU *cpu,
-                                      hwaddr *physical,
-                                      int *prot, target_ulong address,
-                                      int rw)
-{
-    int ret = TLBRET_MATCH;
-
-    if (rw == MMU_INST_FETCH) {    /* ITLB */
-       *physical = 0;
-        ret = cpu->env.tlb->cpu_openrisc_map_address_code(cpu, physical,
-                                                          prot, address, rw);
-    } else {          /* DTLB */
-        ret = cpu->env.tlb->cpu_openrisc_map_address_data(cpu, physical,
-                                                          prot, address, rw);
-    }
-
-    return ret;
 }
 #endif
 
-static void cpu_openrisc_raise_mmu_exception(OpenRISCCPU *cpu,
-                                             target_ulong address,
-                                             int rw, int tlb_error)
+static void raise_mmu_exception(OpenRISCCPU *cpu, target_ulong address,
+                                int exception)
 {
     CPUState *cs = CPU(cpu);
-    int exception = 0;
-
-    switch (tlb_error) {
-    default:
-        if (rw == 2) {
-            exception = EXCP_IPF;
-        } else {
-            exception = EXCP_DPF;
-        }
-        break;
-#ifndef CONFIG_USER_ONLY
-    case TLBRET_BADADDR:
-        if (rw == 2) {
-            exception = EXCP_IPF;
-        } else {
-            exception = EXCP_DPF;
-        }
-        break;
-    case TLBRET_INVALID:
-    case TLBRET_NOMATCH:
-        /* No TLB match for a mapped address */
-        if (rw == 2) {
-            exception = EXCP_ITLBMISS;
-        } else {
-            exception = EXCP_DTLBMISS;
-        }
-        break;
-#endif
-    }
 
     cs->exception_index = exception;
     cpu->env.eear = address;
     cpu->env.lock_addr = -1;
 }
 
-#ifndef CONFIG_USER_ONLY
 int openrisc_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size,
                                   int rw, int mmu_idx)
 {
+#ifdef CONFIG_USER_ONLY
     OpenRISCCPU *cpu = OPENRISC_CPU(cs);
-    int ret = 0;
-    hwaddr physical = 0;
-    int prot = 0;
-
-    ret = cpu_openrisc_get_phys_addr(cpu, &physical, &prot,
-                                     address, rw);
-
-    if (ret == TLBRET_MATCH) {
-        tlb_set_page(cs, address & TARGET_PAGE_MASK,
-                     physical & TARGET_PAGE_MASK, prot,
-                     mmu_idx, TARGET_PAGE_SIZE);
-        ret = 0;
-    } else if (ret < 0) {
-        cpu_openrisc_raise_mmu_exception(cpu, address, rw, ret);
-        ret = 1;
-    }
-
-    return ret;
-}
+    raise_mmu_exception(cpu, address, EXCP_DPF);
+    return 1;
 #else
-int openrisc_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size,
-                                  int rw, int mmu_idx)
-{
-    OpenRISCCPU *cpu = OPENRISC_CPU(cs);
-    int ret = 0;
-
-    cpu_openrisc_raise_mmu_exception(cpu, address, rw, ret);
-    ret = 1;
-
-    return ret;
-}
+    g_assert_not_reached();
 #endif
+}
 
 #ifndef CONFIG_USER_ONLY
 hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 {
     OpenRISCCPU *cpu = OPENRISC_CPU(cs);
+    int prot, excp, sr = cpu->env.sr;
     hwaddr phys_addr;
-    int prot;
-    int miss;
 
-    /* Check memory for any kind of address, since during debug the
-       gdb can ask for anything, check data tlb for address */
-    miss = cpu_openrisc_get_phys_addr(cpu, &phys_addr, &prot, addr, 0);
-
-    /* Check instruction tlb */
-    if (miss) {
-        miss = cpu_openrisc_get_phys_addr(cpu, &phys_addr, &prot, addr,
-                                          MMU_INST_FETCH);
-    }
+    switch (sr & (SR_DME | SR_IME)) {
+    case SR_DME | SR_IME:
+        /* The mmu is definitely enabled.  */
+        excp = get_phys_mmu(cpu, &phys_addr, &prot, addr,
+                            PAGE_EXEC | PAGE_READ | PAGE_WRITE,
+                            (sr & SR_SM) != 0);
+        return excp ? -1 : phys_addr;
 
-    /* Last, fall back to a plain address */
-    if (miss) {
-        miss = cpu_openrisc_get_phys_nommu(cpu, &phys_addr, &prot, addr, 0);
-    }
+    default:
+        /* The mmu is partially enabled, and we don't really have
+           a "real" access type.  Begin by trying the mmu, but if
+           that fails try again without.  */
+        excp = get_phys_mmu(cpu, &phys_addr, &prot, addr,
+                            PAGE_EXEC | PAGE_READ | PAGE_WRITE,
+                            (sr & SR_SM) != 0);
+        if (!excp) {
+            return phys_addr;
+        }
+        /* fallthru */
 
-    if (miss) {
-        return -1;
-    } else {
+    case 0:
+        /* The mmu is definitely disabled; lookups never fail.  */
+        get_phys_nommu(&phys_addr, &prot, addr);
         return phys_addr;
     }
 }
 
-void cpu_openrisc_mmu_init(OpenRISCCPU *cpu)
+void tlb_fill(CPUState *cs, target_ulong addr, int size,
+              MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
-    cpu->env.tlb = g_malloc0(sizeof(CPUOpenRISCTLBContext));
+    OpenRISCCPU *cpu = OPENRISC_CPU(cs);
+    int prot, excp;
+    hwaddr phys_addr;
+
+    if (mmu_idx == MMU_NOMMU_IDX) {
+        /* The mmu is disabled; lookups never fail.  */
+        get_phys_nommu(&phys_addr, &prot, addr);
+        excp = 0;
+    } else {
+        bool super = mmu_idx == MMU_SUPERVISOR_IDX;
+        int need = (access_type == MMU_INST_FETCH ? PAGE_EXEC
+                    : access_type == MMU_DATA_STORE ? PAGE_WRITE
+                    : PAGE_READ);
+        excp = get_phys_mmu(cpu, &phys_addr, &prot, addr, need, super);
+    }
+
+    if (unlikely(excp)) {
+        raise_mmu_exception(cpu, addr, excp);
+        cpu_loop_exit_restore(cs, retaddr);
+    }
 
-    cpu->env.tlb->cpu_openrisc_map_address_code = &cpu_openrisc_get_phys_nommu;
-    cpu->env.tlb->cpu_openrisc_map_address_data = &cpu_openrisc_get_phys_nommu;
+    tlb_set_page(cs, addr & TARGET_PAGE_MASK,
+                 phys_addr & TARGET_PAGE_MASK, prot,
+                 mmu_idx, TARGET_PAGE_SIZE);
 }
 #endif
diff --git a/target/openrisc/mmu_helper.c b/target/openrisc/mmu_helper.c
deleted file mode 100644
index 97e1d17b5a..0000000000
--- a/target/openrisc/mmu_helper.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * OpenRISC MMU helper routines
- *
- * Copyright (c) 2011-2012 Jia Liu <proljc@gmail.com>
- *                         Zhizhou Zhang <etouzh@gmail.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-
-#ifndef CONFIG_USER_ONLY
-
-void tlb_fill(CPUState *cs, target_ulong addr, int size,
-              MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
-{
-    int ret;
-
-    ret = openrisc_cpu_handle_mmu_fault(cs, addr, size, access_type, mmu_idx);
-
-    if (ret) {
-        /* Raise Exception.  */
-        cpu_loop_exit_restore(cs, retaddr);
-    }
-}
-#endif
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index b284064381..b66a45c1e0 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -27,13 +27,12 @@
 
 #define TO_SPR(group, number) (((group) << 11) + (number))
 
-void HELPER(mtspr)(CPUOpenRISCState *env,
-                   target_ulong ra, target_ulong rb, target_ulong offset)
+void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
 {
 #ifndef CONFIG_USER_ONLY
     OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    int spr = (ra | offset);
+    target_ulong mr;
     int idx;
 
     switch (spr) {
@@ -57,26 +56,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
         break;
 
     case TO_SPR(0, 17): /* SR */
-        if ((env->sr & (SR_IME | SR_DME | SR_SM)) ^
-            (rb & (SR_IME | SR_DME | SR_SM))) {
-            tlb_flush(cs);
-        }
         cpu_set_sr(env, rb);
-        if (env->sr & SR_DME) {
-            env->tlb->cpu_openrisc_map_address_data =
-                &cpu_openrisc_get_phys_data;
-        } else {
-            env->tlb->cpu_openrisc_map_address_data =
-                &cpu_openrisc_get_phys_nommu;
-        }
-
-        if (env->sr & SR_IME) {
-            env->tlb->cpu_openrisc_map_address_code =
-                &cpu_openrisc_get_phys_code;
-        } else {
-            env->tlb->cpu_openrisc_map_address_code =
-                &cpu_openrisc_get_phys_nommu;
-        }
         break;
 
     case TO_SPR(0, 18): /* PPC */
@@ -98,18 +78,22 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
     case TO_SPR(0, 1024) ... TO_SPR(0, 1024 + (16 * 32)): /* Shadow GPRs */
         idx = (spr - 1024);
         env->shadow_gpr[idx / 32][idx % 32] = rb;
+        break;
 
-    case TO_SPR(1, 512) ... TO_SPR(1, 512+DTLB_SIZE-1): /* DTLBW0MR 0-127 */
+    case TO_SPR(1, 512) ... TO_SPR(1, 512 + TLB_SIZE - 1): /* DTLBW0MR 0-127 */
         idx = spr - TO_SPR(1, 512);
-        if (!(rb & 1)) {
-            tlb_flush_page(cs, env->tlb->dtlb[0][idx].mr & TARGET_PAGE_MASK);
+        mr = env->tlb.dtlb[idx].mr;
+        if (mr & 1) {
+            tlb_flush_page(cs, mr & TARGET_PAGE_MASK);
+        }
+        if (rb & 1) {
+            tlb_flush_page(cs, rb & TARGET_PAGE_MASK);
         }
-        env->tlb->dtlb[0][idx].mr = rb;
+        env->tlb.dtlb[idx].mr = rb;
         break;
-
-    case TO_SPR(1, 640) ... TO_SPR(1, 640+DTLB_SIZE-1): /* DTLBW0TR 0-127 */
+    case TO_SPR(1, 640) ... TO_SPR(1, 640 + TLB_SIZE - 1): /* DTLBW0TR 0-127 */
         idx = spr - TO_SPR(1, 640);
-        env->tlb->dtlb[0][idx].tr = rb;
+        env->tlb.dtlb[idx].tr = rb;
         break;
     case TO_SPR(1, 768) ... TO_SPR(1, 895):   /* DTLBW1MR 0-127 */
     case TO_SPR(1, 896) ... TO_SPR(1, 1023):  /* DTLBW1TR 0-127 */
@@ -118,17 +102,21 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
     case TO_SPR(1, 1280) ... TO_SPR(1, 1407): /* DTLBW3MR 0-127 */
     case TO_SPR(1, 1408) ... TO_SPR(1, 1535): /* DTLBW3TR 0-127 */
         break;
-    case TO_SPR(2, 512) ... TO_SPR(2, 512+ITLB_SIZE-1):   /* ITLBW0MR 0-127 */
+
+    case TO_SPR(2, 512) ... TO_SPR(2, 512 + TLB_SIZE - 1): /* ITLBW0MR 0-127 */
         idx = spr - TO_SPR(2, 512);
-        if (!(rb & 1)) {
-            tlb_flush_page(cs, env->tlb->itlb[0][idx].mr & TARGET_PAGE_MASK);
+        mr = env->tlb.itlb[idx].mr;
+        if (mr & 1) {
+            tlb_flush_page(cs, mr & TARGET_PAGE_MASK);
+        }
+        if (rb & 1) {
+            tlb_flush_page(cs, rb & TARGET_PAGE_MASK);
         }
-        env->tlb->itlb[0][idx].mr = rb;
+        env->tlb.itlb[idx].mr = rb;
         break;
-
-    case TO_SPR(2, 640) ... TO_SPR(2, 640+ITLB_SIZE-1): /* ITLBW0TR 0-127 */
+    case TO_SPR(2, 640) ... TO_SPR(2, 640 + TLB_SIZE - 1): /* ITLBW0TR 0-127 */
         idx = spr - TO_SPR(2, 640);
-        env->tlb->itlb[0][idx].tr = rb;
+        env->tlb.itlb[idx].tr = rb;
         break;
     case TO_SPR(2, 768) ... TO_SPR(2, 895):   /* ITLBW1MR 0-127 */
     case TO_SPR(2, 896) ... TO_SPR(2, 1023):  /* ITLBW1TR 0-127 */
@@ -137,6 +125,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
     case TO_SPR(2, 1280) ... TO_SPR(2, 1407): /* ITLBW3MR 0-127 */
     case TO_SPR(2, 1408) ... TO_SPR(2, 1535): /* ITLBW3TR 0-127 */
         break;
+
     case TO_SPR(5, 1):  /* MACLO */
         env->mac = deposit64(env->mac, 0, 32, rb);
         break;
@@ -153,7 +142,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
         }
         break;
     case TO_SPR(9, 0):  /* PICMR */
-        env->picmr |= rb;
+        env->picmr = rb;
         break;
     case TO_SPR(9, 2):  /* PICSR */
         env->picsr &= ~rb;
@@ -201,13 +190,12 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
 #endif
 }
 
-target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
-                           target_ulong rd, target_ulong ra, uint32_t offset)
+target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd,
+                           target_ulong spr)
 {
 #ifndef CONFIG_USER_ONLY
     OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    int spr = (ra | offset);
     int idx;
 
     switch (spr) {
@@ -259,13 +247,13 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
         idx = (spr - 1024);
         return env->shadow_gpr[idx / 32][idx % 32];
 
-    case TO_SPR(1, 512) ... TO_SPR(1, 512+DTLB_SIZE-1): /* DTLBW0MR 0-127 */
+    case TO_SPR(1, 512) ... TO_SPR(1, 512 + TLB_SIZE - 1): /* DTLBW0MR 0-127 */
         idx = spr - TO_SPR(1, 512);
-        return env->tlb->dtlb[0][idx].mr;
+        return env->tlb.dtlb[idx].mr;
 
-    case TO_SPR(1, 640) ... TO_SPR(1, 640+DTLB_SIZE-1): /* DTLBW0TR 0-127 */
+    case TO_SPR(1, 640) ... TO_SPR(1, 640 + TLB_SIZE - 1): /* DTLBW0TR 0-127 */
         idx = spr - TO_SPR(1, 640);
-        return env->tlb->dtlb[0][idx].tr;
+        return env->tlb.dtlb[idx].tr;
 
     case TO_SPR(1, 768) ... TO_SPR(1, 895):   /* DTLBW1MR 0-127 */
     case TO_SPR(1, 896) ... TO_SPR(1, 1023):  /* DTLBW1TR 0-127 */
@@ -275,13 +263,13 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
     case TO_SPR(1, 1408) ... TO_SPR(1, 1535): /* DTLBW3TR 0-127 */
         break;
 
-    case TO_SPR(2, 512) ... TO_SPR(2, 512+ITLB_SIZE-1): /* ITLBW0MR 0-127 */
+    case TO_SPR(2, 512) ... TO_SPR(2, 512 + TLB_SIZE - 1): /* ITLBW0MR 0-127 */
         idx = spr - TO_SPR(2, 512);
-        return env->tlb->itlb[0][idx].mr;
+        return env->tlb.itlb[idx].mr;
 
-    case TO_SPR(2, 640) ... TO_SPR(2, 640+ITLB_SIZE-1): /* ITLBW0TR 0-127 */
+    case TO_SPR(2, 640) ... TO_SPR(2, 640 + TLB_SIZE - 1): /* ITLBW0TR 0-127 */
         idx = spr - TO_SPR(2, 640);
-        return env->tlb->itlb[0][idx].tr;
+        return env->tlb.itlb[idx].tr;
 
     case TO_SPR(2, 768) ... TO_SPR(2, 895):   /* ITLBW1MR 0-127 */
     case TO_SPR(2, 896) ... TO_SPR(2, 1023):  /* ITLBW1TR 0-127 */
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index d69f8d0422..a271cd3903 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -36,22 +36,29 @@
 #include "trace-tcg.h"
 #include "exec/log.h"
 
-#define LOG_DIS(str, ...) \
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%08x: " str, dc->base.pc_next,    \
-                  ## __VA_ARGS__)
-
 /* is_jmp field values */
-#define DISAS_JUMP    DISAS_TARGET_0 /* only pc was modified dynamically */
-#define DISAS_UPDATE  DISAS_TARGET_1 /* cpu state was modified dynamically */
-#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */
+#define DISAS_EXIT    DISAS_TARGET_0  /* force exit to main loop */
+#define DISAS_JUMP    DISAS_TARGET_1  /* exit via jmp_pc/jmp_pc_imm */
 
 typedef struct DisasContext {
     DisasContextBase base;
     uint32_t mem_idx;
     uint32_t tb_flags;
     uint32_t delayed_branch;
+
+    /* If not -1, jmp_pc contains this value and so is a direct jump.  */
+    target_ulong jmp_pc_imm;
 } DisasContext;
 
+static inline bool is_user(DisasContext *dc)
+{
+#ifdef CONFIG_USER_ONLY
+    return true;
+#else
+    return !(dc->tb_flags & TB_FLAGS_SM);
+#endif
+}
+
 /* Include the auto-generated decoder.  */
 #include "decode.inc.c"
 
@@ -165,34 +172,6 @@ static void check_ov64s(DisasContext *dc)
         }                               \
     } while (0)
 
-static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
-{
-    if (unlikely(dc->base.singlestep_enabled)) {
-        return false;
-    }
-
-#ifndef CONFIG_USER_ONLY
-    return (dc->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
-#else
-    return true;
-#endif
-}
-
-static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
-{
-    if (use_goto_tb(dc, dest)) {
-        tcg_gen_movi_tl(cpu_pc, dest);
-        tcg_gen_goto_tb(n);
-        tcg_gen_exit_tb(dc->base.tb, n);
-    } else {
-        tcg_gen_movi_tl(cpu_pc, dest);
-        if (dc->base.singlestep_enabled) {
-            gen_exception(dc, EXCP_DEBUG);
-        }
-        tcg_gen_exit_tb(NULL, 0);
-    }
-}
-
 static void gen_ove_cy(DisasContext *dc)
 {
     if (dc->tb_flags & SR_OVE) {
@@ -457,7 +436,6 @@ static void gen_msbu(DisasContext *dc, TCGv srca, TCGv srcb)
 
 static bool trans_l_add(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.add r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     gen_add(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -465,7 +443,6 @@ static bool trans_l_add(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_addc(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.addc r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     gen_addc(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -473,7 +450,6 @@ static bool trans_l_addc(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_sub(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.sub r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     gen_sub(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -481,7 +457,6 @@ static bool trans_l_sub(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_and(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.and r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_and_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -489,7 +464,6 @@ static bool trans_l_and(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_or(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.or r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_or_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -497,7 +471,6 @@ static bool trans_l_or(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_xor(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.xor r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_xor_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -505,7 +478,6 @@ static bool trans_l_xor(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_sll(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.sll r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_shl_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -513,7 +485,6 @@ static bool trans_l_sll(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_srl(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.srl r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_shr_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -521,7 +492,6 @@ static bool trans_l_srl(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_sra(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.sra r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_sar_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -529,7 +499,6 @@ static bool trans_l_sra(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_ror(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.ror r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     tcg_gen_rotr_tl(cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -537,7 +506,6 @@ static bool trans_l_ror(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_exths(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("l.exths r%d, r%d\n", a->d, a->a);
     check_r0_write(a->d);
     tcg_gen_ext16s_tl(cpu_R[a->d], cpu_R[a->a]);
     return true;
@@ -545,7 +513,6 @@ static bool trans_l_exths(DisasContext *dc, arg_da *a, uint32_t insn)
 
 static bool trans_l_extbs(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("l.extbs r%d, r%d\n", a->d, a->a);
     check_r0_write(a->d);
     tcg_gen_ext8s_tl(cpu_R[a->d], cpu_R[a->a]);
     return true;
@@ -553,7 +520,6 @@ static bool trans_l_extbs(DisasContext *dc, arg_da *a, uint32_t insn)
 
 static bool trans_l_exthz(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("l.exthz r%d, r%d\n", a->d, a->a);
     check_r0_write(a->d);
     tcg_gen_ext16u_tl(cpu_R[a->d], cpu_R[a->a]);
     return true;
@@ -561,7 +527,6 @@ static bool trans_l_exthz(DisasContext *dc, arg_da *a, uint32_t insn)
 
 static bool trans_l_extbz(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("l.extbz r%d, r%d\n", a->d, a->a);
     check_r0_write(a->d);
     tcg_gen_ext8u_tl(cpu_R[a->d], cpu_R[a->a]);
     return true;
@@ -570,7 +535,6 @@ static bool trans_l_extbz(DisasContext *dc, arg_da *a, uint32_t insn)
 static bool trans_l_cmov(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
     TCGv zero;
-    LOG_DIS("l.cmov r%d, r%d, r%d\n", a->d, a->a, a->b);
 
     check_r0_write(a->d);
     zero = tcg_const_tl(0);
@@ -582,8 +546,6 @@ static bool trans_l_cmov(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_ff1(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("l.ff1 r%d, r%d\n", a->d, a->a);
-
     check_r0_write(a->d);
     tcg_gen_ctzi_tl(cpu_R[a->d], cpu_R[a->a], -1);
     tcg_gen_addi_tl(cpu_R[a->d], cpu_R[a->d], 1);
@@ -592,8 +554,6 @@ static bool trans_l_ff1(DisasContext *dc, arg_da *a, uint32_t insn)
 
 static bool trans_l_fl1(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("l.fl1 r%d, r%d\n", a->d, a->a);
-
     check_r0_write(a->d);
     tcg_gen_clzi_tl(cpu_R[a->d], cpu_R[a->a], TARGET_LONG_BITS);
     tcg_gen_subfi_tl(cpu_R[a->d], TARGET_LONG_BITS, cpu_R[a->d]);
@@ -602,8 +562,6 @@ static bool trans_l_fl1(DisasContext *dc, arg_da *a, uint32_t insn)
 
 static bool trans_l_mul(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.mul r%d, r%d, r%d\n", a->d, a->a, a->b);
-
     check_r0_write(a->d);
     gen_mul(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -611,8 +569,6 @@ static bool trans_l_mul(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_mulu(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.mulu r%d, r%d, r%d\n", a->d, a->a, a->b);
-
     check_r0_write(a->d);
     gen_mulu(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -620,8 +576,6 @@ static bool trans_l_mulu(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_div(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.div r%d, r%d, r%d\n", a->d, a->a, a->b);
-
     check_r0_write(a->d);
     gen_div(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -629,8 +583,6 @@ static bool trans_l_div(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_divu(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("l.divu r%d, r%d, r%d\n", a->d, a->a, a->b);
-
     check_r0_write(a->d);
     gen_divu(dc, cpu_R[a->d], cpu_R[a->a], cpu_R[a->b]);
     return true;
@@ -638,14 +590,12 @@ static bool trans_l_divu(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_l_muld(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("l.muld r%d, r%d\n", a->a, a->b);
     gen_muld(dc, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_muldu(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("l.muldu r%d, r%d\n", a->a, a->b);
     gen_muldu(dc, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
@@ -654,8 +604,8 @@ static bool trans_l_j(DisasContext *dc, arg_l_j *a, uint32_t insn)
 {
     target_ulong tmp_pc = dc->base.pc_next + a->n * 4;
 
-    LOG_DIS("l.j %d\n", a->n);
     tcg_gen_movi_tl(jmp_pc, tmp_pc);
+    dc->jmp_pc_imm = tmp_pc;
     dc->delayed_branch = 2;
     return true;
 }
@@ -665,11 +615,11 @@ static bool trans_l_jal(DisasContext *dc, arg_l_jal *a, uint32_t insn)
     target_ulong tmp_pc = dc->base.pc_next + a->n * 4;
     target_ulong ret_pc = dc->base.pc_next + 8;
 
-    LOG_DIS("l.jal %d\n", a->n);
     tcg_gen_movi_tl(cpu_R[9], ret_pc);
     /* Optimize jal being used to load the PC for PIC.  */
     if (tmp_pc != ret_pc) {
         tcg_gen_movi_tl(jmp_pc, tmp_pc);
+        dc->jmp_pc_imm = tmp_pc;
         dc->delayed_branch = 2;
     }
     return true;
@@ -692,21 +642,18 @@ static void do_bf(DisasContext *dc, arg_l_bf *a, TCGCond cond)
 
 static bool trans_l_bf(DisasContext *dc, arg_l_bf *a, uint32_t insn)
 {
-    LOG_DIS("l.bf %d\n", a->n);
     do_bf(dc, a, TCG_COND_NE);
     return true;
 }
 
 static bool trans_l_bnf(DisasContext *dc, arg_l_bf *a, uint32_t insn)
 {
-    LOG_DIS("l.bnf %d\n", a->n);
     do_bf(dc, a, TCG_COND_EQ);
     return true;
 }
 
 static bool trans_l_jr(DisasContext *dc, arg_l_jr *a, uint32_t insn)
 {
-    LOG_DIS("l.jr r%d\n", a->b);
     tcg_gen_mov_tl(jmp_pc, cpu_R[a->b]);
     dc->delayed_branch = 2;
     return true;
@@ -714,7 +661,6 @@ static bool trans_l_jr(DisasContext *dc, arg_l_jr *a, uint32_t insn)
 
 static bool trans_l_jalr(DisasContext *dc, arg_l_jalr *a, uint32_t insn)
 {
-    LOG_DIS("l.jalr r%d\n", a->b);
     tcg_gen_mov_tl(jmp_pc, cpu_R[a->b]);
     tcg_gen_movi_tl(cpu_R[9], dc->base.pc_next + 8);
     dc->delayed_branch = 2;
@@ -725,8 +671,6 @@ static bool trans_l_lwa(DisasContext *dc, arg_load *a, uint32_t insn)
 {
     TCGv ea;
 
-    LOG_DIS("l.lwa r%d, r%d, %d\n", a->d, a->a, a->i);
-
     check_r0_write(a->d);
     ea = tcg_temp_new();
     tcg_gen_addi_tl(ea, cpu_R[a->a], a->i);
@@ -750,42 +694,36 @@ static void do_load(DisasContext *dc, arg_load *a, TCGMemOp mop)
 
 static bool trans_l_lwz(DisasContext *dc, arg_load *a, uint32_t insn)
 {
-    LOG_DIS("l.lwz r%d, r%d, %d\n", a->d, a->a, a->i);
     do_load(dc, a, MO_TEUL);
     return true;
 }
 
 static bool trans_l_lws(DisasContext *dc, arg_load *a, uint32_t insn)
 {
-    LOG_DIS("l.lws r%d, r%d, %d\n", a->d, a->a, a->i);
     do_load(dc, a, MO_TESL);
     return true;
 }
 
 static bool trans_l_lbz(DisasContext *dc, arg_load *a, uint32_t insn)
 {
-    LOG_DIS("l.lbz r%d, r%d, %d\n", a->d, a->a, a->i);
     do_load(dc, a, MO_UB);
     return true;
 }
 
 static bool trans_l_lbs(DisasContext *dc, arg_load *a, uint32_t insn)
 {
-    LOG_DIS("l.lbs r%d, r%d, %d\n", a->d, a->a, a->i);
     do_load(dc, a, MO_SB);
     return true;
 }
 
 static bool trans_l_lhz(DisasContext *dc, arg_load *a, uint32_t insn)
 {
-    LOG_DIS("l.lhz r%d, r%d, %d\n", a->d, a->a, a->i);
     do_load(dc, a, MO_TEUW);
     return true;
 }
 
 static bool trans_l_lhs(DisasContext *dc, arg_load *a, uint32_t insn)
 {
-    LOG_DIS("l.lhs r%d, r%d, %d\n", a->d, a->a, a->i);
     do_load(dc, a, MO_TESW);
     return true;
 }
@@ -795,8 +733,6 @@ static bool trans_l_swa(DisasContext *dc, arg_store *a, uint32_t insn)
     TCGv ea, val;
     TCGLabel *lab_fail, *lab_done;
 
-    LOG_DIS("l.swa r%d, r%d, %d\n", a->a, a->b, a->i);
-
     ea = tcg_temp_new();
     tcg_gen_addi_tl(ea, cpu_R[a->a], a->i);
 
@@ -837,28 +773,24 @@ static void do_store(DisasContext *dc, arg_store *a, TCGMemOp mop)
 
 static bool trans_l_sw(DisasContext *dc, arg_store *a, uint32_t insn)
 {
-    LOG_DIS("l.sw r%d, r%d, %d\n", a->a, a->b, a->i);
     do_store(dc, a, MO_TEUL);
     return true;
 }
 
 static bool trans_l_sb(DisasContext *dc, arg_store *a, uint32_t insn)
 {
-    LOG_DIS("l.sb r%d, r%d, %d\n", a->a, a->b, a->i);
     do_store(dc, a, MO_UB);
     return true;
 }
 
 static bool trans_l_sh(DisasContext *dc, arg_store *a, uint32_t insn)
 {
-    LOG_DIS("l.sh r%d, r%d, %d\n", a->a, a->b, a->i);
     do_store(dc, a, MO_TEUW);
     return true;
 }
 
 static bool trans_l_nop(DisasContext *dc, arg_l_nop *a, uint32_t insn)
 {
-    LOG_DIS("l.nop %d\n", a->k);
     return true;
 }
 
@@ -866,7 +798,6 @@ static bool trans_l_addi(DisasContext *dc, arg_rri *a, uint32_t insn)
 {
     TCGv t0;
 
-    LOG_DIS("l.addi r%d, r%d, %d\n", a->d, a->a, a->i);
     check_r0_write(a->d);
     t0 = tcg_const_tl(a->i);
     gen_add(dc, cpu_R[a->d], cpu_R[a->a], t0);
@@ -878,7 +809,6 @@ static bool trans_l_addic(DisasContext *dc, arg_rri *a, uint32_t insn)
 {
     TCGv t0;
 
-    LOG_DIS("l.addic r%d, r%d, %d\n", a->d, a->a, a->i);
     check_r0_write(a->d);
     t0 = tcg_const_tl(a->i);
     gen_addc(dc, cpu_R[a->d], cpu_R[a->a], t0);
@@ -890,7 +820,6 @@ static bool trans_l_muli(DisasContext *dc, arg_rri *a, uint32_t insn)
 {
     TCGv t0;
 
-    LOG_DIS("l.muli r%d, r%d, %d\n", a->d, a->a, a->i);
     check_r0_write(a->d);
     t0 = tcg_const_tl(a->i);
     gen_mul(dc, cpu_R[a->d], cpu_R[a->a], t0);
@@ -902,7 +831,6 @@ static bool trans_l_maci(DisasContext *dc, arg_l_maci *a, uint32_t insn)
 {
     TCGv t0;
 
-    LOG_DIS("l.maci r%d, %d\n", a->a, a->i);
     t0 = tcg_const_tl(a->i);
     gen_mac(dc, cpu_R[a->a], t0);
     tcg_temp_free(t0);
@@ -911,7 +839,6 @@ static bool trans_l_maci(DisasContext *dc, arg_l_maci *a, uint32_t insn)
 
 static bool trans_l_andi(DisasContext *dc, arg_rrk *a, uint32_t insn)
 {
-    LOG_DIS("l.andi r%d, r%d, %d\n", a->d, a->a, a->k);
     check_r0_write(a->d);
     tcg_gen_andi_tl(cpu_R[a->d], cpu_R[a->a], a->k);
     return true;
@@ -919,7 +846,6 @@ static bool trans_l_andi(DisasContext *dc, arg_rrk *a, uint32_t insn)
 
 static bool trans_l_ori(DisasContext *dc, arg_rrk *a, uint32_t insn)
 {
-    LOG_DIS("l.ori r%d, r%d, %d\n", a->d, a->a, a->k);
     check_r0_write(a->d);
     tcg_gen_ori_tl(cpu_R[a->d], cpu_R[a->a], a->k);
     return true;
@@ -927,7 +853,6 @@ static bool trans_l_ori(DisasContext *dc, arg_rrk *a, uint32_t insn)
 
 static bool trans_l_xori(DisasContext *dc, arg_rri *a, uint32_t insn)
 {
-    LOG_DIS("l.xori r%d, r%d, %d\n", a->d, a->a, a->i);
     check_r0_write(a->d);
     tcg_gen_xori_tl(cpu_R[a->d], cpu_R[a->a], a->i);
     return true;
@@ -935,72 +860,73 @@ static bool trans_l_xori(DisasContext *dc, arg_rri *a, uint32_t insn)
 
 static bool trans_l_mfspr(DisasContext *dc, arg_l_mfspr *a, uint32_t insn)
 {
-    LOG_DIS("l.mfspr r%d, r%d, %d\n", a->d, a->a, a->k);
     check_r0_write(a->d);
 
-#ifdef CONFIG_USER_ONLY
-    gen_illegal_exception(dc);
-#else
-    if (dc->mem_idx == MMU_USER_IDX) {
+    if (is_user(dc)) {
         gen_illegal_exception(dc);
     } else {
-        TCGv_i32 ti = tcg_const_i32(a->k);
-        gen_helper_mfspr(cpu_R[a->d], cpu_env, cpu_R[a->d], cpu_R[a->a], ti);
-        tcg_temp_free_i32(ti);
+        TCGv spr = tcg_temp_new();
+        tcg_gen_ori_tl(spr, cpu_R[a->a], a->k);
+        gen_helper_mfspr(cpu_R[a->d], cpu_env, cpu_R[a->d], spr);
+        tcg_temp_free(spr);
     }
-#endif
     return true;
 }
 
 static bool trans_l_mtspr(DisasContext *dc, arg_l_mtspr *a, uint32_t insn)
 {
-    LOG_DIS("l.mtspr r%d, r%d, %d\n", a->a, a->b, a->k);
-
-#ifdef CONFIG_USER_ONLY
-    gen_illegal_exception(dc);
-#else
-    if (dc->mem_idx == MMU_USER_IDX) {
+    if (is_user(dc)) {
         gen_illegal_exception(dc);
     } else {
-        TCGv_i32 ti = tcg_const_i32(a->k);
-        gen_helper_mtspr(cpu_env, cpu_R[a->a], cpu_R[a->b], ti);
-        tcg_temp_free_i32(ti);
+        TCGv spr;
+
+        /* For SR, we will need to exit the TB to recognize the new
+         * exception state.  For NPC, in theory this counts as a branch
+         * (although the SPR only exists for use by an ICE).  Save all
+         * of the cpu state first, allowing it to be overwritten.
+         */
+        if (dc->delayed_branch) {
+            tcg_gen_mov_tl(cpu_pc, jmp_pc);
+            tcg_gen_discard_tl(jmp_pc);
+        } else {
+            tcg_gen_movi_tl(cpu_pc, dc->base.pc_next + 4);
+        }
+        dc->base.is_jmp = DISAS_EXIT;
+
+        spr = tcg_temp_new();
+        tcg_gen_ori_tl(spr, cpu_R[a->a], a->k);
+        gen_helper_mtspr(cpu_env, spr, cpu_R[a->b]);
+        tcg_temp_free(spr);
     }
-#endif
     return true;
 }
 
 static bool trans_l_mac(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("l.mac r%d, r%d\n", a->a, a->b);
     gen_mac(dc, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_msb(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("l.msb r%d, r%d\n", a->a, a->b);
     gen_msb(dc, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_macu(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("l.mac r%d, r%d\n", a->a, a->b);
     gen_macu(dc, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_msbu(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("l.msb r%d, r%d\n", a->a, a->b);
     gen_msbu(dc, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_slli(DisasContext *dc, arg_dal *a, uint32_t insn)
 {
-    LOG_DIS("l.slli r%d, r%d, %d\n", a->d, a->a, a->l);
     check_r0_write(a->d);
     tcg_gen_shli_tl(cpu_R[a->d], cpu_R[a->a], a->l & (TARGET_LONG_BITS - 1));
     return true;
@@ -1008,7 +934,6 @@ static bool trans_l_slli(DisasContext *dc, arg_dal *a, uint32_t insn)
 
 static bool trans_l_srli(DisasContext *dc, arg_dal *a, uint32_t insn)
 {
-    LOG_DIS("l.srli r%d, r%d, %d\n", a->d, a->a, a->l);
     check_r0_write(a->d);
     tcg_gen_shri_tl(cpu_R[a->d], cpu_R[a->a], a->l & (TARGET_LONG_BITS - 1));
     return true;
@@ -1016,7 +941,6 @@ static bool trans_l_srli(DisasContext *dc, arg_dal *a, uint32_t insn)
 
 static bool trans_l_srai(DisasContext *dc, arg_dal *a, uint32_t insn)
 {
-    LOG_DIS("l.srai r%d, r%d, %d\n", a->d, a->a, a->l);
     check_r0_write(a->d);
     tcg_gen_sari_tl(cpu_R[a->d], cpu_R[a->a], a->l & (TARGET_LONG_BITS - 1));
     return true;
@@ -1024,7 +948,6 @@ static bool trans_l_srai(DisasContext *dc, arg_dal *a, uint32_t insn)
 
 static bool trans_l_rori(DisasContext *dc, arg_dal *a, uint32_t insn)
 {
-    LOG_DIS("l.rori r%d, r%d, %d\n", a->d, a->a, a->l);
     check_r0_write(a->d);
     tcg_gen_rotri_tl(cpu_R[a->d], cpu_R[a->a], a->l & (TARGET_LONG_BITS - 1));
     return true;
@@ -1032,7 +955,6 @@ static bool trans_l_rori(DisasContext *dc, arg_dal *a, uint32_t insn)
 
 static bool trans_l_movhi(DisasContext *dc, arg_l_movhi *a, uint32_t insn)
 {
-    LOG_DIS("l.movhi r%d, %d\n", a->d, a->k);
     check_r0_write(a->d);
     tcg_gen_movi_tl(cpu_R[a->d], a->k << 16);
     return true;
@@ -1040,7 +962,6 @@ static bool trans_l_movhi(DisasContext *dc, arg_l_movhi *a, uint32_t insn)
 
 static bool trans_l_macrc(DisasContext *dc, arg_l_macrc *a, uint32_t insn)
 {
-    LOG_DIS("l.macrc r%d\n", a->d);
     check_r0_write(a->d);
     tcg_gen_trunc_i64_tl(cpu_R[a->d], cpu_mac);
     tcg_gen_movi_i64(cpu_mac, 0);
@@ -1049,147 +970,126 @@ static bool trans_l_macrc(DisasContext *dc, arg_l_macrc *a, uint32_t insn)
 
 static bool trans_l_sfeq(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfeq r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_EQ, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfne(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfne r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_NE, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfgtu(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgtu r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_GTU, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfgeu(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgeu r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_GEU, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfltu(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfltu r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_LTU, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfleu(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfleu r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_LEU, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfgts(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgts r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_GT, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfges(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfges r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_GE, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sflts(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sflts r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_LT, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfles(DisasContext *dc, arg_ab *a, TCGCond cond)
 {
-    LOG_DIS("l.sfles r%d, r%d\n", a->a, a->b);
     tcg_gen_setcond_tl(TCG_COND_LE, cpu_sr_f, cpu_R[a->a], cpu_R[a->b]);
     return true;
 }
 
 static bool trans_l_sfeqi(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfeqi r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfnei(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfnei r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_NE, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfgtui(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgtui r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_GTU, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfgeui(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgeui r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_GEU, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfltui(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfltui r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfleui(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfleui r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_LEU, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfgtsi(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgtsi r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_GT, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfgesi(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfgesi r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_GE, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sfltsi(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sfltsi r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_LT, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sflesi(DisasContext *dc, arg_ai *a, TCGCond cond)
 {
-    LOG_DIS("l.sflesi r%d, %d\n", a->a, a->i);
     tcg_gen_setcondi_tl(TCG_COND_LE, cpu_sr_f, cpu_R[a->a], a->i);
     return true;
 }
 
 static bool trans_l_sys(DisasContext *dc, arg_l_sys *a, uint32_t insn)
 {
-    LOG_DIS("l.sys %d\n", a->k);
     tcg_gen_movi_tl(cpu_pc, dc->base.pc_next);
     gen_exception(dc, EXCP_SYSCALL);
     dc->base.is_jmp = DISAS_NORETURN;
@@ -1198,7 +1098,6 @@ static bool trans_l_sys(DisasContext *dc, arg_l_sys *a, uint32_t insn)
 
 static bool trans_l_trap(DisasContext *dc, arg_l_trap *a, uint32_t insn)
 {
-    LOG_DIS("l.trap %d\n", a->k);
     tcg_gen_movi_tl(cpu_pc, dc->base.pc_next);
     gen_exception(dc, EXCP_TRAP);
     dc->base.is_jmp = DISAS_NORETURN;
@@ -1207,37 +1106,28 @@ static bool trans_l_trap(DisasContext *dc, arg_l_trap *a, uint32_t insn)
 
 static bool trans_l_msync(DisasContext *dc, arg_l_msync *a, uint32_t insn)
 {
-    LOG_DIS("l.msync\n");
     tcg_gen_mb(TCG_MO_ALL);
     return true;
 }
 
 static bool trans_l_psync(DisasContext *dc, arg_l_psync *a, uint32_t insn)
 {
-    LOG_DIS("l.psync\n");
     return true;
 }
 
 static bool trans_l_csync(DisasContext *dc, arg_l_csync *a, uint32_t insn)
 {
-    LOG_DIS("l.csync\n");
     return true;
 }
 
 static bool trans_l_rfe(DisasContext *dc, arg_l_rfe *a, uint32_t insn)
 {
-    LOG_DIS("l.rfe\n");
-
-#ifdef CONFIG_USER_ONLY
-    gen_illegal_exception(dc);
-#else
-    if (dc->mem_idx == MMU_USER_IDX) {
+    if (is_user(dc)) {
         gen_illegal_exception(dc);
     } else {
         gen_helper_rfe(cpu_env);
-        dc->base.is_jmp = DISAS_UPDATE;
+        dc->base.is_jmp = DISAS_EXIT;
     }
-#endif
     return true;
 }
 
@@ -1274,56 +1164,48 @@ static void do_fpcmp(DisasContext *dc, arg_ab *a,
 
 static bool trans_lf_add_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("lf.add.s r%d, r%d, r%d\n", a->d, a->a, a->b);
     do_fp3(dc, a, gen_helper_float_add_s);
     return true;
 }
 
 static bool trans_lf_sub_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sub.s r%d, r%d, r%d\n", a->d, a->a, a->b);
     do_fp3(dc, a, gen_helper_float_sub_s);
     return true;
 }
 
 static bool trans_lf_mul_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("lf.mul.s r%d, r%d, r%d\n", a->d, a->a, a->b);
     do_fp3(dc, a, gen_helper_float_mul_s);
     return true;
 }
 
 static bool trans_lf_div_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("lf.div.s r%d, r%d, r%d\n", a->d, a->a, a->b);
     do_fp3(dc, a, gen_helper_float_div_s);
     return true;
 }
 
 static bool trans_lf_rem_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("lf.rem.s r%d, r%d, r%d\n", a->d, a->a, a->b);
     do_fp3(dc, a, gen_helper_float_rem_s);
     return true;
 }
 
 static bool trans_lf_itof_s(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("lf.itof.s r%d, r%d\n", a->d, a->a);
     do_fp2(dc, a, gen_helper_itofs);
     return true;
 }
 
 static bool trans_lf_ftoi_s(DisasContext *dc, arg_da *a, uint32_t insn)
 {
-    LOG_DIS("lf.ftoi.s r%d, r%d\n", a->d, a->a);
     do_fp2(dc, a, gen_helper_ftois);
     return true;
 }
 
 static bool trans_lf_madd_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 {
-    LOG_DIS("lf.madd.s r%d, r%d, r%d\n", a->d, a->a, a->b);
     check_r0_write(a->d);
     gen_helper_float_madd_s(cpu_R[a->d], cpu_env, cpu_R[a->d],
                             cpu_R[a->a], cpu_R[a->b]);
@@ -1333,42 +1215,36 @@ static bool trans_lf_madd_s(DisasContext *dc, arg_dab *a, uint32_t insn)
 
 static bool trans_lf_sfeq_s(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sfeq.s r%d, r%d\n", a->a, a->b);
     do_fpcmp(dc, a, gen_helper_float_eq_s, false, false);
     return true;
 }
 
 static bool trans_lf_sfne_s(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sfne.s r%d, r%d\n", a->a, a->b);
     do_fpcmp(dc, a, gen_helper_float_eq_s, true, false);
     return true;
 }
 
 static bool trans_lf_sfgt_s(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sfgt.s r%d, r%d\n", a->a, a->b);
     do_fpcmp(dc, a, gen_helper_float_lt_s, false, true);
     return true;
 }
 
 static bool trans_lf_sfge_s(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sfge.s r%d, r%d\n", a->a, a->b);
     do_fpcmp(dc, a, gen_helper_float_le_s, false, true);
     return true;
 }
 
 static bool trans_lf_sflt_s(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sflt.s r%d, r%d\n", a->a, a->b);
     do_fpcmp(dc, a, gen_helper_float_lt_s, false, false);
     return true;
 }
 
 static bool trans_lf_sfle_s(DisasContext *dc, arg_ab *a, uint32_t insn)
 {
-    LOG_DIS("lf.sfle.s r%d, r%d\n", a->a, a->b);
     do_fpcmp(dc, a, gen_helper_float_le_s, false, false);
     return true;
 }
@@ -1382,6 +1258,8 @@ static void openrisc_tr_init_disas_context(DisasContextBase *dcb, CPUState *cs)
     dc->mem_idx = cpu_mmu_index(env, false);
     dc->tb_flags = dc->base.tb->flags;
     dc->delayed_branch = (dc->tb_flags & TB_FLAGS_DFLAG) != 0;
+    dc->jmp_pc_imm = -1;
+
     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
     dc->base.max_insns = MIN(dc->base.max_insns, bound);
 }
@@ -1434,50 +1312,81 @@ static void openrisc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     }
     dc->base.pc_next += 4;
 
-    /* delay slot */
-    if (dc->delayed_branch) {
-        dc->delayed_branch--;
-        if (!dc->delayed_branch) {
-            tcg_gen_mov_tl(cpu_pc, jmp_pc);
-            tcg_gen_discard_tl(jmp_pc);
-            dc->base.is_jmp = DISAS_UPDATE;
-            return;
-        }
+    /* When exiting the delay slot normally, exit via jmp_pc.
+     * For DISAS_NORETURN, we have raised an exception and already exited.
+     * For DISAS_EXIT, we found l.rfe in a delay slot.  There's nothing
+     * in the manual saying this is illegal, but it surely it should.
+     * At least or1ksim overrides pcnext and ignores the branch.
+     */
+    if (dc->delayed_branch
+        && --dc->delayed_branch == 0
+        && dc->base.is_jmp == DISAS_NEXT) {
+        dc->base.is_jmp = DISAS_JUMP;
     }
 }
 
 static void openrisc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *dc = container_of(dcbase, DisasContext, base);
+    target_ulong jmp_dest;
+
+    /* If we have already exited the TB, nothing following has effect.  */
+    if (dc->base.is_jmp == DISAS_NORETURN) {
+        return;
+    }
 
+    /* Adjust the delayed branch state for the next TB.  */
     if ((dc->tb_flags & TB_FLAGS_DFLAG ? 1 : 0) != (dc->delayed_branch != 0)) {
         tcg_gen_movi_i32(cpu_dflag, dc->delayed_branch != 0);
     }
 
-    tcg_gen_movi_tl(cpu_ppc, dc->base.pc_next - 4);
-    if (dc->base.is_jmp == DISAS_NEXT) {
-        dc->base.is_jmp = DISAS_UPDATE;
-        tcg_gen_movi_tl(cpu_pc, dc->base.pc_next);
-    }
-    if (unlikely(dc->base.singlestep_enabled)) {
-        gen_exception(dc, EXCP_DEBUG);
-    } else {
-        switch (dc->base.is_jmp) {
-        case DISAS_TOO_MANY:
-            gen_goto_tb(dc, 0, dc->base.pc_next);
-            break;
-        case DISAS_NORETURN:
-        case DISAS_JUMP:
-        case DISAS_TB_JUMP:
+    /* For DISAS_TOO_MANY, jump to the next insn.  */
+    jmp_dest = dc->base.pc_next;
+    tcg_gen_movi_tl(cpu_ppc, jmp_dest - 4);
+
+    switch (dc->base.is_jmp) {
+    case DISAS_JUMP:
+        jmp_dest = dc->jmp_pc_imm;
+        if (jmp_dest == -1) {
+            /* The jump destination is indirect/computed; use jmp_pc.  */
+            tcg_gen_mov_tl(cpu_pc, jmp_pc);
+            tcg_gen_discard_tl(jmp_pc);
+            if (unlikely(dc->base.singlestep_enabled)) {
+                gen_exception(dc, EXCP_DEBUG);
+            } else {
+                tcg_gen_lookup_and_goto_ptr();
+            }
             break;
-        case DISAS_UPDATE:
-            /* indicate that the hash table must be used
-               to find the next TB */
+        }
+        /* The jump destination is direct; use jmp_pc_imm.
+           However, we will have stored into jmp_pc as well;
+           we know now that it wasn't needed.  */
+        tcg_gen_discard_tl(jmp_pc);
+        /* fallthru */
+
+    case DISAS_TOO_MANY:
+        if (unlikely(dc->base.singlestep_enabled)) {
+            tcg_gen_movi_tl(cpu_pc, jmp_dest);
+            gen_exception(dc, EXCP_DEBUG);
+        } else if ((dc->base.pc_first ^ jmp_dest) & TARGET_PAGE_MASK) {
+            tcg_gen_movi_tl(cpu_pc, jmp_dest);
+            tcg_gen_lookup_and_goto_ptr();
+        } else {
+            tcg_gen_goto_tb(0);
+            tcg_gen_movi_tl(cpu_pc, jmp_dest);
+            tcg_gen_exit_tb(dc->base.tb, 0);
+        }
+        break;
+
+    case DISAS_EXIT:
+        if (unlikely(dc->base.singlestep_enabled)) {
+            gen_exception(dc, EXCP_DEBUG);
+        } else {
             tcg_gen_exit_tb(NULL, 0);
-            break;
-        default:
-            g_assert_not_reached();
         }
+        break;
+    default:
+        g_assert_not_reached();
     }
 }
 
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c7f3fb6b73..4edcf62cf7 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -196,7 +196,6 @@ enum {
     /* QEMU exceptions: special cases we want to stop translation            */
     POWERPC_EXCP_SYNC         = 0x202, /* context synchronizing instruction  */
     POWERPC_EXCP_SYSCALL_USER = 0x203, /* System call in user mode only      */
-    POWERPC_EXCP_STCX         = 0x204 /* Conditional stores in user mode     */
 };
 
 /* Exceptions error codes                                                    */
@@ -994,10 +993,6 @@ struct CPUPPCState {
     /* Reservation value */
     target_ulong reserve_val;
     target_ulong reserve_val2;
-    /* Reservation store address */
-    target_ulong reserve_ea;
-    /* Reserved store source register and size */
-    target_ulong reserve_info;
 
     /* Those ones are used in supervisor mode only */
     /* machine state register */
@@ -1015,6 +1010,9 @@ struct CPUPPCState {
     /* Next instruction pointer */
     target_ulong nip;
 
+    /* High part of 128-bit helper return.  */
+    uint64_t retxh;
+
     int access_type; /* when a memory exception occurs, the access
                         type is stored here */
 
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index c092fbead0..d6e97a90e0 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -22,7 +22,7 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
-
+#include "internal.h"
 #include "helper_regs.h"
 
 //#define DEBUG_OP
@@ -1198,3 +1198,19 @@ void helper_book3s_msgsnd(target_ulong rb)
     qemu_mutex_unlock_iothread();
 }
 #endif
+
+void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+                                 MMUAccessType access_type,
+                                 int mmu_idx, uintptr_t retaddr)
+{
+    CPUPPCState *env = cs->env_ptr;
+    uint32_t insn;
+
+    /* Restore state and reload the insn we executed, for filling in DSISR.  */
+    cpu_restore_state(cs, retaddr, true);
+    insn = cpu_ldl_code(env, env->nip);
+
+    cs->exception_index = POWERPC_EXCP_ALIGN;
+    env->error_code = insn & 0x03FF0000;
+    cpu_loop_exit(cs);
+}
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 7714bfe0f9..8675d931b6 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -274,6 +274,7 @@ static inline void float_inexact_excp(CPUPPCState *env)
 {
     CPUState *cs = CPU(ppc_env_get_cpu(env));
 
+    env->fpscr |= 1 << FPSCR_FI;
     env->fpscr |= 1 << FPSCR_XX;
     /* Update the floating-point exception summary */
     env->fpscr |= FP_FX;
@@ -533,6 +534,7 @@ static void do_float_check_status(CPUPPCState *env, uintptr_t raddr)
 {
     CPUState *cs = CPU(ppc_env_get_cpu(env));
     int status = get_float_exception_flags(&env->fp_status);
+    bool inexact_happened = false;
 
     if (status & float_flag_divbyzero) {
         float_zero_divide_excp(env, raddr);
@@ -542,6 +544,12 @@ static void do_float_check_status(CPUPPCState *env, uintptr_t raddr)
         float_underflow_excp(env);
     } else if (status & float_flag_inexact) {
         float_inexact_excp(env);
+        inexact_happened = true;
+    }
+
+    /* if the inexact flag was not set */
+    if (inexact_happened == false) {
+        env->fpscr &= ~(1 << FPSCR_FI); /* clear the FPSCR[FI] bit */
     }
 
     if (cs->exception_index == POWERPC_EXCP_PROGRAM &&
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d751f0e219..5706c2497f 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -799,3 +799,14 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
 
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
+
+#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
+DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
+DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
+                   void, env, tl, i64, i64, i32)
+DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
+                   void, env, tl, i64, i64, i32)
+DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
+DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
+#endif
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 1f441c6483..a9bcadff42 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -252,4 +252,9 @@ static inline void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
 void helper_compute_fprf_float16(CPUPPCState *env, float16 arg);
 void helper_compute_fprf_float32(CPUPPCState *env, float32 arg);
 void helper_compute_fprf_float128(CPUPPCState *env, float128 arg);
+
+/* Raise a data fault alignment exception for the specified virtual address */
+void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                 MMUAccessType access_type,
+                                 int mmu_idx, uintptr_t retaddr);
 #endif /* PPC_INTERNAL_H */
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 4df4ff6cbf..9211ee2ee1 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -248,107 +248,25 @@ static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 
 
 #if defined(TARGET_PPC64)
-static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
-                                       struct kvm_ppc_smmu_info *info)
+static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 {
-    CPUPPCState *env = &cpu->env;
-    CPUState *cs = CPU(cpu);
-
-    memset(info, 0, sizeof(*info));
-
-    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
-     * need to "guess" what the supported page sizes are.
-     *
-     * For that to work we make a few assumptions:
-     *
-     * - Check whether we are running "PR" KVM which only supports 4K
-     *   and 16M pages, but supports them regardless of the backing
-     *   store characteritics. We also don't support 1T segments.
-     *
-     *   This is safe as if HV KVM ever supports that capability or PR
-     *   KVM grows supports for more page/segment sizes, those versions
-     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
-     *   will not hit this fallback
-     *
-     * - Else we are running HV KVM. This means we only support page
-     *   sizes that fit in the backing store. Additionally we only
-     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
-     *   P7 encodings for the SLB and hash table. Here too, we assume
-     *   support for any newer processor will mean a kernel that
-     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
-     *   this fallback.
-     */
-    if (kvmppc_is_pr(cs->kvm_state)) {
-        /* No flags */
-        info->flags = 0;
-        info->slb_size = 64;
-
-        /* Standard 4k base page size segment */
-        info->sps[0].page_shift = 12;
-        info->sps[0].slb_enc = 0;
-        info->sps[0].enc[0].page_shift = 12;
-        info->sps[0].enc[0].pte_enc = 0;
-
-        /* Standard 16M large page size segment */
-        info->sps[1].page_shift = 24;
-        info->sps[1].slb_enc = SLB_VSID_L;
-        info->sps[1].enc[0].page_shift = 24;
-        info->sps[1].enc[0].pte_enc = 0;
-    } else {
-        int i = 0;
-
-        /* HV KVM has backing store size restrictions */
-        info->flags = KVM_PPC_PAGE_SIZES_REAL;
-
-        if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
-            info->flags |= KVM_PPC_1T_SEGMENTS;
-        }
-
-        if (env->mmu_model == POWERPC_MMU_2_06 ||
-            env->mmu_model == POWERPC_MMU_2_07) {
-            info->slb_size = 32;
-        } else {
-            info->slb_size = 64;
-        }
+    int ret;
 
-        /* Standard 4k base page size segment */
-        info->sps[i].page_shift = 12;
-        info->sps[i].slb_enc = 0;
-        info->sps[i].enc[0].page_shift = 12;
-        info->sps[i].enc[0].pte_enc = 0;
-        i++;
-
-        /* 64K on MMU 2.06 and later */
-        if (env->mmu_model == POWERPC_MMU_2_06 ||
-            env->mmu_model == POWERPC_MMU_2_07) {
-            info->sps[i].page_shift = 16;
-            info->sps[i].slb_enc = 0x110;
-            info->sps[i].enc[0].page_shift = 16;
-            info->sps[i].enc[0].pte_enc = 1;
-            i++;
-        }
+    assert(kvm_state != NULL);
 
-        /* Standard 16M large page size segment */
-        info->sps[i].page_shift = 24;
-        info->sps[i].slb_enc = SLB_VSID_L;
-        info->sps[i].enc[0].page_shift = 24;
-        info->sps[i].enc[0].pte_enc = 0;
+    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
+        error_setg(errp, "KVM doesn't expose the MMU features it supports");
+        error_append_hint(errp, "Consider switching to a newer KVM\n");
+        return;
     }
-}
-
-static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
-{
-    CPUState *cs = CPU(cpu);
-    int ret;
 
-    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
-        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
-        if (ret == 0) {
-            return;
-        }
+    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
+    if (ret == 0) {
+        return;
     }
 
-    kvm_get_fallback_smmu_info(cpu, info);
+    error_setg_errno(errp, -ret,
+                     "KVM failed to provide the MMU features it supports");
 }
 
 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
@@ -408,14 +326,13 @@ target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 
 bool kvmppc_hpt_needs_host_contiguous_pages(void)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
     static struct kvm_ppc_smmu_info smmu_info;
 
     if (!kvm_enabled()) {
         return false;
     }
 
-    kvm_get_smmu_info(cpu, &smmu_info);
+    kvm_get_smmu_info(&smmu_info, &error_fatal);
     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 }
 
@@ -423,13 +340,18 @@ void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 {
     struct kvm_ppc_smmu_info smmu_info;
     int iq, ik, jq, jk;
+    Error *local_err = NULL;
 
     /* For now, we only have anything to check on hash64 MMUs */
     if (!cpu->hash64_opts || !kvm_enabled()) {
         return;
     }
 
-    kvm_get_smmu_info(cpu, &smmu_info);
+    kvm_get_smmu_info(&smmu_info, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 
     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
@@ -2168,7 +2090,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
 
     /* Find the largest hardware supported page size that's less than
      * or equal to the (logical) backing page size of guest RAM */
-    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
+    kvm_get_smmu_info(&info, &error_fatal);
     rampagesize = qemu_getrampagesize();
     best_page_shift = 0;
 
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index a34e604db3..8f0d86d104 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -21,9 +21,9 @@
 #include "exec/exec-all.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
-
 #include "helper_regs.h"
 #include "exec/cpu_ldst.h"
+#include "tcg.h"
 #include "internal.h"
 
 //#define DEBUG_OP
@@ -215,6 +215,76 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
     return i;
 }
 
+#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
+                               uint32_t opidx)
+{
+    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
+    env->retxh = int128_gethi(ret);
+    return int128_getlo(ret);
+}
+
+uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
+                               uint32_t opidx)
+{
+    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
+    env->retxh = int128_gethi(ret);
+    return int128_getlo(ret);
+}
+
+void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
+                            uint64_t lo, uint64_t hi, uint32_t opidx)
+{
+    Int128 val = int128_make128(lo, hi);
+    helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
+}
+
+void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
+                            uint64_t lo, uint64_t hi, uint32_t opidx)
+{
+    Int128 val = int128_make128(lo, hi);
+    helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
+}
+
+uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
+                                  uint64_t new_lo, uint64_t new_hi,
+                                  uint32_t opidx)
+{
+    bool success = false;
+
+    if (likely(addr == env->reserve_addr)) {
+        Int128 oldv, cmpv, newv;
+
+        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
+        newv = int128_make128(new_lo, new_hi);
+        oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
+                                             opidx, GETPC());
+        success = int128_eq(oldv, cmpv);
+    }
+    env->reserve_addr = -1;
+    return env->so + success * CRF_EQ_BIT;
+}
+
+uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
+                                  uint64_t new_lo, uint64_t new_hi,
+                                  uint32_t opidx)
+{
+    bool success = false;
+
+    if (likely(addr == env->reserve_addr)) {
+        Int128 oldv, cmpv, newv;
+
+        cmpv = int128_make128(env->reserve_val2, env->reserve_val);
+        newv = int128_make128(new_lo, new_hi);
+        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
+                                             opidx, GETPC());
+        success = int128_eq(oldv, cmpv);
+    }
+    env->reserve_addr = -1;
+    return env->so + success * CRF_EQ_BIT;
+}
+#endif
+
 /*****************************************************************************/
 /* Altivec extension helpers */
 #if defined(HOST_WORDS_BIGENDIAN)
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index 98ce17985b..e6739e6c24 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -17,6 +17,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "sysemu/kvm.h"
@@ -1090,11 +1091,10 @@ static void mmubooke_dump_mmu(FILE *f, fprintf_function cpu_fprintf,
         pa = entry->RPN & mask;
         /* Extend the physical address to 36 bits */
         pa |= (hwaddr)(entry->RPN & 0xF) << 32;
-        size /= 1024;
-        if (size >= 1024) {
-            snprintf(size_buf, sizeof(size_buf), "%3" PRId64 "M", size / 1024);
+        if (size >= 1 * MiB) {
+            snprintf(size_buf, sizeof(size_buf), "%3" PRId64 "M", size / MiB);
         } else {
-            snprintf(size_buf, sizeof(size_buf), "%3" PRId64 "k", size);
+            snprintf(size_buf, sizeof(size_buf), "%3" PRId64 "k", size / KiB);
         }
         cpu_fprintf(f, "0x%016" PRIx64 " 0x%016" PRIx64 " %s %-5u %08x %08x\n",
                     (uint64_t)ea, (uint64_t)pa, size_buf, (uint32_t)entry->PID,
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 3a215a1dc6..9eaa10b421 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -2388,23 +2388,6 @@ static inline void gen_addr_add(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
 }
 
-static inline void gen_check_align(DisasContext *ctx, TCGv EA, int mask)
-{
-    TCGLabel *l1 = gen_new_label();
-    TCGv t0 = tcg_temp_new();
-    TCGv_i32 t1, t2;
-    tcg_gen_andi_tl(t0, EA, mask);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-    t1 = tcg_const_i32(POWERPC_EXCP_ALIGN);
-    t2 = tcg_const_i32(ctx->opcode & 0x03FF0000);
-    gen_update_nip(ctx, ctx->base.pc_next - 4);
-    gen_helper_raise_exception_err(cpu_env, t1, t2);
-    tcg_temp_free_i32(t1);
-    tcg_temp_free_i32(t2);
-    gen_set_label(l1);
-    tcg_temp_free(t0);
-}
-
 static inline void gen_align_no_le(DisasContext *ctx)
 {
     gen_exception_err(ctx, POWERPC_EXCP_ALIGN,
@@ -2607,7 +2590,7 @@ static void gen_ld(DisasContext *ctx)
 static void gen_lq(DisasContext *ctx)
 {
     int ra, rd;
-    TCGv EA;
+    TCGv EA, hi, lo;
 
     /* lq is a legal user mode instruction starting in ISA 2.07 */
     bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
@@ -2633,16 +2616,35 @@ static void gen_lq(DisasContext *ctx)
     EA = tcg_temp_new();
     gen_addr_imm_index(ctx, EA, 0x0F);
 
-    /* We only need to swap high and low halves. gen_qemu_ld64_i64 does
-       necessary 64-bit byteswap already. */
-    if (unlikely(ctx->le_mode)) {
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);
+    /* Note that the low part is always in RD+1, even in LE mode.  */
+    lo = cpu_gpr[rd + 1];
+    hi = cpu_gpr[rd];
+
+    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+#ifdef CONFIG_ATOMIC128
+        TCGv_i32 oi = tcg_temp_new_i32();
+        if (ctx->le_mode) {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        } else {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+        }
+        tcg_temp_free_i32(oi);
+        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
+#else
+        /* Restart with exclusive lock.  */
+        gen_helper_exit_atomic(cpu_env);
+        ctx->base.is_jmp = DISAS_NORETURN;
+#endif
+    } else if (ctx->le_mode) {
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
         gen_addr_add(ctx, EA, EA, 8);
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);
     } else {
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ);
         gen_addr_add(ctx, EA, EA, 8);
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);
     }
     tcg_temp_free(EA);
 }
@@ -2741,6 +2743,7 @@ static void gen_std(DisasContext *ctx)
     if ((ctx->opcode & 0x3) == 0x2) { /* stq */
         bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
         bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
+        TCGv hi, lo;
 
         if (!(ctx->insns_flags & PPC_64BX)) {
             gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
@@ -2764,20 +2767,38 @@ static void gen_std(DisasContext *ctx)
         EA = tcg_temp_new();
         gen_addr_imm_index(ctx, EA, 0x03);
 
-        /* We only need to swap high and low halves. gen_qemu_st64_i64 does
-           necessary 64-bit byteswap already. */
-        if (unlikely(ctx->le_mode)) {
-            gen_qemu_st64_i64(ctx, cpu_gpr[rs + 1], EA);
+        /* Note that the low part is always in RS+1, even in LE mode.  */
+        lo = cpu_gpr[rs + 1];
+        hi = cpu_gpr[rs];
+
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+#ifdef CONFIG_ATOMIC128
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+            }
+            tcg_temp_free_i32(oi);
+#else
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+#endif
+        } else if (ctx->le_mode) {
+            tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ);
             gen_addr_add(ctx, EA, EA, 8);
-            gen_qemu_st64_i64(ctx, cpu_gpr[rs], EA);
+            tcg_gen_qemu_st_i64(hi, EA, ctx->mem_idx, MO_LEQ);
         } else {
-            gen_qemu_st64_i64(ctx, cpu_gpr[rs], EA);
+            tcg_gen_qemu_st_i64(hi, EA, ctx->mem_idx, MO_BEQ);
             gen_addr_add(ctx, EA, EA, 8);
-            gen_qemu_st64_i64(ctx, cpu_gpr[rs + 1], EA);
+            tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_BEQ);
         }
         tcg_temp_free(EA);
     } else {
-        /* std / stdu*/
+        /* std / stdu */
         if (Rc(ctx->opcode)) {
             if (unlikely(rA(ctx->opcode) == 0)) {
                 gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
@@ -3032,23 +3053,24 @@ static void gen_isync(DisasContext *ctx)
 
 #define MEMOP_GET_SIZE(x)  (1 << ((x) & MO_SIZE))
 
-#define LARX(name, memop)                                            \
-static void gen_##name(DisasContext *ctx)                            \
-{                                                                    \
-    TCGv t0;                                                         \
-    TCGv gpr = cpu_gpr[rD(ctx->opcode)];                             \
-    int len = MEMOP_GET_SIZE(memop);                                 \
-    gen_set_access_type(ctx, ACCESS_RES);                            \
-    t0 = tcg_temp_local_new();                                       \
-    gen_addr_reg_index(ctx, t0);                                     \
-    if ((len) > 1) {                                                 \
-        gen_check_align(ctx, t0, (len)-1);                           \
-    }                                                                \
-    tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop);                \
-    tcg_gen_mov_tl(cpu_reserve, t0);                                 \
-    tcg_gen_mov_tl(cpu_reserve_val, gpr);                            \
-    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);                           \
-    tcg_temp_free(t0);                                               \
+static void gen_load_locked(DisasContext *ctx, TCGMemOp memop)
+{
+    TCGv gpr = cpu_gpr[rD(ctx->opcode)];
+    TCGv t0 = tcg_temp_new();
+
+    gen_set_access_type(ctx, ACCESS_RES);
+    gen_addr_reg_index(ctx, t0);
+    tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop | MO_ALIGN);
+    tcg_gen_mov_tl(cpu_reserve, t0);
+    tcg_gen_mov_tl(cpu_reserve_val, gpr);
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    tcg_temp_free(t0);
+}
+
+#define LARX(name, memop)                  \
+static void gen_##name(DisasContext *ctx)  \
+{                                          \
+    gen_load_locked(ctx, memop);           \
 }
 
 /* lwarx */
@@ -3056,134 +3078,239 @@ LARX(lbarx, DEF_MEMOP(MO_UB))
 LARX(lharx, DEF_MEMOP(MO_UW))
 LARX(lwarx, DEF_MEMOP(MO_UL))
 
-#define LD_ATOMIC(name, memop, tp, op, eop)                             \
-static void gen_##name(DisasContext *ctx)                               \
-{                                                                       \
-    int len = MEMOP_GET_SIZE(memop);                                    \
-    uint32_t gpr_FC = FC(ctx->opcode);                                  \
-    TCGv EA = tcg_temp_local_new();                                     \
-    TCGv_##tp t0, t1;                                                   \
-                                                                        \
-    gen_addr_register(ctx, EA);                                         \
-    if (len > 1) {                                                      \
-        gen_check_align(ctx, EA, len - 1);                              \
-    }                                                                   \
-    t0 = tcg_temp_new_##tp();                                           \
-    t1 = tcg_temp_new_##tp();                                           \
-    tcg_gen_##op(t0, cpu_gpr[rD(ctx->opcode) + 1]);                     \
-                                                                        \
-    switch (gpr_FC) {                                                   \
-    case 0: /* Fetch and add */                                         \
-        tcg_gen_atomic_fetch_add_##tp(t1, EA, t0, ctx->mem_idx, memop); \
-        break;                                                          \
-    case 1: /* Fetch and xor */                                         \
-        tcg_gen_atomic_fetch_xor_##tp(t1, EA, t0, ctx->mem_idx, memop); \
-        break;                                                          \
-    case 2: /* Fetch and or */                                          \
-        tcg_gen_atomic_fetch_or_##tp(t1, EA, t0, ctx->mem_idx, memop);  \
-        break;                                                          \
-    case 3: /* Fetch and 'and' */                                       \
-        tcg_gen_atomic_fetch_and_##tp(t1, EA, t0, ctx->mem_idx, memop); \
-        break;                                                          \
-    case 8: /* Swap */                                                  \
-        tcg_gen_atomic_xchg_##tp(t1, EA, t0, ctx->mem_idx, memop);      \
-        break;                                                          \
-    case 4:  /* Fetch and max unsigned */                               \
-    case 5:  /* Fetch and max signed */                                 \
-    case 6:  /* Fetch and min unsigned */                               \
-    case 7:  /* Fetch and min signed */                                 \
-    case 16: /* compare and swap not equal */                           \
-    case 24: /* Fetch and increment bounded */                          \
-    case 25: /* Fetch and increment equal */                            \
-    case 28: /* Fetch and decrement bounded */                          \
-        gen_invalid(ctx);                                               \
-        break;                                                          \
-    default:                                                            \
-        /* invoke data storage error handler */                         \
-        gen_exception_err(ctx, POWERPC_EXCP_DSI, POWERPC_EXCP_INVAL);   \
-    }                                                                   \
-    tcg_gen_##eop(cpu_gpr[rD(ctx->opcode)], t1);                        \
-    tcg_temp_free_##tp(t0);                                             \
-    tcg_temp_free_##tp(t1);                                             \
-    tcg_temp_free(EA);                                                  \
-}
-
-LD_ATOMIC(lwat, DEF_MEMOP(MO_UL), i32, trunc_tl_i32, extu_i32_tl)
-#if defined(TARGET_PPC64)
-LD_ATOMIC(ldat, DEF_MEMOP(MO_Q), i64, mov_i64, mov_i64)
-#endif
+static void gen_fetch_inc_conditional(DisasContext *ctx, TCGMemOp memop,
+                                      TCGv EA, TCGCond cond, int addend)
+{
+    TCGv t = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv u = tcg_temp_new();
 
-#define ST_ATOMIC(name, memop, tp, op)                                  \
-static void gen_##name(DisasContext *ctx)                               \
-{                                                                       \
-    int len = MEMOP_GET_SIZE(memop);                                    \
-    uint32_t gpr_FC = FC(ctx->opcode);                                  \
-    TCGv EA = tcg_temp_local_new();                                     \
-    TCGv_##tp t0, t1;                                                   \
-                                                                        \
-    gen_addr_register(ctx, EA);                                         \
-    if (len > 1) {                                                      \
-        gen_check_align(ctx, EA, len - 1);                              \
-    }                                                                   \
-    t0 = tcg_temp_new_##tp();                                           \
-    t1 = tcg_temp_new_##tp();                                           \
-    tcg_gen_##op(t0, cpu_gpr[rD(ctx->opcode) + 1]);                     \
-                                                                        \
-    switch (gpr_FC) {                                                   \
-    case 0: /* add and Store */                                         \
-        tcg_gen_atomic_add_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \
-        break;                                                          \
-    case 1: /* xor and Store */                                         \
-        tcg_gen_atomic_xor_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \
-        break;                                                          \
-    case 2: /* Or and Store */                                          \
-        tcg_gen_atomic_or_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop);  \
-        break;                                                          \
-    case 3: /* 'and' and Store */                                       \
-        tcg_gen_atomic_and_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \
-        break;                                                          \
-    case 4:  /* Store max unsigned */                                   \
-    case 5:  /* Store max signed */                                     \
-    case 6:  /* Store min unsigned */                                   \
-    case 7:  /* Store min signed */                                     \
-    case 24: /* Store twin  */                                          \
-        gen_invalid(ctx);                                               \
-        break;                                                          \
-    default:                                                            \
-        /* invoke data storage error handler */                         \
-        gen_exception_err(ctx, POWERPC_EXCP_DSI, POWERPC_EXCP_INVAL);   \
-    }                                                                   \
-    tcg_temp_free_##tp(t0);                                             \
-    tcg_temp_free_##tp(t1);                                             \
-    tcg_temp_free(EA);                                                  \
-}
-
-ST_ATOMIC(stwat, DEF_MEMOP(MO_UL), i32, trunc_tl_i32)
-#if defined(TARGET_PPC64)
-ST_ATOMIC(stdat, DEF_MEMOP(MO_Q), i64, mov_i64)
+    tcg_gen_qemu_ld_tl(t, EA, ctx->mem_idx, memop);
+    tcg_gen_addi_tl(t2, EA, MEMOP_GET_SIZE(memop));
+    tcg_gen_qemu_ld_tl(t2, t2, ctx->mem_idx, memop);
+    tcg_gen_addi_tl(u, t, addend);
+
+    /* E.g. for fetch and increment bounded... */
+    /* mem(EA,s) = (t != t2 ? u = t + 1 : t) */
+    tcg_gen_movcond_tl(cond, u, t, t2, u, t);
+    tcg_gen_qemu_st_tl(u, EA, ctx->mem_idx, memop);
+
+    /* RT = (t != t2 ? t : u = 1<<(s*8-1)) */
+    tcg_gen_movi_tl(u, 1 << (MEMOP_GET_SIZE(memop) * 8 - 1));
+    tcg_gen_movcond_tl(cond, cpu_gpr[rD(ctx->opcode)], t, t2, t, u);
+
+    tcg_temp_free(t);
+    tcg_temp_free(t2);
+    tcg_temp_free(u);
+}
+
+static void gen_ld_atomic(DisasContext *ctx, TCGMemOp memop)
+{
+    uint32_t gpr_FC = FC(ctx->opcode);
+    TCGv EA = tcg_temp_new();
+    int rt = rD(ctx->opcode);
+    bool need_serial;
+    TCGv src, dst;
+
+    gen_addr_register(ctx, EA);
+    dst = cpu_gpr[rt];
+    src = cpu_gpr[(rt + 1) & 31];
+
+    need_serial = false;
+    memop |= MO_ALIGN;
+    switch (gpr_FC) {
+    case 0: /* Fetch and add */
+        tcg_gen_atomic_fetch_add_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 1: /* Fetch and xor */
+        tcg_gen_atomic_fetch_xor_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 2: /* Fetch and or */
+        tcg_gen_atomic_fetch_or_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 3: /* Fetch and 'and' */
+        tcg_gen_atomic_fetch_and_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 4:  /* Fetch and max unsigned */
+        tcg_gen_atomic_fetch_umax_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 5:  /* Fetch and max signed */
+        tcg_gen_atomic_fetch_smax_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 6:  /* Fetch and min unsigned */
+        tcg_gen_atomic_fetch_umin_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 7:  /* Fetch and min signed */
+        tcg_gen_atomic_fetch_smin_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+    case 8: /* Swap */
+        tcg_gen_atomic_xchg_tl(dst, EA, src, ctx->mem_idx, memop);
+        break;
+
+    case 16: /* Compare and swap not equal */
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            need_serial = true;
+        } else {
+            TCGv t0 = tcg_temp_new();
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_qemu_ld_tl(t0, EA, ctx->mem_idx, memop);
+            if ((memop & MO_SIZE) == MO_64 || TARGET_LONG_BITS == 32) {
+                tcg_gen_mov_tl(t1, src);
+            } else {
+                tcg_gen_ext32u_tl(t1, src);
+            }
+            tcg_gen_movcond_tl(TCG_COND_NE, t1, t0, t1,
+                               cpu_gpr[(rt + 2) & 31], t0);
+            tcg_gen_qemu_st_tl(t1, EA, ctx->mem_idx, memop);
+            tcg_gen_mov_tl(dst, t0);
+
+            tcg_temp_free(t0);
+            tcg_temp_free(t1);
+        }
+        break;
+
+    case 24: /* Fetch and increment bounded */
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            need_serial = true;
+        } else {
+            gen_fetch_inc_conditional(ctx, memop, EA, TCG_COND_NE, 1);
+        }
+        break;
+    case 25: /* Fetch and increment equal */
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            need_serial = true;
+        } else {
+            gen_fetch_inc_conditional(ctx, memop, EA, TCG_COND_EQ, 1);
+        }
+        break;
+    case 28: /* Fetch and decrement bounded */
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            need_serial = true;
+        } else {
+            gen_fetch_inc_conditional(ctx, memop, EA, TCG_COND_NE, -1);
+        }
+        break;
+
+    default:
+        /* invoke data storage error handler */
+        gen_exception_err(ctx, POWERPC_EXCP_DSI, POWERPC_EXCP_INVAL);
+    }
+    tcg_temp_free(EA);
+
+    if (need_serial) {
+        /* Restart with exclusive lock.  */
+        gen_helper_exit_atomic(cpu_env);
+        ctx->base.is_jmp = DISAS_NORETURN;
+    }
+}
+
+static void gen_lwat(DisasContext *ctx)
+{
+    gen_ld_atomic(ctx, DEF_MEMOP(MO_UL));
+}
+
+#ifdef TARGET_PPC64
+static void gen_ldat(DisasContext *ctx)
+{
+    gen_ld_atomic(ctx, DEF_MEMOP(MO_Q));
+}
 #endif
 
-#if defined(CONFIG_USER_ONLY)
-static void gen_conditional_store(DisasContext *ctx, TCGv EA,
-                                  int reg, int memop)
+static void gen_st_atomic(DisasContext *ctx, TCGMemOp memop)
 {
-    TCGv t0 = tcg_temp_new();
+    uint32_t gpr_FC = FC(ctx->opcode);
+    TCGv EA = tcg_temp_new();
+    TCGv src, discard;
 
-    tcg_gen_st_tl(EA, cpu_env, offsetof(CPUPPCState, reserve_ea));
-    tcg_gen_movi_tl(t0, (MEMOP_GET_SIZE(memop) << 5) | reg);
-    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, reserve_info));
-    tcg_temp_free(t0);
-    gen_exception_err(ctx, POWERPC_EXCP_STCX, 0);
+    gen_addr_register(ctx, EA);
+    src = cpu_gpr[rD(ctx->opcode)];
+    discard = tcg_temp_new();
+
+    memop |= MO_ALIGN;
+    switch (gpr_FC) {
+    case 0: /* add and Store */
+        tcg_gen_atomic_add_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 1: /* xor and Store */
+        tcg_gen_atomic_xor_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 2: /* Or and Store */
+        tcg_gen_atomic_or_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 3: /* 'and' and Store */
+        tcg_gen_atomic_and_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 4:  /* Store max unsigned */
+        tcg_gen_atomic_umax_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 5:  /* Store max signed */
+        tcg_gen_atomic_smax_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 6:  /* Store min unsigned */
+        tcg_gen_atomic_umin_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 7:  /* Store min signed */
+        tcg_gen_atomic_smin_fetch_tl(discard, EA, src, ctx->mem_idx, memop);
+        break;
+    case 24: /* Store twin  */
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+        } else {
+            TCGv t = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
+            TCGv s = tcg_temp_new();
+            TCGv s2 = tcg_temp_new();
+            TCGv ea_plus_s = tcg_temp_new();
+
+            tcg_gen_qemu_ld_tl(t, EA, ctx->mem_idx, memop);
+            tcg_gen_addi_tl(ea_plus_s, EA, MEMOP_GET_SIZE(memop));
+            tcg_gen_qemu_ld_tl(t2, ea_plus_s, ctx->mem_idx, memop);
+            tcg_gen_movcond_tl(TCG_COND_EQ, s, t, t2, src, t);
+            tcg_gen_movcond_tl(TCG_COND_EQ, s2, t, t2, src, t2);
+            tcg_gen_qemu_st_tl(s, EA, ctx->mem_idx, memop);
+            tcg_gen_qemu_st_tl(s2, ea_plus_s, ctx->mem_idx, memop);
+
+            tcg_temp_free(ea_plus_s);
+            tcg_temp_free(s2);
+            tcg_temp_free(s);
+            tcg_temp_free(t2);
+            tcg_temp_free(t);
+        }
+        break;
+    default:
+        /* invoke data storage error handler */
+        gen_exception_err(ctx, POWERPC_EXCP_DSI, POWERPC_EXCP_INVAL);
+    }
+    tcg_temp_free(discard);
+    tcg_temp_free(EA);
 }
-#else
-static void gen_conditional_store(DisasContext *ctx, TCGv EA,
-                                  int reg, int memop)
+
+static void gen_stwat(DisasContext *ctx)
+{
+    gen_st_atomic(ctx, DEF_MEMOP(MO_UL));
+}
+
+#ifdef TARGET_PPC64
+static void gen_stdat(DisasContext *ctx)
+{
+    gen_st_atomic(ctx, DEF_MEMOP(MO_Q));
+}
+#endif
+
+static void gen_conditional_store(DisasContext *ctx, TCGMemOp memop)
 {
     TCGLabel *l1 = gen_new_label();
     TCGLabel *l2 = gen_new_label();
-    TCGv t0;
+    TCGv t0 = tcg_temp_new();
+    int reg = rS(ctx->opcode);
 
-    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
+    gen_set_access_type(ctx, ACCESS_RES);
+    gen_addr_reg_index(ctx, t0);
+    tcg_gen_brcond_tl(TCG_COND_NE, t0, cpu_reserve, l1);
+    tcg_temp_free(t0);
 
     t0 = tcg_temp_new();
     tcg_gen_atomic_cmpxchg_tl(t0, cpu_reserve, cpu_reserve_val,
@@ -3206,21 +3333,11 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA,
     gen_set_label(l2);
     tcg_gen_movi_tl(cpu_reserve, -1);
 }
-#endif
 
-#define STCX(name, memop)                                   \
-static void gen_##name(DisasContext *ctx)                   \
-{                                                           \
-    TCGv t0;                                                \
-    int len = MEMOP_GET_SIZE(memop);                        \
-    gen_set_access_type(ctx, ACCESS_RES);                   \
-    t0 = tcg_temp_local_new();                              \
-    gen_addr_reg_index(ctx, t0);                            \
-    if (len > 1) {                                          \
-        gen_check_align(ctx, t0, (len) - 1);                \
-    }                                                       \
-    gen_conditional_store(ctx, t0, rS(ctx->opcode), memop); \
-    tcg_temp_free(t0);                                      \
+#define STCX(name, memop)                  \
+static void gen_##name(DisasContext *ctx)  \
+{                                          \
+    gen_conditional_store(ctx, memop);     \
 }
 
 STCX(stbcx_, DEF_MEMOP(MO_UB))
@@ -3236,9 +3353,8 @@ STCX(stdcx_, DEF_MEMOP(MO_Q))
 /* lqarx */
 static void gen_lqarx(DisasContext *ctx)
 {
-    TCGv EA;
     int rd = rD(ctx->opcode);
-    TCGv gpr1, gpr2;
+    TCGv EA, hi, lo;
 
     if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
                  (rd == rB(ctx->opcode)))) {
@@ -3247,73 +3363,125 @@ static void gen_lqarx(DisasContext *ctx)
     }
 
     gen_set_access_type(ctx, ACCESS_RES);
-    EA = tcg_temp_local_new();
+    EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
-    gen_check_align(ctx, EA, 15);
-    if (unlikely(ctx->le_mode)) {
-        gpr1 = cpu_gpr[rd+1];
-        gpr2 = cpu_gpr[rd];
+
+    /* Note that the low part is always in RD+1, even in LE mode.  */
+    lo = cpu_gpr[rd + 1];
+    hi = cpu_gpr[rd];
+
+    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+#ifdef CONFIG_ATOMIC128
+        TCGv_i32 oi = tcg_temp_new_i32();
+        if (ctx->le_mode) {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
+                                                ctx->mem_idx));
+            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        } else {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
+                                                ctx->mem_idx));
+            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+        }
+        tcg_temp_free_i32(oi);
+        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
+#else
+        /* Restart with exclusive lock.  */
+        gen_helper_exit_atomic(cpu_env);
+        ctx->base.is_jmp = DISAS_NORETURN;
+        tcg_temp_free(EA);
+        return;
+#endif
+    } else if (ctx->le_mode) {
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
+        tcg_gen_mov_tl(cpu_reserve, EA);
+        gen_addr_add(ctx, EA, EA, 8);
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);
     } else {
-        gpr1 = cpu_gpr[rd];
-        gpr2 = cpu_gpr[rd+1];
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ | MO_ALIGN_16);
+        tcg_gen_mov_tl(cpu_reserve, EA);
+        gen_addr_add(ctx, EA, EA, 8);
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);
     }
-    tcg_gen_qemu_ld_i64(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
-    tcg_gen_mov_tl(cpu_reserve, EA);
-    gen_addr_add(ctx, EA, EA, 8);
-    tcg_gen_qemu_ld_i64(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
-
-    tcg_gen_st_tl(gpr1, cpu_env, offsetof(CPUPPCState, reserve_val));
-    tcg_gen_st_tl(gpr2, cpu_env, offsetof(CPUPPCState, reserve_val2));
     tcg_temp_free(EA);
+
+    tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));
+    tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));
 }
 
 /* stqcx. */
 static void gen_stqcx_(DisasContext *ctx)
 {
-    TCGv EA;
-    int reg = rS(ctx->opcode);
-    int len = 16;
-#if !defined(CONFIG_USER_ONLY)
-    TCGLabel *l1;
-    TCGv gpr1, gpr2;
-#endif
+    int rs = rS(ctx->opcode);
+    TCGv EA, hi, lo;
 
-    if (unlikely((rD(ctx->opcode) & 1))) {
+    if (unlikely(rs & 1)) {
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         return;
     }
+
     gen_set_access_type(ctx, ACCESS_RES);
-    EA = tcg_temp_local_new();
+    EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
-    if (len > 1) {
-        gen_check_align(ctx, EA, (len) - 1);
-    }
 
-#if defined(CONFIG_USER_ONLY)
-    gen_conditional_store(ctx, EA, reg, 16);
-#else
-    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-    l1 = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
-    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
+    /* Note that the low part is always in RS+1, even in LE mode.  */
+    lo = cpu_gpr[rs + 1];
+    hi = cpu_gpr[rs];
 
-    if (unlikely(ctx->le_mode)) {
-        gpr1 = cpu_gpr[reg + 1];
-        gpr2 = cpu_gpr[reg];
+    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+        TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
+#ifdef CONFIG_ATOMIC128
+        if (ctx->le_mode) {
+            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+        } else {
+            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+        }
+#else
+        /* Restart with exclusive lock.  */
+        gen_helper_exit_atomic(cpu_env);
+        ctx->base.is_jmp = DISAS_NORETURN;
+#endif
+        tcg_temp_free(EA);
+        tcg_temp_free_i32(oi);
     } else {
-        gpr1 = cpu_gpr[reg];
-        gpr2 = cpu_gpr[reg + 1];
-    }
-    tcg_gen_qemu_st_tl(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
-    gen_addr_add(ctx, EA, EA, 8);
-    tcg_gen_qemu_st_tl(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
+        TCGLabel *lab_fail = gen_new_label();
+        TCGLabel *lab_over = gen_new_label();
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
 
-    gen_set_label(l1);
-    tcg_gen_movi_tl(cpu_reserve, -1);
-#endif
-    tcg_temp_free(EA);
-}
+        tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
+        tcg_temp_free(EA);
 
+        gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
+        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
+                                     ? offsetof(CPUPPCState, reserve_val2)
+                                     : offsetof(CPUPPCState, reserve_val)));
+        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
+
+        tcg_gen_addi_i64(t0, cpu_reserve, 8);
+        gen_qemu_ld64_i64(ctx, t0, t0);
+        tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
+                                     ? offsetof(CPUPPCState, reserve_val)
+                                     : offsetof(CPUPPCState, reserve_val2)));
+        tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
+
+        /* Success */
+        gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
+        tcg_gen_addi_i64(t0, cpu_reserve, 8);
+        gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
+
+        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
+        tcg_gen_br(lab_over);
+
+        gen_set_label(lab_fail);
+        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+
+        gen_set_label(lab_over);
+        tcg_gen_movi_tl(cpu_reserve, -1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
 #endif /* defined(TARGET_PPC64) */
 
 /* sync */
@@ -4636,8 +4804,8 @@ static void gen_eciwx(DisasContext *ctx)
     gen_set_access_type(ctx, ACCESS_EXT);
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
-    gen_check_align(ctx, t0, 0x03);
-    gen_qemu_ld32u(ctx, cpu_gpr[rD(ctx->opcode)], t0);
+    tcg_gen_qemu_ld_tl(cpu_gpr[rD(ctx->opcode)], t0, ctx->mem_idx,
+                       DEF_MEMOP(MO_UL | MO_ALIGN));
     tcg_temp_free(t0);
 }
 
@@ -4649,8 +4817,8 @@ static void gen_ecowx(DisasContext *ctx)
     gen_set_access_type(ctx, ACCESS_EXT);
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
-    gen_check_align(ctx, t0, 0x03);
-    gen_qemu_st32(ctx, cpu_gpr[rD(ctx->opcode)], t0);
+    tcg_gen_qemu_st_tl(cpu_gpr[rD(ctx->opcode)], t0, ctx->mem_idx,
+                       DEF_MEMOP(MO_UL | MO_ALIGN));
     tcg_temp_free(t0);
 }
 
@@ -6886,7 +7054,7 @@ GEN_HANDLER(stop##u, opc, 0xFF, 0xFF, 0x00000000, type),
 #define GEN_STUX(name, stop, opc2, opc3, type)                                \
 GEN_HANDLER(name##ux, 0x1F, opc2, opc3, 0x00000001, type),
 #define GEN_STX_E(name, stop, opc2, opc3, type, type2, chk)                   \
-GEN_HANDLER_E(name##x, 0x1F, opc2, opc3, 0x00000001, type, type2),
+GEN_HANDLER_E(name##x, 0x1F, opc2, opc3, 0x00000000, type, type2),
 #define GEN_STS(name, stop, op, type)                                         \
 GEN_ST(name, stop, op | 0x20, type)                                           \
 GEN_STU(name, stop, op | 0x21, type)                                          \
@@ -7314,6 +7482,7 @@ static bool ppc_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
     gen_debug_exception(ctx);
+    dcbase->is_jmp = DISAS_NORETURN;
     /* The address covered by the breakpoint must be included in
        [tb->pc, tb->pc + tb->size) in order to for it to be
        properly cleared -- thus we increment the PC here so that
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 76d6f3fd5e..7813b1b004 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -10457,6 +10457,7 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
     cc->set_pc = ppc_cpu_set_pc;
     cc->gdb_read_register = ppc_cpu_gdb_read_register;
     cc->gdb_write_register = ppc_cpu_gdb_write_register;
+    cc->do_unaligned_access = ppc_cpu_do_unaligned_access;
 #ifdef CONFIG_USER_ONLY
     cc->handle_mmu_fault = ppc_cpu_handle_mmu_fault;
 #else
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index 34844eead3..c9a6132700 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/gdbstub.h"
@@ -726,10 +727,10 @@ static void dump_tlb(FILE *f, fprintf_function cpu_fprintf,
         bool print_header = true;
 
         if (sz >= 0x100000) {
-            sz >>= 20;
+            sz /= MiB;
             sz_text = "MB";
         } else {
-            sz >>= 10;
+            sz /= KiB;
             sz_text = "KB";
         }
 
diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c
index cf98443468..f5a0d0bc32 100644
--- a/tests/benchmark-crypto-cipher.c
+++ b/tests/benchmark-crypto-cipher.c
@@ -11,6 +11,7 @@
  * top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "crypto/init.h"
 #include "crypto/cipher.h"
 
@@ -56,8 +57,7 @@ static void test_cipher_speed(const void *opaque)
         total += chunk_size;
     } while (g_test_timer_elapsed() < 5.0);
 
-    total /= 1024 * 1024; /* to MB */
-
+    total /= MiB;
     g_print("cbc(aes128): ");
     g_print("Testing chunk_size %zu bytes ", chunk_size);
     g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last());
@@ -78,7 +78,7 @@ int main(int argc, char **argv)
     g_test_init(&argc, &argv, NULL);
     g_assert(qcrypto_init(NULL) == 0);
 
-    for (i = 512; i <= (64 * 1204); i *= 2) {
+    for (i = 512; i <= 64 * KiB; i *= 2) {
         memset(name, 0 , sizeof(name));
         snprintf(name, sizeof(name), "/crypto/cipher/speed-%zu", i);
         g_test_add_data_func(name, (void *)i, test_cipher_speed);
diff --git a/tests/benchmark-crypto-hash.c b/tests/benchmark-crypto-hash.c
index 122bfb6b85..9b6f7a9155 100644
--- a/tests/benchmark-crypto-hash.c
+++ b/tests/benchmark-crypto-hash.c
@@ -11,6 +11,7 @@
  * top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "crypto/init.h"
 #include "crypto/hash.h"
 
@@ -39,7 +40,7 @@ static void test_hash_speed(const void *opaque)
         total += chunk_size;
     } while (g_test_timer_elapsed() < 5.0);
 
-    total /= 1024 * 1024; /* to MB */
+    total /= MiB;
     g_print("sha256: ");
     g_print("Testing chunk_size %zu bytes ", chunk_size);
     g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last());
@@ -57,7 +58,7 @@ int main(int argc, char **argv)
     g_test_init(&argc, &argv, NULL);
     g_assert(qcrypto_init(NULL) == 0);
 
-    for (i = 512; i <= (64 * 1204); i *= 2) {
+    for (i = 512; i <= 64 * KiB; i *= 2) {
         memset(name, 0 , sizeof(name));
         snprintf(name, sizeof(name), "/crypto/hash/speed-%zu", i);
         g_test_add_data_func(name, (void *)i, test_hash_speed);
diff --git a/tests/benchmark-crypto-hmac.c b/tests/benchmark-crypto-hmac.c
index c30250df3e..f1dfa240cb 100644
--- a/tests/benchmark-crypto-hmac.c
+++ b/tests/benchmark-crypto-hmac.c
@@ -11,6 +11,7 @@
  * top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "crypto/init.h"
 #include "crypto/hmac.h"
 
@@ -53,8 +54,7 @@ static void test_hmac_speed(const void *opaque)
         total += chunk_size;
     } while (g_test_timer_elapsed() < 5.0);
 
-    total /= 1024 * 1024; /* to MB */
-
+    total /= MiB;
     g_print("hmac(sha256): ");
     g_print("Testing chunk_size %zu bytes ", chunk_size);
     g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last());
@@ -72,7 +72,7 @@ int main(int argc, char **argv)
     g_test_init(&argc, &argv, NULL);
     g_assert(qcrypto_init(NULL) == 0);
 
-    for (i = 512; i <= (64 * 1204); i *= 2) {
+    for (i = 512; i <= 64 * KiB; i *= 2) {
         memset(name, 0 , sizeof(name));
         snprintf(name, sizeof(name), "/crypto/hmac/speed-%zu", i);
         g_test_add_data_func(name, (void *)i, test_hmac_speed);
diff --git a/tests/qemu-iotests/222 b/tests/qemu-iotests/222
new file mode 100644
index 0000000000..ff3bfc1470
--- /dev/null
+++ b/tests/qemu-iotests/222
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+#
+# This test covers the basic fleecing workflow, which provides a
+# point-in-time snapshot of a node that can be queried over NBD.
+#
+# Copyright (C) 2018 Red Hat, Inc.
+# John helped, too.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: John Snow <jsnow@redhat.com>
+
+import iotests
+from iotests import log, qemu_img, qemu_io, qemu_io_silent
+
+iotests.verify_platform(['linux'])
+
+patterns = [("0x5d", "0",         "64k"),
+            ("0xd5", "1M",        "64k"),
+            ("0xdc", "32M",       "64k"),
+            ("0xcd", "0x3ff0000", "64k")]  # 64M - 64K
+
+overwrite = [("0xab", "0",         "64k"), # Full overwrite
+             ("0xad", "0x00f8000", "64k"), # Partial-left (1M-32K)
+             ("0x1d", "0x2008000", "64k"), # Partial-right (32M+32K)
+             ("0xea", "0x3fe0000", "64k")] # Adjacent-left (64M - 128K)
+
+zeroes = [("0", "0x00f8000", "32k"), # Left-end of partial-left (1M-32K)
+          ("0", "0x2010000", "32k"), # Right-end of partial-right (32M+64K)
+          ("0", "0x3fe0000", "64k")] # overwrite[3]
+
+remainder = [("0xd5", "0x108000",  "32k"), # Right-end of partial-left [1]
+             ("0xdc", "32M",       "32k"), # Left-end of partial-right [2]
+             ("0xcd", "0x3ff0000", "64k")] # patterns[3]
+
+with iotests.FilePath('base.img') as base_img_path, \
+     iotests.FilePath('fleece.img') as fleece_img_path, \
+     iotests.FilePath('nbd.sock') as nbd_sock_path, \
+     iotests.VM() as vm:
+
+    log('--- Setting up images ---')
+    log('')
+
+    assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+    assert qemu_img('create', '-f', "qcow2", fleece_img_path, '64M') == 0
+
+    for p in patterns:
+        qemu_io('-f', iotests.imgfmt,
+                '-c', 'write -P%s %s %s' % p, base_img_path)
+
+    log('Done')
+
+    log('')
+    log('--- Launching VM ---')
+    log('')
+
+    vm.add_drive(base_img_path)
+    vm.launch()
+    log('Done')
+
+    log('')
+    log('--- Setting up Fleecing Graph ---')
+    log('')
+
+    src_node = "drive0"
+    tgt_node = "fleeceNode"
+
+    # create tgt_node backed by src_node
+    log(vm.qmp("blockdev-add", **{
+        "driver": "qcow2",
+        "node-name": tgt_node,
+        "file": {
+            "driver": "file",
+            "filename": fleece_img_path,
+        },
+        "backing": src_node,
+    }))
+
+    # Establish COW from source to fleecing node
+    log(vm.qmp("blockdev-backup",
+               device=src_node,
+               target=tgt_node,
+               sync="none"))
+
+    log('')
+    log('--- Setting up NBD Export ---')
+    log('')
+
+    nbd_uri = 'nbd+unix:///%s?socket=%s' % (tgt_node, nbd_sock_path)
+    log(vm.qmp("nbd-server-start",
+               **{"addr": { "type": "unix",
+                            "data": { "path": nbd_sock_path } } }))
+
+    log(vm.qmp("nbd-server-add", device=tgt_node))
+
+    log('')
+    log('--- Sanity Check ---')
+    log('')
+
+    for p in (patterns + zeroes):
+        cmd = "read -P%s %s %s" % p
+        log(cmd)
+        assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
+
+    log('')
+    log('--- Testing COW ---')
+    log('')
+
+    for p in overwrite:
+        cmd = "write -P%s %s %s" % p
+        log(cmd)
+        log(vm.hmp_qemu_io(src_node, cmd))
+
+    log('')
+    log('--- Verifying Data ---')
+    log('')
+
+    for p in (patterns + zeroes):
+        cmd = "read -P%s %s %s" % p
+        log(cmd)
+        assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
+
+    log('')
+    log('--- Cleanup ---')
+    log('')
+
+    log(vm.qmp('block-job-cancel', device=src_node))
+    log(vm.event_wait('BLOCK_JOB_CANCELLED'),
+        filters=[iotests.filter_qmp_event])
+    log(vm.qmp('nbd-server-stop'))
+    log(vm.qmp('blockdev-del', node_name=tgt_node))
+    vm.shutdown()
+
+    log('')
+    log('--- Confirming writes ---')
+    log('')
+
+    for p in (overwrite + remainder):
+        cmd = "read -P%s %s %s" % p
+        log(cmd)
+        assert qemu_io_silent(base_img_path, '-c', cmd) == 0
+
+    log('')
+    log('Done')
diff --git a/tests/qemu-iotests/222.out b/tests/qemu-iotests/222.out
new file mode 100644
index 0000000000..48f336a02b
--- /dev/null
+++ b/tests/qemu-iotests/222.out
@@ -0,0 +1,67 @@
+--- Setting up images ---
+
+Done
+
+--- Launching VM ---
+
+Done
+
+--- Setting up Fleecing Graph ---
+
+{u'return': {}}
+{u'return': {}}
+
+--- Setting up NBD Export ---
+
+{u'return': {}}
+{u'return': {}}
+
+--- Sanity Check ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read -P0 0x2010000 32k
+read -P0 0x3fe0000 64k
+
+--- Testing COW ---
+
+write -P0xab 0 64k
+{u'return': u''}
+write -P0xad 0x00f8000 64k
+{u'return': u''}
+write -P0x1d 0x2008000 64k
+{u'return': u''}
+write -P0xea 0x3fe0000 64k
+{u'return': u''}
+
+--- Verifying Data ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read -P0 0x2010000 32k
+read -P0 0x3fe0000 64k
+
+--- Cleanup ---
+
+{u'return': {}}
+{u'timestamp': {u'seconds': 'SECS', u'microseconds': 'USECS'}, u'data': {u'device': u'drive0', u'type': u'backup', u'speed': 0, u'len': 67108864, u'offset': 393216}, u'event': u'BLOCK_JOB_CANCELLED'}
+{u'return': {}}
+{u'return': {}}
+
+--- Confirming writes ---
+
+read -P0xab 0 64k
+read -P0xad 0x00f8000 64k
+read -P0x1d 0x2008000 64k
+read -P0xea 0x3fe0000 64k
+read -P0xd5 0x108000 32k
+read -P0xdc 32M 32k
+read -P0xcd 0x3ff0000 64k
+
+Done
diff --git a/tests/qemu-iotests/223 b/tests/qemu-iotests/223
new file mode 100755
index 0000000000..b63b7a4f9e
--- /dev/null
+++ b/tests/qemu-iotests/223
@@ -0,0 +1,138 @@
+#!/bin/bash
+#
+# Test reading dirty bitmap over NBD
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1 # failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+    _cleanup_qemu
+    rm -f "$TEST_DIR/nbd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto file # uses NBD as well
+_supported_os Linux
+
+function do_run_qemu()
+{
+    echo Testing: "$@"
+    $QEMU -nographic -qmp stdio -serial none "$@"
+    echo
+}
+
+function run_qemu()
+{
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp \
+                          | _filter_qemu | _filter_imgfmt \
+                          | _filter_actual_image_size
+}
+
+echo
+echo "=== Create partially sparse image, then add dirty bitmap ==="
+echo
+
+_make_test_img 4M
+$QEMU_IO -c 'w -P 0x11 1M 2M' "$TEST_IMG" | _filter_qemu_io
+run_qemu <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+  "arguments": {
+    "driver": "$IMGFMT",
+    "node-name": "n",
+    "file": {
+      "driver": "file",
+      "filename": "$TEST_IMG"
+    }
+  }
+}
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": {
+    "node": "n",
+    "name": "b",
+    "persistent": true
+  }
+}
+{ "execute": "quit" }
+EOF
+
+echo
+echo "=== Write part of the file under active bitmap ==="
+echo
+
+$QEMU_IO -c 'w -P 0x22 2M 2M' "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== End dirty bitmap, and start serving image over NBD ==="
+echo
+
+_launch_qemu 2> >(_filter_nbd)
+
+silent=
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"qmp_capabilities"}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"blockdev-add",
+  "arguments":{"driver":"qcow2", "node-name":"n",
+    "file":{"driver":"file", "filename":"'"$TEST_IMG"'"}}}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"x-block-dirty-bitmap-disable",
+  "arguments":{"node":"n", "name":"b"}}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-start",
+  "arguments":{"addr":{"type":"unix",
+    "data":{"path":"'"$TEST_DIR/nbd"'"}}}}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add",
+  "arguments":{"device":"n"}}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"x-nbd-server-add-bitmap",
+  "arguments":{"name":"n", "bitmap":"b"}}' "return"
+
+echo
+echo "=== Contrast normal status with dirty-bitmap status ==="
+echo
+
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT
+IMG="driver=nbd,export=n,server.type=unix,server.path=$TEST_DIR/nbd"
+$QEMU_IO -r -c 'r -P 0 0 1m' -c 'r -P 0x11 1m 1m' \
+  -c 'r -P 0x22 2m 2m' --image-opts "$IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json --image-opts \
+  "$IMG" | _filter_qemu_img_map
+$QEMU_IMG map --output=json --image-opts \
+  "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b" | _filter_qemu_img_map
+
+echo
+echo "=== End NBD server ==="
+echo
+
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-remove",
+  "arguments":{"name":"n"}}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-stop"}' "return"
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"quit"}' "return"
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
new file mode 100644
index 0000000000..33021c8e6a
--- /dev/null
+++ b/tests/qemu-iotests/223.out
@@ -0,0 +1,49 @@
+QA output created by 223
+
+=== Create partially sparse image, then add dirty bitmap ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+wrote 2097152/2097152 bytes at offset 1048576
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Testing:
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+
+
+=== Write part of the file under active bitmap ===
+
+wrote 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== End dirty bitmap, and start serving image over NBD ===
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+=== Contrast normal status with dirty-bitmap status ===
+
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false},
+{ "start": 1048576, "length": 3145728, "depth": 0, "zero": false, "data": true}]
+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true},
+{ "start": 2097152, "length": 2097152, "depth": 0, "zero": false, "data": false}]
+
+=== End NBD server ===
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index eea75819d2..af309ebba7 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -220,3 +220,5 @@
 218 rw auto quick
 219 rw auto
 221 rw auto quick
+222 rw auto quick
+223 rw auto quick
diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 64a489c2e9..d85c3e0f6d 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -26,8 +26,9 @@
  */
 
 #include "qemu/osdep.h"
-
+#include "qemu/units.h"
 #include "qemu/cutils.h"
+#include "qemu/units.h"
 
 static void test_parse_uint_null(void)
 {
@@ -2022,7 +2023,7 @@ static void test_qemu_strtosz_units(void)
     /* default is M */
     err = qemu_strtosz_MiB(none, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, M_BYTE);
+    g_assert_cmpint(res, ==, MiB);
     g_assert(endptr == none + 1);
 
     err = qemu_strtosz(b, &endptr, &res);
@@ -2032,32 +2033,32 @@ static void test_qemu_strtosz_units(void)
 
     err = qemu_strtosz(k, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, K_BYTE);
+    g_assert_cmpint(res, ==, KiB);
     g_assert(endptr == k + 2);
 
     err = qemu_strtosz(m, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, M_BYTE);
+    g_assert_cmpint(res, ==, MiB);
     g_assert(endptr == m + 2);
 
     err = qemu_strtosz(g, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, G_BYTE);
+    g_assert_cmpint(res, ==, GiB);
     g_assert(endptr == g + 2);
 
     err = qemu_strtosz(t, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, T_BYTE);
+    g_assert_cmpint(res, ==, TiB);
     g_assert(endptr == t + 2);
 
     err = qemu_strtosz(p, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, P_BYTE);
+    g_assert_cmpint(res, ==, PiB);
     g_assert(endptr == p + 2);
 
     err = qemu_strtosz(e, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, E_BYTE);
+    g_assert_cmpint(res, ==, EiB);
     g_assert(endptr == e + 2);
 }
 
@@ -2070,7 +2071,7 @@ static void test_qemu_strtosz_float(void)
 
     err = qemu_strtosz(str, &endptr, &res);
     g_assert_cmpint(err, ==, 0);
-    g_assert_cmpint(res, ==, 12.345 * M_BYTE);
+    g_assert_cmpint(res, ==, 12.345 * MiB);
     g_assert(endptr == str + 7);
 }
 
@@ -2106,7 +2107,7 @@ static void test_qemu_strtosz_trailing(void)
 
     str = "123xxx";
     err = qemu_strtosz_MiB(str, &endptr, &res);
-    g_assert_cmpint(res, ==, 123 * M_BYTE);
+    g_assert_cmpint(res, ==, 123 * MiB);
     g_assert(endptr == str + 3);
 
     err = qemu_strtosz(str, NULL, &res);
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 63cb14629b..09b0ae3c68 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
@@ -457,11 +458,11 @@ static void test_keyval_visit_size(void)
     visit_type_size(v, "sz2", &sz, &error_abort);
     g_assert_cmpuint(sz, ==, 1536);
     visit_type_size(v, "sz3", &sz, &error_abort);
-    g_assert_cmphex(sz, ==, 2 * M_BYTE);
+    g_assert_cmphex(sz, ==, 2 * MiB);
     visit_type_size(v, "sz4", &sz, &error_abort);
-    g_assert_cmphex(sz, ==, G_BYTE / 10);
+    g_assert_cmphex(sz, ==, GiB / 10);
     visit_type_size(v, "sz5", &sz, &error_abort);
-    g_assert_cmphex(sz, ==, 16777215 * T_BYTE);
+    g_assert_cmphex(sz, ==, 16777215ULL * TiB);
     visit_check_struct(v, &error_abort);
     visit_end_struct(v, NULL);
     visit_free(v);
diff --git a/tests/test-qemu-opts.c b/tests/test-qemu-opts.c
index 7092e216f7..ef96e84aed 100644
--- a/tests/test-qemu-opts.c
+++ b/tests/test-qemu-opts.c
@@ -8,7 +8,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/cutils.h"
+#include "qemu/units.h"
 #include "qemu/option.h"
 #include "qemu/option_int.h"
 #include "qapi/error.h"
@@ -704,13 +704,12 @@ static void test_opts_parse_size(void)
     g_assert_cmpuint(opts_count(opts), ==, 3);
     g_assert_cmphex(qemu_opt_get_size(opts, "size1", 0), ==, 8);
     g_assert_cmphex(qemu_opt_get_size(opts, "size2", 0), ==, 1536);
-    g_assert_cmphex(qemu_opt_get_size(opts, "size3", 0), ==, 2 * M_BYTE);
+    g_assert_cmphex(qemu_opt_get_size(opts, "size3", 0), ==, 2 * MiB);
     opts = qemu_opts_parse(&opts_list_02, "size1=0.1G,size2=16777215T",
                            false, &error_abort);
     g_assert_cmpuint(opts_count(opts), ==, 2);
-    g_assert_cmphex(qemu_opt_get_size(opts, "size1", 0), ==, G_BYTE / 10);
-    g_assert_cmphex(qemu_opt_get_size(opts, "size2", 0),
-                     ==, 16777215 * T_BYTE);
+    g_assert_cmphex(qemu_opt_get_size(opts, "size1", 0), ==, GiB / 10);
+    g_assert_cmphex(qemu_opt_get_size(opts, "size2", 0), ==, 16777215ULL * TiB);
 
     /* Beyond limit with suffix */
     opts = qemu_opts_parse(&opts_list_02, "size1=16777216T",
diff --git a/vl.c b/vl.c
index 9442beee21..16b913f9d5 100644
--- a/vl.c
+++ b/vl.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu-version.h"
 #include "qemu/cutils.h"
@@ -2808,8 +2809,8 @@ static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size,
         if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) {
             uint64_t overflow_check = sz;
 
-            sz <<= 20;
-            if ((sz >> 20) != overflow_check) {
+            sz *= MiB;
+            if (sz / MiB != overflow_check) {
                 error_report("too large 'size' option value");
                 exit(EXIT_FAILURE);
             }