summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--block/sheepdog.c70
-rwxr-xr-xconfigure33
-rw-r--r--hw/arm_gic.c46
-rw-r--r--hw/arm_gic_common.c16
-rw-r--r--hw/arm_gic_internal.h20
-rw-r--r--hw/armv7m_nvic.c6
-rw-r--r--hw/cadence_ttc.c2
-rw-r--r--hw/ds1338.c123
-rw-r--r--hw/e1000.c12
-rw-r--r--hw/pc_piix.c8
-rw-r--r--hw/qxl-render.c4
-rw-r--r--hw/qxl.c57
-rw-r--r--hw/qxl.h5
-rw-r--r--hw/rtl8139.c24
-rw-r--r--hw/sun4m.c11
-rw-r--r--hw/sun4u.c36
-rw-r--r--hw/usb/hcd-uhci.c10
-rw-r--r--hw/usb/redirect.c24
-rw-r--r--hw/versatilepb.c5
-rw-r--r--hw/vfio_pci.c498
-rw-r--r--hw/vfio_pci_int.h114
-rw-r--r--hw/virtio-net.c5
-rw-r--r--hw/zynq_slcr.c2
-rw-r--r--linux-user/main.c7
-rw-r--r--linux-user/syscall.c2
-rw-r--r--net.c11
-rw-r--r--net/clients.h (renamed from net/socket.h)28
-rw-r--r--net/dump.c2
-rw-r--r--net/dump.h33
-rw-r--r--net/hub.c1
-rw-r--r--net/hub.h2
-rw-r--r--net/slirp.c3
-rw-r--r--net/slirp.h3
-rw-r--r--net/socket.c3
-rw-r--r--net/tap-win32.c3
-rw-r--r--net/tap.c3
-rw-r--r--net/tap.h6
-rw-r--r--net/vde.c3
-rw-r--r--net/vde.h37
-rw-r--r--qemu-ga.c6
-rw-r--r--sparc-dis.c2
-rw-r--r--target-arm/neon_helper.c1
-rw-r--r--target-arm/translate.c2
-rw-r--r--target-i386/translate.c2
-rw-r--r--target-microblaze/cpu.h1
-rw-r--r--target-mips/op_helper.c63
-rw-r--r--target-sparc/int32_helper.c5
-rw-r--r--target-sparc/int64_helper.c5
-rw-r--r--target-sparc/translate.c119
-rw-r--r--targphys.h4
-rw-r--r--tcg/arm/tcg-target.c56
-rw-r--r--tcg/arm/tcg-target.h2
-rw-r--r--tcg/ia64/tcg-target.c194
-rw-r--r--tcg/ia64/tcg-target.h11
-rw-r--r--tcg/optimize.c471
-rw-r--r--tcg/sparc/tcg-target.c643
-rw-r--r--tcg/sparc/tcg-target.h9
-rw-r--r--tcg/tcg-op.h11
-rw-r--r--tcg/tcg.c53
-rw-r--r--ui/spice-core.c51
-rw-r--r--ui/spice-display.c38
-rw-r--r--ui/spice-display.h5
-rw-r--r--ui/vnc-jobs.c16
-rw-r--r--ui/vnc.c6
64 files changed, 1725 insertions, 1329 deletions
diff --git a/block/sheepdog.c b/block/sheepdog.c
index f35ff5bbe1..93061744d6 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -201,12 +201,12 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
     return hval;
 }
 
-static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
+static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
 {
     return inode->vdi_id == inode->data_vdi_id[idx];
 }
 
-static inline int is_data_obj(uint64_t oid)
+static inline bool is_data_obj(uint64_t oid)
 {
     return !(VDI_BIT & oid);
 }
@@ -231,7 +231,7 @@ static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
     return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
 }
 
-static inline int is_snapshot(struct SheepdogInode *inode)
+static inline bool is_snapshot(struct SheepdogInode *inode)
 {
     return !!inode->snap_ctime;
 }
@@ -281,7 +281,7 @@ struct SheepdogAIOCB {
     Coroutine *coroutine;
     void (*aio_done_func)(SheepdogAIOCB *);
 
-    int canceled;
+    bool canceled;
     int nr_pending;
 };
 
@@ -292,8 +292,8 @@ typedef struct BDRVSheepdogState {
     uint32_t max_dirty_data_idx;
 
     char name[SD_MAX_VDI_LEN];
-    int is_snapshot;
-    uint8_t cache_enabled;
+    bool is_snapshot;
+    bool cache_enabled;
 
     char *addr;
     char *port;
@@ -417,7 +417,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
      */
     acb->ret = -EIO;
     qemu_coroutine_enter(acb->coroutine, NULL);
-    acb->canceled = 1;
+    acb->canceled = true;
 }
 
 static AIOPool sd_aio_pool = {
@@ -439,7 +439,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     acb->nb_sectors = nb_sectors;
 
     acb->aio_done_func = NULL;
-    acb->canceled = 0;
+    acb->canceled = false;
     acb->coroutine = qemu_coroutine_self();
     acb->ret = 0;
     acb->nr_pending = 0;
@@ -613,7 +613,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
 }
 
 static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, int create,
+                           struct iovec *iov, int niov, bool create,
                            enum AIOCBState aiocb_type);
 
 
@@ -646,7 +646,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
         QLIST_REMOVE(aio_req, aio_siblings);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
         ret = add_aio_request(s, aio_req, acb->qiov->iov,
-                              acb->qiov->niov, 0, acb->aiocb_type);
+                              acb->qiov->niov, false, acb->aiocb_type);
         if (ret < 0) {
             error_report("add_aio_request is failed");
             free_aio_req(s, aio_req);
@@ -943,7 +943,7 @@ out:
 }
 
 static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, int create,
+                           struct iovec *iov, int niov, bool create,
                            enum AIOCBState aiocb_type)
 {
     int nr_copies = s->inode.nr_copies;
@@ -1022,7 +1022,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
 static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
                              unsigned int datalen, uint64_t offset,
-                             int write, int create, uint8_t cache)
+                             bool write, bool create, bool cache)
 {
     SheepdogObjReq hdr;
     SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -1071,18 +1071,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
 }
 
 static int read_object(int fd, char *buf, uint64_t oid, int copies,
-                       unsigned int datalen, uint64_t offset, uint8_t cache)
+                       unsigned int datalen, uint64_t offset, bool cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
-                             cache);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, false,
+                             false, cache);
 }
 
 static int write_object(int fd, char *buf, uint64_t oid, int copies,
-                        unsigned int datalen, uint64_t offset, int create,
-                        uint8_t cache)
+                        unsigned int datalen, uint64_t offset, bool create,
+                        bool cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
-                             cache);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, true,
+                             create, cache);
 }
 
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1117,7 +1117,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
         goto out;
     }
 
-    s->cache_enabled = 1;
+    s->cache_enabled = true;
     s->flush_fd = connect_to_sdog(s->addr, s->port);
     if (s->flush_fd < 0) {
         error_report("failed to connect");
@@ -1127,7 +1127,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 
     if (snapid || tag[0] != '\0') {
         dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
-        s->is_snapshot = 1;
+        s->is_snapshot = true;
     }
 
     fd = connect_to_sdog(s->addr, s->port);
@@ -1270,7 +1270,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
     BDRVSheepdogState *s;
     char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
     uint32_t snapid;
-    int prealloc = 0;
+    bool prealloc = false;
     const char *vdiname;
 
     s = g_malloc0(sizeof(BDRVSheepdogState));
@@ -1292,9 +1292,9 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
             backing_file = options->value.s;
         } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
             if (!options->value.s || !strcmp(options->value.s, "off")) {
-                prealloc = 0;
+                prealloc = false;
             } else if (!strcmp(options->value.s, "full")) {
-                prealloc = 1;
+                prealloc = true;
             } else {
                 error_report("Invalid preallocation mode: '%s'",
                              options->value.s);
@@ -1422,7 +1422,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
     s->inode.vdi_size = offset;
     ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
+                       s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
     close(fd);
 
     if (ret < 0) {
@@ -1461,7 +1461,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
         aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
                                 data_len, offset, 0, 0, offset);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        ret = add_aio_request(s, aio_req, &iov, 1, 0, AIOCB_WRITE_UDATA);
+        ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
         if (ret) {
             free_aio_req(s, aio_req);
             acb->ret = -EIO;
@@ -1515,7 +1515,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
 
     memcpy(&s->inode, buf, sizeof(s->inode));
 
-    s->is_snapshot = 0;
+    s->is_snapshot = false;
     ret = 0;
     dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
 
@@ -1570,7 +1570,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
     while (done != total) {
         uint8_t flags = 0;
         uint64_t old_oid = 0;
-        int create = 0;
+        bool create = false;
 
         oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
 
@@ -1585,10 +1585,10 @@ static int coroutine_fn sd_co_rw_vector(void *p)
             break;
         case AIOCB_WRITE_UDATA:
             if (!inode->data_vdi_id[idx]) {
-                create = 1;
+                create = true;
             } else if (!is_data_obj_writable(inode, idx)) {
                 /* Copy-On-Write */
-                create = 1;
+                create = true;
                 old_oid = oid;
                 flags = SD_FLAG_CMD_COW;
             }
@@ -1722,7 +1722,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
     if (rsp->result == SD_RES_INVALID_PARMS) {
         dprintf("disable write cache since the server doesn't support it\n");
 
-        s->cache_enabled = 0;
+        s->cache_enabled = false;
         closesocket(s->flush_fd);
         return 0;
     }
@@ -1773,7 +1773,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     }
 
     ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
+                       s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
     if (ret < 0) {
         error_report("failed to write snapshot's inode.");
         goto cleanup;
@@ -1860,7 +1860,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto out;
     }
 
-    s->is_snapshot = 1;
+    s->is_snapshot = true;
 
     g_free(buf);
     g_free(old_s);
@@ -1978,8 +1978,8 @@ out:
 static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
                                 int64_t pos, int size, int load)
 {
-    int fd, create;
-    int ret = 0, remaining = size;
+    bool create;
+    int fd, ret = 0, remaining = size;
     unsigned int data_len;
     uint64_t vmstate_oid;
     uint32_t vdi_index;
diff --git a/configure b/configure
index a6bdf9aff9..f9c31f4195 100755
--- a/configure
+++ b/configure
@@ -185,6 +185,7 @@ libdir="\${prefix}/lib"
 libexecdir="\${prefix}/libexec"
 includedir="\${prefix}/include"
 sysconfdir="\${prefix}/etc"
+local_statedir="\${prefix}/var"
 confsuffix="/qemu"
 slirp="yes"
 fmod_lib=""
@@ -554,6 +555,7 @@ EOF
   qemu_docdir="\${prefix}"
   bindir="\${prefix}"
   sysconfdir="\${prefix}"
+  local_statedir="\${prefix}"
   confsuffix=""
   libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga"
 fi
@@ -630,7 +632,9 @@ for opt do
   ;;
   --sysconfdir=*) sysconfdir="$optarg"
   ;;
-  --sbindir=*|--sharedstatedir=*|--localstatedir=*|\
+  --localstatedir=*) local_statedir="$optarg"
+  ;;
+  --sbindir=*|--sharedstatedir=*|\
   --oldincludedir=*|--datarootdir=*|--infodir=*|--localedir=*|\
   --htmldir=*|--dvidir=*|--pdfdir=*|--psdir=*)
     # These switches are silently ignored, for compatibility with
@@ -1001,6 +1005,7 @@ echo "  --datadir=PATH           install firmware in PATH$confsuffix"
 echo "  --docdir=PATH            install documentation in PATH$confsuffix"
 echo "  --bindir=PATH            install binaries in PATH"
 echo "  --sysconfdir=PATH        install config in PATH$confsuffix"
+echo "  --localstatedir=PATH     install local state in PATH"
 echo "  --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix]"
 echo "  --enable-debug-tcg       enable TCG debugging"
 echo "  --disable-debug-tcg      disable TCG debugging (default)"
@@ -1291,7 +1296,7 @@ if test -z "$cross_prefix" ; then
 # big/little endian test
 cat > $TMPC << EOF
 #include <inttypes.h>
-int main(int argc, char ** argv){
+int main(void) {
         volatile uint32_t i=0x01234567;
         return (*((uint8_t*)(&i))) == 0x67;
 }
@@ -2689,20 +2694,14 @@ int main(void) { spice_server_new(); return 0; }
 EOF
   spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null)
   spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null)
-  if $pkg_config --atleast-version=0.8.2 spice-server >/dev/null 2>&1 && \
-     $pkg_config --atleast-version=0.8.1 spice-protocol > /dev/null 2>&1 && \
+  if $pkg_config --atleast-version=0.12.0 spice-server >/dev/null 2>&1 && \
+     $pkg_config --atleast-version=0.12.2 spice-protocol > /dev/null 2>&1 && \
      compile_prog "$spice_cflags" "$spice_libs" ; then
     spice="yes"
     libs_softmmu="$libs_softmmu $spice_libs"
     QEMU_CFLAGS="$QEMU_CFLAGS $spice_cflags"
     spice_protocol_version=$($pkg_config --modversion spice-protocol)
     spice_server_version=$($pkg_config --modversion spice-server)
-    if $pkg_config --atleast-version=0.12.0 spice-protocol >/dev/null 2>&1; then
-        spice_qxl_io_monitors_config_async="yes"
-    fi
-    if $pkg_config --atleast-version=0.12.2 spice-protocol > /dev/null 2>&1; then
-        spice_qxl_client_monitors_config="yes"
-    fi
   else
     if test "$spice" = "yes" ; then
       feature_not_found "spice"
@@ -2870,7 +2869,7 @@ static int sfaa(int *ptr)
   return __sync_fetch_and_and(ptr, 0);
 }
 
-int main(int argc, char **argv)
+int main(void)
 {
   int val = 42;
   sfaa(&val);
@@ -3090,6 +3089,7 @@ echo "library directory `eval echo $libdir`"
 echo "libexec directory `eval echo $libexecdir`"
 echo "include directory `eval echo $includedir`"
 echo "config directory  `eval echo $sysconfdir`"
+echo "local state directory   `eval echo $local_statedir`"
 if test "$mingw32" = "no" ; then
 echo "Manual directory  `eval echo $mandir`"
 echo "ELF interp prefix $interp_prefix"
@@ -3199,6 +3199,7 @@ echo "sysconfdir=$sysconfdir" >> $config_host_mak
 echo "qemu_confdir=$qemu_confdir" >> $config_host_mak
 echo "qemu_datadir=$qemu_datadir" >> $config_host_mak
 echo "qemu_docdir=$qemu_docdir" >> $config_host_mak
+echo "qemu_localstatedir=$local_statedir" >> $config_host_mak
 echo "CONFIG_QEMU_HELPERDIR=\"$libexecdir\"" >> $config_host_mak
 
 echo "ARCH=$ARCH" >> $config_host_mak
@@ -3447,14 +3448,6 @@ if test "$spice" = "yes" ; then
   echo "CONFIG_SPICE=y" >> $config_host_mak
 fi
 
-if test "$spice_qxl_io_monitors_config_async" = "yes" ; then
-  echo "CONFIG_QXL_IO_MONITORS_CONFIG_ASYNC=y" >> $config_host_mak
-fi
-
-if test "$spice_qxl_client_monitors_config" = "yes" ; then
-  echo "CONFIG_QXL_CLIENT_MONITORS_CONFIG=y" >> $config_host_mak
-fi
-
 if test "$smartcard" = "yes" ; then
   echo "CONFIG_SMARTCARD=y" >> $config_host_mak
 fi
@@ -3706,7 +3699,6 @@ case "$target_arch2" in
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   alpha)
@@ -3809,7 +3801,6 @@ case "$target_arch2" in
     target_long_alignment=8
   ;;
   unicore32)
-    target_phys_bits=32
   ;;
   xtensa|xtensaeb)
     TARGET_ARCH=xtensa
diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 55871fad19..56376c0d75 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -36,7 +36,7 @@ static const uint8_t gic_id[] = {
 
 #define NUM_CPU(s) ((s)->num_cpu)
 
-static inline int gic_get_current_cpu(gic_state *s)
+static inline int gic_get_current_cpu(GICState *s)
 {
     if (s->num_cpu > 1) {
         return cpu_single_env->cpu_index;
@@ -46,7 +46,7 @@ static inline int gic_get_current_cpu(gic_state *s)
 
 /* TODO: Many places that call this routine could be optimized.  */
 /* Update interrupt status after enabled or pending bits have been changed.  */
-void gic_update(gic_state *s)
+void gic_update(GICState *s)
 {
     int best_irq;
     int best_prio;
@@ -84,7 +84,7 @@ void gic_update(gic_state *s)
     }
 }
 
-void gic_set_pending_private(gic_state *s, int cpu, int irq)
+void gic_set_pending_private(GICState *s, int cpu, int irq)
 {
     int cm = 1 << cpu;
 
@@ -105,7 +105,7 @@ static void gic_set_irq(void *opaque, int irq, int level)
      *  [N+32..N+63] : PPI (internal interrupts for CPU 1
      *  ...
      */
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     int cm, target;
     if (irq < (s->num_irq - GIC_INTERNAL)) {
         /* The first external input line is internal interrupt 32.  */
@@ -137,7 +137,7 @@ static void gic_set_irq(void *opaque, int irq, int level)
     gic_update(s);
 }
 
-static void gic_set_running_irq(gic_state *s, int cpu, int irq)
+static void gic_set_running_irq(GICState *s, int cpu, int irq)
 {
     s->running_irq[cpu] = irq;
     if (irq == 1023) {
@@ -148,7 +148,7 @@ static void gic_set_running_irq(gic_state *s, int cpu, int irq)
     gic_update(s);
 }
 
-uint32_t gic_acknowledge_irq(gic_state *s, int cpu)
+uint32_t gic_acknowledge_irq(GICState *s, int cpu)
 {
     int new_irq;
     int cm = 1 << cpu;
@@ -167,7 +167,7 @@ uint32_t gic_acknowledge_irq(gic_state *s, int cpu)
     return new_irq;
 }
 
-void gic_complete_irq(gic_state *s, int cpu, int irq)
+void gic_complete_irq(GICState *s, int cpu, int irq)
 {
     int update = 0;
     int cm = 1 << cpu;
@@ -214,7 +214,7 @@ void gic_complete_irq(gic_state *s, int cpu, int irq)
 
 static uint32_t gic_dist_readb(void *opaque, target_phys_addr_t offset)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     uint32_t res;
     int irq;
     int i;
@@ -347,7 +347,7 @@ static uint32_t gic_dist_readl(void *opaque, target_phys_addr_t offset)
 static void gic_dist_writeb(void *opaque, target_phys_addr_t offset,
                             uint32_t value)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     int irq;
     int i;
     int cpu;
@@ -500,7 +500,7 @@ static void gic_dist_writew(void *opaque, target_phys_addr_t offset,
 static void gic_dist_writel(void *opaque, target_phys_addr_t offset,
                             uint32_t value)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     if (offset == 0xf00) {
         int cpu;
         int irq;
@@ -539,7 +539,7 @@ static const MemoryRegionOps gic_dist_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static uint32_t gic_cpu_read(gic_state *s, int cpu, int offset)
+static uint32_t gic_cpu_read(GICState *s, int cpu, int offset)
 {
     switch (offset) {
     case 0x00: /* Control */
@@ -561,12 +561,12 @@ static uint32_t gic_cpu_read(gic_state *s, int cpu, int offset)
     }
 }
 
-static void gic_cpu_write(gic_state *s, int cpu, int offset, uint32_t value)
+static void gic_cpu_write(GICState *s, int cpu, int offset, uint32_t value)
 {
     switch (offset) {
     case 0x00: /* Control */
         s->cpu_enabled[cpu] = (value & 1);
-        DPRINTF("CPU %d %sabled\n", cpu, s->cpu_enabled ? "En" : "Dis");
+        DPRINTF("CPU %d %sabled\n", cpu, s->cpu_enabled[cpu] ? "En" : "Dis");
         break;
     case 0x04: /* Priority mask */
         s->priority_mask[cpu] = (value & 0xff);
@@ -587,25 +587,25 @@ static void gic_cpu_write(gic_state *s, int cpu, int offset, uint32_t value)
 static uint64_t gic_thiscpu_read(void *opaque, target_phys_addr_t addr,
                                  unsigned size)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     return gic_cpu_read(s, gic_get_current_cpu(s), addr);
 }
 
 static void gic_thiscpu_write(void *opaque, target_phys_addr_t addr,
                               uint64_t value, unsigned size)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     gic_cpu_write(s, gic_get_current_cpu(s), addr, value);
 }
 
 /* Wrappers to read/write the GIC CPU interface for a specific CPU.
- * These just decode the opaque pointer into gic_state* + cpu id.
+ * These just decode the opaque pointer into GICState* + cpu id.
  */
 static uint64_t gic_do_cpu_read(void *opaque, target_phys_addr_t addr,
                                 unsigned size)
 {
-    gic_state **backref = (gic_state **)opaque;
-    gic_state *s = *backref;
+    GICState **backref = (GICState **)opaque;
+    GICState *s = *backref;
     int id = (backref - s->backref);
     return gic_cpu_read(s, id, addr);
 }
@@ -613,8 +613,8 @@ static uint64_t gic_do_cpu_read(void *opaque, target_phys_addr_t addr,
 static void gic_do_cpu_write(void *opaque, target_phys_addr_t addr,
                              uint64_t value, unsigned size)
 {
-    gic_state **backref = (gic_state **)opaque;
-    gic_state *s = *backref;
+    GICState **backref = (GICState **)opaque;
+    GICState *s = *backref;
     int id = (backref - s->backref);
     gic_cpu_write(s, id, addr, value);
 }
@@ -631,7 +631,7 @@ static const MemoryRegionOps gic_cpu_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-void gic_init_irqs_and_distributor(gic_state *s, int num_irq)
+void gic_init_irqs_and_distributor(GICState *s, int num_irq)
 {
     int i;
 
@@ -657,7 +657,7 @@ static int arm_gic_init(SysBusDevice *dev)
 {
     /* Device instance init function for the GIC sysbus device */
     int i;
-    gic_state *s = FROM_SYSBUS(gic_state, dev);
+    GICState *s = FROM_SYSBUS(GICState, dev);
     ARMGICClass *agc = ARM_GIC_GET_CLASS(s);
 
     agc->parent_init(dev);
@@ -701,7 +701,7 @@ static void arm_gic_class_init(ObjectClass *klass, void *data)
 static TypeInfo arm_gic_info = {
     .name = TYPE_ARM_GIC,
     .parent = TYPE_ARM_GIC_COMMON,
-    .instance_size = sizeof(gic_state),
+    .instance_size = sizeof(GICState),
     .class_init = arm_gic_class_init,
     .class_size = sizeof(ARMGICClass),
 };
diff --git a/hw/arm_gic_common.c b/hw/arm_gic_common.c
index 360e7823f7..8369309d21 100644
--- a/hw/arm_gic_common.c
+++ b/hw/arm_gic_common.c
@@ -22,7 +22,7 @@
 
 static void gic_save(QEMUFile *f, void *opaque)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     int i;
     int j;
 
@@ -56,7 +56,7 @@ static void gic_save(QEMUFile *f, void *opaque)
 
 static int gic_load(QEMUFile *f, void *opaque, int version_id)
 {
-    gic_state *s = (gic_state *)opaque;
+    GICState *s = (GICState *)opaque;
     int i;
     int j;
 
@@ -96,7 +96,7 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id)
 
 static int arm_gic_common_init(SysBusDevice *dev)
 {
-    gic_state *s = FROM_SYSBUS(gic_state, dev);
+    GICState *s = FROM_SYSBUS(GICState, dev);
     int num_irq = s->num_irq;
 
     if (s->num_cpu > NCPU) {
@@ -123,7 +123,7 @@ static int arm_gic_common_init(SysBusDevice *dev)
 
 static void arm_gic_common_reset(DeviceState *dev)
 {
-    gic_state *s = FROM_SYSBUS(gic_state, sysbus_from_qdev(dev));
+    GICState *s = FROM_SYSBUS(GICState, sysbus_from_qdev(dev));
     int i;
     memset(s->irq_state, 0, GIC_MAXIRQ * sizeof(gic_irq_state));
     for (i = 0 ; i < s->num_cpu; i++) {
@@ -147,13 +147,13 @@ static void arm_gic_common_reset(DeviceState *dev)
 }
 
 static Property arm_gic_common_properties[] = {
-    DEFINE_PROP_UINT32("num-cpu", gic_state, num_cpu, 1),
-    DEFINE_PROP_UINT32("num-irq", gic_state, num_irq, 32),
+    DEFINE_PROP_UINT32("num-cpu", GICState, num_cpu, 1),
+    DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32),
     /* Revision can be 1 or 2 for GIC architecture specification
      * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC.
      * (Internally, 0xffffffff also indicates "not a GIC but an NVIC".)
      */
-    DEFINE_PROP_UINT32("revision", gic_state, revision, 1),
+    DEFINE_PROP_UINT32("revision", GICState, revision, 1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -170,7 +170,7 @@ static void arm_gic_common_class_init(ObjectClass *klass, void *data)
 static TypeInfo arm_gic_common_type = {
     .name = TYPE_ARM_GIC_COMMON,
     .parent = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(gic_state),
+    .instance_size = sizeof(GICState),
     .class_size = sizeof(ARMGICCommonClass),
     .class_init = arm_gic_common_class_init,
     .abstract = true,
diff --git a/hw/arm_gic_internal.h b/hw/arm_gic_internal.h
index db4fad564f..699352ca8b 100644
--- a/hw/arm_gic_internal.h
+++ b/hw/arm_gic_internal.h
@@ -69,7 +69,7 @@ typedef struct gic_irq_state {
     unsigned trigger:1; /* nonzero = edge triggered.  */
 } gic_irq_state;
 
-typedef struct gic_state {
+typedef struct GICState {
     SysBusDevice busdev;
     qemu_irq parent_irq[NCPU];
     int enabled;
@@ -92,25 +92,25 @@ typedef struct gic_state {
     /* This is just so we can have an opaque pointer which identifies
      * both this GIC and which CPU interface we should be accessing.
      */
-    struct gic_state *backref[NCPU];
+    struct GICState *backref[NCPU];
     MemoryRegion cpuiomem[NCPU+1]; /* CPU interfaces */
     uint32_t num_irq;
     uint32_t revision;
-} gic_state;
+} GICState;
 
 /* The special cases for the revision property: */
 #define REV_11MPCORE 0
 #define REV_NVIC 0xffffffff
 
-void gic_set_pending_private(gic_state *s, int cpu, int irq);
-uint32_t gic_acknowledge_irq(gic_state *s, int cpu);
-void gic_complete_irq(gic_state *s, int cpu, int irq);
-void gic_update(gic_state *s);
-void gic_init_irqs_and_distributor(gic_state *s, int num_irq);
+void gic_set_pending_private(GICState *s, int cpu, int irq);
+uint32_t gic_acknowledge_irq(GICState *s, int cpu);
+void gic_complete_irq(GICState *s, int cpu, int irq);
+void gic_update(GICState *s);
+void gic_init_irqs_and_distributor(GICState *s, int num_irq);
 
 #define TYPE_ARM_GIC_COMMON "arm_gic_common"
 #define ARM_GIC_COMMON(obj) \
-     OBJECT_CHECK(gic_state, (obj), TYPE_ARM_GIC_COMMON)
+     OBJECT_CHECK(GICState, (obj), TYPE_ARM_GIC_COMMON)
 #define ARM_GIC_COMMON_CLASS(klass) \
      OBJECT_CLASS_CHECK(ARMGICCommonClass, (klass), TYPE_ARM_GIC_COMMON)
 #define ARM_GIC_COMMON_GET_CLASS(obj) \
@@ -122,7 +122,7 @@ typedef struct ARMGICCommonClass {
 
 #define TYPE_ARM_GIC "arm_gic"
 #define ARM_GIC(obj) \
-     OBJECT_CHECK(gic_state, (obj), TYPE_ARM_GIC)
+     OBJECT_CHECK(GICState, (obj), TYPE_ARM_GIC)
 #define ARM_GIC_CLASS(klass) \
      OBJECT_CLASS_CHECK(ARMGICClass, (klass), TYPE_ARM_GIC)
 #define ARM_GIC_GET_CLASS(obj) \
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 5c09116478..c449e08089 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -17,7 +17,7 @@
 #include "arm_gic_internal.h"
 
 typedef struct {
-    gic_state gic;
+    GICState gic;
     struct {
         uint32_t control;
         uint32_t reload;
@@ -505,9 +505,9 @@ static void armv7m_nvic_instance_init(Object *obj)
      * than our superclass. This function runs after qdev init
      * has set the defaults from the Property array and before
      * any user-specified property setting, so just modify the
-     * value in the gic_state struct.
+     * value in the GICState struct.
      */
-    gic_state *s = ARM_GIC_COMMON(obj);
+    GICState *s = ARM_GIC_COMMON(obj);
     /* The ARM v7m may have anything from 0 to 496 external interrupt
      * IRQ lines. We default to 64. Other boards may differ and should
      * set the num-irq property appropriately.
diff --git a/hw/cadence_ttc.c b/hw/cadence_ttc.c
index dd02f86eb9..77b6976eda 100644
--- a/hw/cadence_ttc.c
+++ b/hw/cadence_ttc.c
@@ -274,6 +274,7 @@ static uint32_t cadence_ttc_read_imp(void *opaque, target_phys_addr_t offset)
         /* cleared after read */
         value = s->reg_intr;
         s->reg_intr = 0;
+        cadence_timer_update(s);
         return value;
 
     case 0x60: /* interrupt enable */
@@ -355,7 +356,6 @@ static void cadence_ttc_write(void *opaque, target_phys_addr_t offset,
     case 0x54: /* interrupt register */
     case 0x58:
     case 0x5c:
-        s->reg_intr &= (~value & 0xfff);
         break;
 
     case 0x60: /* interrupt enable */
diff --git a/hw/ds1338.c b/hw/ds1338.c
index d590d9c007..b576d56438 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -12,39 +12,84 @@
 
 #include "i2c.h"
 
+/* Size of NVRAM including both the user-accessible area and the
+ * secondary register area.
+ */
+#define NVRAM_SIZE 64
+
 typedef struct {
     I2CSlave i2c;
-    time_t offset;
-    struct tm now;
-    uint8_t nvram[56];
-    int ptr;
-    int addr_byte;
+    int64_t offset;
+    uint8_t nvram[NVRAM_SIZE];
+    int32_t ptr;
+    bool addr_byte;
 } DS1338State;
 
+static const VMStateDescription vmstate_ds1338 = {
+    .name = "ds1338",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_I2C_SLAVE(i2c, DS1338State),
+        VMSTATE_INT64(offset, DS1338State),
+        VMSTATE_UINT8_ARRAY(nvram, DS1338State, NVRAM_SIZE),
+        VMSTATE_INT32(ptr, DS1338State),
+        VMSTATE_BOOL(addr_byte, DS1338State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void capture_current_time(DS1338State *s)
+{
+    /* Capture the current time into the secondary registers
+     * which will be actually read by the data transfer operation.
+     */
+    struct tm now;
+    qemu_get_timedate(&now, s->offset);
+    s->nvram[0] = to_bcd(now.tm_sec);
+    s->nvram[1] = to_bcd(now.tm_min);
+    if (s->nvram[2] & 0x40) {
+        s->nvram[2] = (to_bcd((now.tm_hour % 12)) + 1) | 0x40;
+        if (now.tm_hour >= 12) {
+            s->nvram[2] |= 0x20;
+        }
+    } else {
+        s->nvram[2] = to_bcd(now.tm_hour);
+    }
+    s->nvram[3] = to_bcd(now.tm_wday) + 1;
+    s->nvram[4] = to_bcd(now.tm_mday);
+    s->nvram[5] = to_bcd(now.tm_mon) + 1;
+    s->nvram[6] = to_bcd(now.tm_year - 100);
+}
+
+static void inc_regptr(DS1338State *s)
+{
+    /* The register pointer wraps around after 0x3F; wraparound
+     * causes the current time/date to be retransferred into
+     * the secondary registers.
+     */
+    s->ptr = (s->ptr + 1) & (NVRAM_SIZE - 1);
+    if (!s->ptr) {
+        capture_current_time(s);
+    }
+}
+
 static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
 {
     DS1338State *s = FROM_I2C_SLAVE(DS1338State, i2c);
 
     switch (event) {
     case I2C_START_RECV:
-        qemu_get_timedate(&s->now, s->offset);
-        s->nvram[0] = to_bcd(s->now.tm_sec);
-        s->nvram[1] = to_bcd(s->now.tm_min);
-        if (s->nvram[2] & 0x40) {
-            s->nvram[2] = (to_bcd((s->now.tm_hour % 12)) + 1) | 0x40;
-            if (s->now.tm_hour >= 12) {
-                s->nvram[2] |= 0x20;
-            }
-        } else {
-            s->nvram[2] = to_bcd(s->now.tm_hour);
-        }
-        s->nvram[3] = to_bcd(s->now.tm_wday) + 1;
-        s->nvram[4] = to_bcd(s->now.tm_mday);
-        s->nvram[5] = to_bcd(s->now.tm_mon) + 1;
-        s->nvram[6] = to_bcd(s->now.tm_year - 100);
+        /* In h/w, capture happens on any START condition, not just a
+         * START_RECV, but there is no need to actually capture on
+         * START_SEND, because the guest can't get at that data
+         * without going through a START_RECV which would overwrite it.
+         */
+        capture_current_time(s);
         break;
     case I2C_START_SEND:
-        s->addr_byte = 1;
+        s->addr_byte = true;
         break;
     default:
         break;
@@ -57,7 +102,7 @@ static int ds1338_recv(I2CSlave *i2c)
     uint8_t res;
 
     res  = s->nvram[s->ptr];
-    s->ptr = (s->ptr + 1) & 0xff;
+    inc_regptr(s);
     return res;
 }
 
@@ -65,20 +110,20 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
 {
     DS1338State *s = FROM_I2C_SLAVE(DS1338State, i2c);
     if (s->addr_byte) {
-        s->ptr = data;
-        s->addr_byte = 0;
+        s->ptr = data & (NVRAM_SIZE - 1);
+        s->addr_byte = false;
         return 0;
     }
-    s->nvram[s->ptr - 8] = data;
-    if (data < 8) {
-        qemu_get_timedate(&s->now, s->offset);
-        switch(data) {
+    if (s->ptr < 8) {
+        struct tm now;
+        qemu_get_timedate(&now, s->offset);
+        switch(s->ptr) {
         case 0:
             /* TODO: Implement CH (stop) bit.  */
-            s->now.tm_sec = from_bcd(data & 0x7f);
+            now.tm_sec = from_bcd(data & 0x7f);
             break;
         case 1:
-            s->now.tm_min = from_bcd(data & 0x7f);
+            now.tm_min = from_bcd(data & 0x7f);
             break;
         case 2:
             if (data & 0x40) {
@@ -90,27 +135,29 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
             } else {
                 data = from_bcd(data);
             }
-            s->now.tm_hour = data;
+            now.tm_hour = data;
             break;
         case 3:
-            s->now.tm_wday = from_bcd(data & 7) - 1;
+            now.tm_wday = from_bcd(data & 7) - 1;
             break;
         case 4:
-            s->now.tm_mday = from_bcd(data & 0x3f);
+            now.tm_mday = from_bcd(data & 0x3f);
             break;
         case 5:
-            s->now.tm_mon = from_bcd(data & 0x1f) - 1;
+            now.tm_mon = from_bcd(data & 0x1f) - 1;
             break;
         case 6:
-            s->now.tm_year = from_bcd(data) + 100;
+            now.tm_year = from_bcd(data) + 100;
             break;
         case 7:
             /* Control register. Currently ignored.  */
             break;
         }
-        s->offset = qemu_timedate_diff(&s->now);
+        s->offset = qemu_timedate_diff(&now);
+    } else {
+        s->nvram[s->ptr] = data;
     }
-    s->ptr = (s->ptr + 1) & 0xff;
+    inc_regptr(s);
     return 0;
 }
 
@@ -121,12 +168,14 @@ static int ds1338_init(I2CSlave *i2c)
 
 static void ds1338_class_init(ObjectClass *klass, void *data)
 {
+    DeviceClass *dc = DEVICE_CLASS(klass);
     I2CSlaveClass *k = I2C_SLAVE_CLASS(klass);
 
     k->init = ds1338_init;
     k->event = ds1338_event;
     k->recv = ds1338_recv;
     k->send = ds1338_send;
+    dc->vmsd = &vmstate_ds1338;
 }
 
 static TypeInfo ds1338_info = {
diff --git a/hw/e1000.c b/hw/e1000.c
index ec3a7c4ecc..63fee10794 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -1079,11 +1079,23 @@ static bool is_version_1(void *opaque, int version_id)
     return version_id == 1;
 }
 
+static int e1000_post_load(void *opaque, int version_id)
+{
+    E1000State *s = opaque;
+
+    /* nc.link_down can't be migrated, so infer link_down according
+     * to link status bit in mac_reg[STATUS] */
+    s->nic->nc.link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_e1000 = {
     .name = "e1000",
     .version_id = 2,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .post_load = e1000_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_PCI_DEVICE(dev, E1000State),
         VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index fd5898fba6..82364ab0d5 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -371,6 +371,14 @@ static QEMUMachine pc_machine_v1_3 = {
             .driver   = "ivshmem",\
             .property = "use64",\
             .value    = "0",\
+        },{\
+            .driver   = "qxl",\
+            .property = "revision",\
+            .value    = stringify(3),\
+        },{\
+            .driver   = "qxl-vga",\
+            .property = "revision",\
+            .value    = stringify(3),\
         }
 
 static QEMUMachine pc_machine_v1_2 = {
diff --git a/hw/qxl-render.c b/hw/qxl-render.c
index e2e3fe2d37..b66c168ef6 100644
--- a/hw/qxl-render.c
+++ b/hw/qxl-render.c
@@ -99,7 +99,6 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
 {
     VGACommonState *vga = &qxl->vga;
     int i;
-    DisplaySurface *surface = vga->ds->surface;
 
     if (qxl->guest_primary.resized) {
         qxl->guest_primary.resized = 0;
@@ -112,9 +111,6 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
                qxl->guest_primary.qxl_stride,
                qxl->guest_primary.bytes_pp,
                qxl->guest_primary.bits_pp);
-    }
-    if (surface->width != qxl->guest_primary.surface.width ||
-        surface->height != qxl->guest_primary.surface.height) {
         if (qxl->guest_primary.qxl_stride > 0) {
             qemu_free_displaysurface(vga->ds);
             qemu_create_displaysurface_from(qxl->guest_primary.surface.width,
diff --git a/hw/qxl.c b/hw/qxl.c
index 33169f348a..1d16863c5e 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -29,11 +29,6 @@
 
 #include "qxl.h"
 
-#ifndef CONFIG_QXL_IO_MONITORS_CONFIG_ASYNC
-/* spice-protocol is too old, add missing definitions */
-#define QXL_IO_MONITORS_CONFIG_ASYNC (QXL_IO_FLUSH_RELEASE + 1)
-#endif
-
 /*
  * NOTE: SPICE_RING_PROD_ITEM accesses memory on the pci bar and as
  * such can be changed by the guest, so to avoid a guest trigerrable
@@ -262,9 +257,6 @@ static void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl, qxl_async_io async)
 static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay)
 {
     trace_qxl_spice_monitors_config(qxl->id);
-/* 0x000b01 == 0.11.1 */
-#if SPICE_SERVER_VERSION >= 0x000b01 && \
-    defined(CONFIG_QXL_IO_MONITORS_CONFIG_ASYNC)
     if (replay) {
         /*
          * don't use QXL_COOKIE_TYPE_IO:
@@ -286,10 +278,6 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay)
                 (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_IO,
                                           QXL_IO_MONITORS_CONFIG_ASYNC));
     }
-#else
-    fprintf(stderr, "qxl: too old spice-protocol/spice-server for "
-            "QXL_IO_MONITORS_CONFIG_ASYNC\n");
-#endif
 }
 
 void qxl_spice_reset_image_cache(PCIQXLDevice *qxl)
@@ -948,8 +936,6 @@ static void interface_async_complete(QXLInstance *sin, uint64_t cookie_token)
     }
 }
 
-#if SPICE_SERVER_VERSION >= 0x000b04
-
 /* called from spice server thread context only */
 static void interface_set_client_capabilities(QXLInstance *sin,
                                               uint8_t client_present,
@@ -971,11 +957,6 @@ static void interface_set_client_capabilities(QXLInstance *sin,
     qxl_send_events(qxl, QXL_INTERRUPT_CLIENT);
 }
 
-#endif
-
-#if defined(CONFIG_QXL_CLIENT_MONITORS_CONFIG) \
-    && SPICE_SERVER_VERSION >= 0x000b05
-
 static uint32_t qxl_crc32(const uint8_t *p, unsigned len)
 {
     /*
@@ -1044,7 +1025,6 @@ static int interface_client_monitors_config(QXLInstance *sin,
     qxl_send_events(qxl, QXL_INTERRUPT_CLIENT_MONITORS_CONFIG);
     return 1;
 }
-#endif
 
 static const QXLInterface qxl_interface = {
     .base.type               = SPICE_INTERFACE_QXL,
@@ -1067,13 +1047,8 @@ static const QXLInterface qxl_interface = {
     .flush_resources         = interface_flush_resources,
     .async_complete          = interface_async_complete,
     .update_area_complete    = interface_update_area_complete,
-#if SPICE_SERVER_VERSION >= 0x000b04
     .set_client_capabilities = interface_set_client_capabilities,
-#endif
-#if SPICE_SERVER_VERSION >= 0x000b05 && \
-    defined(CONFIG_QXL_CLIENT_MONITORS_CONFIG)
     .client_monitors_config = interface_client_monitors_config,
-#endif
 };
 
 static void qxl_enter_vga_mode(PCIQXLDevice *d)
@@ -1461,12 +1436,12 @@ static void ioport_write(void *opaque, target_phys_addr_t addr,
     qxl_async_io async = QXL_SYNC;
     uint32_t orig_io_port = io_port;
 
-    if (d->guest_bug && !io_port == QXL_IO_RESET) {
+    if (d->guest_bug && io_port != QXL_IO_RESET) {
         return;
     }
 
     if (d->revision <= QXL_REVISION_STABLE_V10 &&
-        io_port >= QXL_IO_FLUSH_SURFACES_ASYNC) {
+        io_port > QXL_IO_FLUSH_RELEASE) {
         qxl_set_guest_bug(d, "unsupported io %d for revision %d\n",
             io_port, d->revision);
         return;
@@ -1547,20 +1522,13 @@ async_common:
         if (d->ram->update_surface > d->ssd.num_surfaces) {
             qxl_set_guest_bug(d, "QXL_IO_UPDATE_AREA: invalid surface id %d\n",
                               d->ram->update_surface);
-            return;
+            break;
         }
-        if (update.left >= update.right || update.top >= update.bottom) {
+        if (update.left >= update.right || update.top >= update.bottom ||
+            update.left < 0 || update.top < 0) {
             qxl_set_guest_bug(d,
                     "QXL_IO_UPDATE_AREA: invalid area (%ux%u)x(%ux%u)\n",
                     update.left, update.top, update.right, update.bottom);
-            return;
-        }
-
-        if (update.left < 0 || update.top < 0 || update.left >= update.right ||
-            update.top >= update.bottom) {
-            qxl_set_guest_bug(d, "QXL_IO_UPDATE_AREA: "
-                              "invalid area(%d,%d,%d,%d)\n", update.left,
-                              update.right, update.top, update.bottom);
             break;
         }
         if (async == QXL_ASYNC) {
@@ -1811,7 +1779,7 @@ static void qxl_hw_text_update(void *opaque, console_ch_t *chardata)
 
 static void qxl_dirty_surfaces(PCIQXLDevice *qxl)
 {
-    intptr_t vram_start;
+    uintptr_t vram_start;
     int i;
 
     if (qxl->mode != QXL_MODE_NATIVE && qxl->mode != QXL_MODE_COMPAT) {
@@ -1822,7 +1790,7 @@ static void qxl_dirty_surfaces(PCIQXLDevice *qxl)
     qxl_set_dirty(&qxl->vga.vram, qxl->shadow_rom.draw_area_offset,
                   qxl->shadow_rom.surface0_area_size);
 
-    vram_start =  (intptr_t)memory_region_get_ram_ptr(&qxl->vram_bar);
+    vram_start = (uintptr_t)memory_region_get_ram_ptr(&qxl->vram_bar);
 
     /* dirty the off-screen surfaces */
     for (i = 0; i < qxl->ssd.num_surfaces; i++) {
@@ -1854,7 +1822,6 @@ static void qxl_vm_change_state_handler(void *opaque, int running,
                                         RunState state)
 {
     PCIQXLDevice *qxl = opaque;
-    qemu_spice_vm_change_state_handler(&qxl->ssd, running, state);
 
     if (running) {
         /*
@@ -1971,14 +1938,10 @@ static int qxl_init_common(PCIQXLDevice *qxl)
         pci_device_rev = QXL_REVISION_STABLE_V10;
         io_size = 32; /* PCI region size must be pow2 */
         break;
-/* 0x000b01 == 0.11.1 */
-#if SPICE_SERVER_VERSION >= 0x000b01 && \
-        defined(CONFIG_QXL_IO_MONITORS_CONFIG_ASYNC)
     case 4: /* qxl-4 */
         pci_device_rev = QXL_REVISION_STABLE_V12;
         io_size = msb_mask(QXL_IO_RANGE_SIZE * 2 - 1);
         break;
-#endif
     default:
         error_report("Invalid revision %d for qxl device (max %d)",
                      qxl->revision, QXL_DEFAULT_REVISION);
@@ -2044,7 +2007,11 @@ static int qxl_init_common(PCIQXLDevice *qxl)
 
     qxl->ssd.qxl.base.sif = &qxl_interface.base;
     qxl->ssd.qxl.id = qxl->id;
-    qemu_spice_add_interface(&qxl->ssd.qxl.base);
+    if (qemu_spice_add_interface(&qxl->ssd.qxl.base) != 0) {
+        error_report("qxl interface %d.%d not supported by spice-server\n",
+                     SPICE_INTERFACE_QXL_MAJOR, SPICE_INTERFACE_QXL_MINOR);
+        return -1;
+    }
     qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
 
     init_pipe_signaling(qxl);
diff --git a/hw/qxl.h b/hw/qxl.h
index 5553824bee..e583cfb750 100644
--- a/hw/qxl.h
+++ b/hw/qxl.h
@@ -129,12 +129,7 @@ typedef struct PCIQXLDevice {
         }                                                               \
     } while (0)
 
-#if 0
-/* spice-server 0.12 is still in development */
 #define QXL_DEFAULT_REVISION QXL_REVISION_STABLE_V12
-#else
-#define QXL_DEFAULT_REVISION QXL_REVISION_STABLE_V10
-#endif
 
 /* qxl.c */
 void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id);
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index b7c82ee027..6b28fea96a 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -167,7 +167,7 @@ enum IntrStatusBits {
     PCIErr = 0x8000,
     PCSTimeout = 0x4000,
     RxFIFOOver = 0x40,
-    RxUnderrun = 0x20,
+    RxUnderrun = 0x20, /* Packet Underrun / Link Change */
     RxOverflow = 0x10,
     TxErr = 0x08,
     TxOK = 0x04,
@@ -3003,7 +3003,8 @@ static uint32_t rtl8139_io_readb(void *opaque, uint8_t addr)
             break;
 
         case MediaStatus:
-            ret = 0xd0;
+            /* The LinkDown bit of MediaStatus is inverse with link status */
+            ret = 0xd0 | (~s->BasicModeStatus & 0x04);
             DPRINTF("MediaStatus read 0x%x\n", ret);
             break;
 
@@ -3258,6 +3259,10 @@ static int rtl8139_post_load(void *opaque, int version_id)
         s->cplus_enabled = s->CpCmd != 0;
     }
 
+    /* nc.link_down can't be migrated, so infer link_down according
+     * to link status bit in BasicModeStatus */
+    s->nic->nc.link_down = (s->BasicModeStatus & 0x04) == 0;
+
     return 0;
 }
 
@@ -3449,12 +3454,27 @@ static void pci_rtl8139_uninit(PCIDevice *dev)
     qemu_del_net_client(&s->nic->nc);
 }
 
+static void rtl8139_set_link_status(NetClientState *nc)
+{
+    RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
+
+    if (nc->link_down) {
+        s->BasicModeStatus &= ~0x04;
+    } else {
+        s->BasicModeStatus |= 0x04;
+    }
+
+    s->IntrStatus |= RxUnderrun;
+    rtl8139_update_irq(s);
+}
+
 static NetClientInfo net_rtl8139_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = rtl8139_can_receive,
     .receive = rtl8139_receive,
     .cleanup = rtl8139_cleanup,
+    .link_status_changed = rtl8139_set_link_status,
 };
 
 static int pci_rtl8139_init(PCIDevice *dev)
diff --git a/hw/sun4m.c b/hw/sun4m.c
index c98cd5ec3f..a04b485eda 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -253,8 +253,10 @@ void cpu_check_irqs(CPUSPARCState *env)
     }
 }
 
-static void cpu_kick_irq(CPUSPARCState *env)
+static void cpu_kick_irq(SPARCCPU *cpu)
 {
+    CPUSPARCState *env = &cpu->env;
+
     env->halted = 0;
     cpu_check_irqs(env);
     qemu_cpu_kick(env);
@@ -262,12 +264,13 @@ static void cpu_kick_irq(CPUSPARCState *env)
 
 static void cpu_set_irq(void *opaque, int irq, int level)
 {
-    CPUSPARCState *env = opaque;
+    SPARCCPU *cpu = opaque;
+    CPUSPARCState *env = &cpu->env;
 
     if (level) {
         trace_sun4m_cpu_set_irq_raise(irq);
         env->pil_in |= 1 << irq;
-        cpu_kick_irq(env);
+        cpu_kick_irq(cpu);
     } else {
         trace_sun4m_cpu_set_irq_lower(irq);
         env->pil_in &= ~(1 << irq);
@@ -840,7 +843,7 @@ static void cpu_devinit(const char *cpu_model, unsigned int id,
         qemu_register_reset(secondary_cpu_reset, cpu);
         env->halted = 1;
     }
-    *cpu_irqs = qemu_allocate_irqs(cpu_set_irq, env, MAX_PILS);
+    *cpu_irqs = qemu_allocate_irqs(cpu_set_irq, cpu, MAX_PILS);
     env->prom_addr = prom_addr;
 }
 
diff --git a/hw/sun4u.c b/hw/sun4u.c
index 137a7c6666..940db3348a 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -310,8 +310,10 @@ void cpu_check_irqs(CPUSPARCState *env)
     }
 }
 
-static void cpu_kick_irq(CPUSPARCState *env)
+static void cpu_kick_irq(SPARCCPU *cpu)
 {
+    CPUSPARCState *env = &cpu->env;
+
     env->halted = 0;
     cpu_check_irqs(env);
     qemu_cpu_kick(env);
@@ -319,7 +321,8 @@ static void cpu_kick_irq(CPUSPARCState *env)
 
 static void cpu_set_ivec_irq(void *opaque, int irq, int level)
 {
-    CPUSPARCState *env = opaque;
+    SPARCCPU *cpu = opaque;
+    CPUSPARCState *env = &cpu->env;
 
     if (level) {
         if (!(env->ivec_status & 0x20)) {
@@ -366,7 +369,7 @@ void cpu_get_timer(QEMUFile *f, CPUTimer *s)
     qemu_get_timer(f, s->qtimer);
 }
 
-static CPUTimer* cpu_timer_create(const char* name, CPUSPARCState *env,
+static CPUTimer *cpu_timer_create(const char *name, SPARCCPU *cpu,
                                   QEMUBHFunc *cb, uint32_t frequency,
                                   uint64_t disabled_mask)
 {
@@ -379,7 +382,7 @@ static CPUTimer* cpu_timer_create(const char* name, CPUSPARCState *env,
     timer->disabled = 1;
     timer->clock_offset = qemu_get_clock_ns(vm_clock);
 
-    timer->qtimer = qemu_new_timer_ns(vm_clock, cb, env);
+    timer->qtimer = qemu_new_timer_ns(vm_clock, cb, cpu);
 
     return timer;
 }
@@ -418,7 +421,8 @@ static void main_cpu_reset(void *opaque)
 
 static void tick_irq(void *opaque)
 {
-    CPUSPARCState *env = opaque;
+    SPARCCPU *cpu = opaque;
+    CPUSPARCState *env = &cpu->env;
 
     CPUTimer* timer = env->tick;
 
@@ -430,12 +434,13 @@ static void tick_irq(void *opaque)
     }
 
     env->softint |= SOFTINT_TIMER;
-    cpu_kick_irq(env);
+    cpu_kick_irq(cpu);
 }
 
 static void stick_irq(void *opaque)
 {
-    CPUSPARCState *env = opaque;
+    SPARCCPU *cpu = opaque;
+    CPUSPARCState *env = &cpu->env;
 
     CPUTimer* timer = env->stick;
 
@@ -447,12 +452,13 @@ static void stick_irq(void *opaque)
     }
 
     env->softint |= SOFTINT_STIMER;
-    cpu_kick_irq(env);
+    cpu_kick_irq(cpu);
 }
 
 static void hstick_irq(void *opaque)
 {
-    CPUSPARCState *env = opaque;
+    SPARCCPU *cpu = opaque;
+    CPUSPARCState *env = &cpu->env;
 
     CPUTimer* timer = env->hstick;
 
@@ -464,7 +470,7 @@ static void hstick_irq(void *opaque)
     }
 
     env->softint |= SOFTINT_STIMER;
-    cpu_kick_irq(env);
+    cpu_kick_irq(cpu);
 }
 
 static int64_t cpu_to_timer_ticks(int64_t cpu_ticks, uint32_t frequency)
@@ -772,13 +778,13 @@ static SPARCCPU *cpu_devinit(const char *cpu_model, const struct hwdef *hwdef)
     }
     env = &cpu->env;
 
-    env->tick = cpu_timer_create("tick", env, tick_irq,
+    env->tick = cpu_timer_create("tick", cpu, tick_irq,
                                   tick_frequency, TICK_NPT_MASK);
 
-    env->stick = cpu_timer_create("stick", env, stick_irq,
+    env->stick = cpu_timer_create("stick", cpu, stick_irq,
                                    stick_frequency, TICK_INT_DIS);
 
-    env->hstick = cpu_timer_create("hstick", env, hstick_irq,
+    env->hstick = cpu_timer_create("hstick", cpu, hstick_irq,
                                     hstick_frequency, TICK_INT_DIS);
 
     reset_info = g_malloc0(sizeof(ResetData));
@@ -797,7 +803,6 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
                         const struct hwdef *hwdef)
 {
     SPARCCPU *cpu;
-    CPUSPARCState *env;
     M48t59State *nvram;
     unsigned int i;
     uint64_t initrd_addr, initrd_size, kernel_addr, kernel_size, kernel_entry;
@@ -810,14 +815,13 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
 
     /* init CPUs */
     cpu = cpu_devinit(cpu_model, hwdef);
-    env = &cpu->env;
 
     /* set up devices */
     ram_init(0, RAM_size);
 
     prom_init(hwdef->prom_addr, bios_name);
 
-    ivec_irqs = qemu_allocate_irqs(cpu_set_ivec_irq, env, IVEC_MAX);
+    ivec_irqs = qemu_allocate_irqs(cpu_set_ivec_irq, cpu, IVEC_MAX);
     pci_bus = pci_apb_init(APB_SPECIAL_BASE, APB_MEM_BASE, ivec_irqs, &pci_bus2,
                            &pci_bus3, &pbm_irqs);
     pci_vga_init(pci_bus);
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index cdc8bc3fba..c2f08e3735 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -826,8 +826,16 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
     USBEndpoint *ep;
 
     /* Is active ? */
-    if (!(td->ctrl & TD_CTRL_ACTIVE))
+    if (!(td->ctrl & TD_CTRL_ACTIVE)) {
+        /*
+         * ehci11d spec page 22: "Even if the Active bit in the TD is already
+         * cleared when the TD is fetched ... an IOC interrupt is generated"
+         */
+        if (td->ctrl & TD_CTRL_IOC) {
+                *int_mask |= 0x01;
+        }
         return TD_RESULT_NEXT_QH;
+    }
 
     async = uhci_async_find_td(s, addr, td);
     if (async) {
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index b10241a139..2283565b0c 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -862,15 +862,11 @@ static void usbredir_chardev_close_bh(void *opaque)
     }
 }
 
-static void usbredir_chardev_open(USBRedirDevice *dev)
+static void usbredir_create_parser(USBRedirDevice *dev)
 {
     uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
     int flags = 0;
 
-    /* Make sure any pending closes are handled (no-op if none pending) */
-    usbredir_chardev_close_bh(dev);
-    qemu_bh_cancel(dev->chardev_close_bh);
-
     DPRINTF("creating usbredirparser\n");
 
     dev->parser = qemu_oom_check(usbredirparser_create());
@@ -982,7 +978,10 @@ static void usbredir_chardev_event(void *opaque, int event)
     switch (event) {
     case CHR_EVENT_OPENED:
         DPRINTF("chardev open\n");
-        usbredir_chardev_open(dev);
+        /* Make sure any pending closes are handled (no-op if none pending) */
+        usbredir_chardev_close_bh(dev);
+        qemu_bh_cancel(dev->chardev_close_bh);
+        usbredir_create_parser(dev);
         break;
     case CHR_EVENT_CLOSED:
         DPRINTF("chardev close\n");
@@ -1615,12 +1614,17 @@ static int usbredir_get_parser(QEMUFile *f, void *priv, size_t unused)
     }
 
     /*
-     * Our chardev should be open already at this point, otherwise
-     * the usbredir channel will be broken (ie spice without seamless)
+     * If our chardev is not open already at this point the usbredir connection
+     * has been broken (non seamless migration, or restore from disk).
+     *
+     * In this case create a temporary parser to receive the migration data,
+     * and schedule the close_bh to report the device as disconnected to the
+     * guest and to destroy the parser again.
      */
     if (dev->parser == NULL) {
-        ERROR("get_parser called with closed chardev, failing migration\n");
-        return -1;
+        WARNING("usb-redir connection broken during migration\n");
+        usbredir_create_parser(dev);
+        qemu_bh_schedule(dev->chardev_close_bh);
     }
 
     data = g_malloc(len);
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index b3f8077146..7b1b0256aa 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -266,6 +266,11 @@ static void versatile_init(ram_addr_t ram_size,
     sysbus_create_simple("sp804", 0x101e2000, pic[4]);
     sysbus_create_simple("sp804", 0x101e3000, pic[5]);
 
+    sysbus_create_simple("pl061", 0x101e4000, pic[6]);
+    sysbus_create_simple("pl061", 0x101e5000, pic[7]);
+    sysbus_create_simple("pl061", 0x101e6000, pic[8]);
+    sysbus_create_simple("pl061", 0x101e7000, pic[9]);
+
     /* The versatile/PB actually has a modified Color LCD controller
        that includes hardware cursor support from the PL111.  */
     dev = sysbus_create_simple("pl110_versatile", 0x10120000, pic[16]);
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index a1eeced8fd..639371e7a2 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -33,9 +33,11 @@
 #include "memory.h"
 #include "msi.h"
 #include "msix.h"
+#include "pci.h"
+#include "qemu-common.h"
 #include "qemu-error.h"
+#include "qemu-queue.h"
 #include "range.h"
-#include "vfio_pci_int.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -46,6 +48,99 @@
     do { } while (0)
 #endif
 
+typedef struct VFIOBAR {
+    off_t fd_offset; /* offset of BAR within device fd */
+    int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
+    MemoryRegion mem; /* slow, read/write access */
+    MemoryRegion mmap_mem; /* direct mapped access */
+    void *mmap;
+    size_t size;
+    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+    uint8_t nr; /* cache the BAR number for debug */
+} VFIOBAR;
+
+typedef struct VFIOINTx {
+    bool pending; /* interrupt pending */
+    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+    uint8_t pin; /* which pin to pull for qemu_set_irq */
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+    PCIINTxRoute route; /* routing info for QEMU bypass */
+    uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+    QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
+} VFIOINTx;
+
+struct VFIODevice;
+
+typedef struct VFIOMSIVector {
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    struct VFIODevice *vdev; /* back pointer to device */
+    int virq; /* KVM irqchip route for QEMU bypass */
+    bool use;
+} VFIOMSIVector;
+
+enum {
+    VFIO_INT_NONE = 0,
+    VFIO_INT_INTx = 1,
+    VFIO_INT_MSI  = 2,
+    VFIO_INT_MSIX = 3,
+};
+
+struct VFIOGroup;
+
+typedef struct VFIOContainer {
+    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+    struct {
+        /* enable abstraction to support various iommu backends */
+        union {
+            MemoryListener listener; /* Used by type1 iommu */
+        };
+        void (*release)(struct VFIOContainer *);
+    } iommu_data;
+    QLIST_HEAD(, VFIOGroup) group_list;
+    QLIST_ENTRY(VFIOContainer) next;
+} VFIOContainer;
+
+/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
+typedef struct VFIOMSIXInfo {
+    uint8_t table_bar;
+    uint8_t pba_bar;
+    uint16_t entries;
+    uint32_t table_offset;
+    uint32_t pba_offset;
+    MemoryRegion mmap_mem;
+    void *mmap;
+} VFIOMSIXInfo;
+
+typedef struct VFIODevice {
+    PCIDevice pdev;
+    int fd;
+    VFIOINTx intx;
+    unsigned int config_size;
+    off_t config_offset; /* Offset of config space region within device fd */
+    unsigned int rom_size;
+    off_t rom_offset; /* Offset of ROM region within device fd */
+    int msi_cap_size;
+    VFIOMSIVector *msi_vectors;
+    VFIOMSIXInfo *msix;
+    int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
+    int interrupt; /* Current interrupt type */
+    VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
+    PCIHostDeviceAddress host;
+    QLIST_ENTRY(VFIODevice) next;
+    struct VFIOGroup *group;
+    bool reset_works;
+} VFIODevice;
+
+typedef struct VFIOGroup {
+    int fd;
+    int groupid;
+    VFIOContainer *container;
+    QLIST_HEAD(, VFIODevice) device_list;
+    QLIST_ENTRY(VFIOGroup) next;
+    QLIST_ENTRY(VFIOGroup) container_next;
+} VFIOGroup;
+
 #define MSIX_CAP_LENGTH 12
 
 static QLIST_HEAD(, VFIOContainer)
@@ -72,8 +167,6 @@ static void vfio_disable_irqindex(VFIODevice *vdev, int index)
     };
 
     ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
-
-    vdev->interrupt = VFIO_INT_NONE;
 }
 
 /*
@@ -92,6 +185,34 @@ static void vfio_unmask_intx(VFIODevice *vdev)
     ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
 }
 
+/*
+ * Disabling BAR mmaping can be slow, but toggling it around INTx can
+ * also be a huge overhead.  We try to get the best of both worlds by
+ * waiting until an interrupt to disable mmaps (subsequent transitions
+ * to the same state are effectively no overhead).  If the interrupt has
+ * been serviced and the time gap is long enough, we re-enable mmaps for
+ * performance.  This works well for things like graphics cards, which
+ * may not use their interrupt at all and are penalized to an unusable
+ * level by read/write BAR traps.  Other devices, like NICs, have more
+ * regular interrupts and see much better latency by staying in non-mmap
+ * mode.  We therefore set the default mmap_timeout such that a ping
+ * is just enough to keep the mmap disabled.  Users can experiment with
+ * other options with the x-intx-mmap-timeout-ms parameter (a value of
+ * zero disables the timer).
+ */
+static void vfio_intx_mmap_enable(void *opaque)
+{
+    VFIODevice *vdev = opaque;
+
+    if (vdev->intx.pending) {
+        qemu_mod_timer(vdev->intx.mmap_timer,
+                       qemu_get_clock_ms(vm_clock) + vdev->intx.mmap_timeout);
+        return;
+    }
+
+    vfio_mmap_set_enabled(vdev, true);
+}
+
 static void vfio_intx_interrupt(void *opaque)
 {
     VFIODevice *vdev = opaque;
@@ -106,6 +227,11 @@ static void vfio_intx_interrupt(void *opaque)
 
     vdev->intx.pending = true;
     qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 1);
+    vfio_mmap_set_enabled(vdev, false);
+    if (vdev->intx.mmap_timeout) {
+        qemu_mod_timer(vdev->intx.mmap_timer,
+                       qemu_get_clock_ms(vm_clock) + vdev->intx.mmap_timeout);
+    }
 }
 
 static void vfio_eoi(VFIODevice *vdev)
@@ -122,26 +248,14 @@ static void vfio_eoi(VFIODevice *vdev)
     vfio_unmask_intx(vdev);
 }
 
-typedef struct QEMU_PACKED VFIOIRQSetFD {
-    struct vfio_irq_set irq_set;
-    int32_t fd;
-} VFIOIRQSetFD;
-
 static int vfio_enable_intx(VFIODevice *vdev)
 {
-    VFIOIRQSetFD irq_set_fd = {
-        .irq_set = {
-            .argsz = sizeof(irq_set_fd),
-            .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
-            .index = VFIO_PCI_INTX_IRQ_INDEX,
-            .start = 0,
-            .count = 1,
-        },
-    };
     uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
-    int ret;
+    int ret, argsz;
+    struct vfio_irq_set *irq_set;
+    int32_t *pfd;
 
-    if (vdev->intx.disabled || !pin) {
+    if (!pin) {
         return 0;
     }
 
@@ -154,24 +268,28 @@ static int vfio_enable_intx(VFIODevice *vdev)
         return ret;
     }
 
-    irq_set_fd.fd = event_notifier_get_fd(&vdev->intx.interrupt);
-    qemu_set_fd_handler(irq_set_fd.fd, vfio_intx_interrupt, NULL, vdev);
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+
+    *pfd = event_notifier_get_fd(&vdev->intx.interrupt);
+    qemu_set_fd_handler(*pfd, vfio_intx_interrupt, NULL, vdev);
 
-    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd)) {
+    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+    g_free(irq_set);
+    if (ret) {
         error_report("vfio: Error: Failed to setup INTx fd: %m\n");
+        qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+        event_notifier_cleanup(&vdev->intx.interrupt);
         return -errno;
     }
 
-    /*
-     * Disable mmaps so we can trap on BAR accesses.  We interpret any
-     * access as a response to an interrupt and unmask the physical
-     * device.  The device will re-assert if the interrupt is still
-     * pending.  We'll likely retrigger on the host multiple times per
-     * guest interrupt, but without EOI notification it's better than
-     * nothing.  Acceleration paths through KVM will avoid this.
-     */
-    vfio_mmap_set_enabled(vdev, false);
-
     vdev->interrupt = VFIO_INT_INTx;
 
     DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -184,6 +302,7 @@ static void vfio_disable_intx(VFIODevice *vdev)
 {
     int fd;
 
+    qemu_del_timer(vdev->intx.mmap_timer);
     vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
     vdev->intx.pending = false;
     qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
@@ -254,10 +373,6 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
 
     g_free(irq_set);
 
-    if (!ret) {
-        vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
-    }
-
     return ret;
 }
 
@@ -272,15 +387,6 @@ static int vfio_msix_vector_use(PCIDevice *pdev,
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function, nr);
 
-    if (vdev->interrupt != VFIO_INT_MSIX) {
-        vfio_disable_interrupts(vdev);
-    }
-
-    if (!vdev->msi_vectors) {
-        vdev->msi_vectors = g_malloc0(vdev->msix->entries *
-                                      sizeof(VFIOMSIVector));
-    }
-
     vector = &vdev->msi_vectors[nr];
     vector->vdev = vdev;
     vector->use = true;
@@ -313,43 +419,35 @@ static int vfio_msix_vector_use(PCIDevice *pdev,
      * increase them as needed.
      */
     if (vdev->nr_vectors < nr + 1) {
-        int i;
-
         vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
         vdev->nr_vectors = nr + 1;
         ret = vfio_enable_vectors(vdev, true);
         if (ret) {
             error_report("vfio: failed to enable vectors, %d\n", ret);
         }
-
-        /* We don't know if we've missed interrupts in the interim... */
-        for (i = 0; i < vdev->msix->entries; i++) {
-            if (vdev->msi_vectors[i].use) {
-                msix_notify(&vdev->pdev, i);
-            }
-        }
     } else {
-        VFIOIRQSetFD irq_set_fd = {
-            .irq_set = {
-                .argsz = sizeof(irq_set_fd),
-                .flags = VFIO_IRQ_SET_DATA_EVENTFD |
-                         VFIO_IRQ_SET_ACTION_TRIGGER,
-                .index = VFIO_PCI_MSIX_IRQ_INDEX,
-                .start = nr,
-                .count = 1,
-            },
-            .fd = event_notifier_get_fd(&vector->interrupt),
-        };
-        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
+        int argsz;
+        struct vfio_irq_set *irq_set;
+        int32_t *pfd;
+
+        argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+        irq_set = g_malloc0(argsz);
+        irq_set->argsz = argsz;
+        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                         VFIO_IRQ_SET_ACTION_TRIGGER;
+        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+        irq_set->start = nr;
+        irq_set->count = 1;
+        pfd = (int32_t *)&irq_set->data;
+
+        *pfd = event_notifier_get_fd(&vector->interrupt);
+
+        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+        g_free(irq_set);
         if (ret) {
             error_report("vfio: failed to modify vector, %d\n", ret);
         }
-
-        /*
-         * If we were connected to the hardware PBA we could skip this,
-         * until then, a spurious interrupt is better than starvation.
-         */
-        msix_notify(&vdev->pdev, nr);
     }
 
     return 0;
@@ -359,17 +457,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
 {
     VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
     VFIOMSIVector *vector = &vdev->msi_vectors[nr];
-    VFIOIRQSetFD irq_set_fd = {
-        .irq_set = {
-            .argsz = sizeof(irq_set_fd),
-            .flags = VFIO_IRQ_SET_DATA_EVENTFD |
-                     VFIO_IRQ_SET_ACTION_TRIGGER,
-            .index = VFIO_PCI_MSIX_IRQ_INDEX,
-            .start = nr,
-            .count = 1,
-        },
-        .fd = -1,
-    };
+    int argsz;
+    struct vfio_irq_set *irq_set;
+    int32_t *pfd;
 
     DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
@@ -381,7 +471,23 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
      * bouncing through userspace and let msix.c drop it?  Not sure.
      */
     msix_vector_unuse(pdev, nr);
-    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+    irq_set->start = nr;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+
+    *pfd = -1;
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+    g_free(irq_set);
 
     if (vector->virq < 0) {
         qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@@ -419,18 +525,21 @@ static MSIMessage msi_get_msg(PCIDevice *pdev, unsigned int vector)
     return msg;
 }
 
-/* So should this */
-static void msi_set_qsize(PCIDevice *pdev, uint8_t size)
+static void vfio_enable_msix(VFIODevice *vdev)
 {
-    uint8_t *config = pdev->config + pdev->msi_cap;
-    uint16_t flags;
-
-    flags = pci_get_word(config + PCI_MSI_FLAGS);
-    flags = le16_to_cpu(flags);
-    flags &= ~PCI_MSI_FLAGS_QSIZE;
-    flags |= (size & 0x7) << 4;
-    flags = cpu_to_le16(flags);
-    pci_set_word(config + PCI_MSI_FLAGS, flags);
+    vfio_disable_interrupts(vdev);
+
+    vdev->msi_vectors = g_malloc0(vdev->msix->entries * sizeof(VFIOMSIVector));
+
+    vdev->interrupt = VFIO_INT_MSIX;
+
+    if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+                                  vfio_msix_vector_release)) {
+        error_report("vfio: msix_set_vector_notifiers failed\n");
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
 }
 
 static void vfio_enable_msi(VFIODevice *vdev)
@@ -503,19 +612,43 @@ retry:
         return;
     }
 
-    msi_set_qsize(&vdev->pdev, vdev->nr_vectors);
+    vdev->interrupt = VFIO_INT_MSI;
 
     DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function, vdev->nr_vectors);
 }
 
-static void vfio_disable_msi_x(VFIODevice *vdev, bool msix)
+static void vfio_disable_msi_common(VFIODevice *vdev)
+{
+    g_free(vdev->msi_vectors);
+    vdev->msi_vectors = NULL;
+    vdev->nr_vectors = 0;
+    vdev->interrupt = VFIO_INT_NONE;
+
+    vfio_enable_intx(vdev);
+}
+
+static void vfio_disable_msix(VFIODevice *vdev)
+{
+    msix_unset_vector_notifiers(&vdev->pdev);
+
+    if (vdev->nr_vectors) {
+        vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
+    }
+
+    vfio_disable_msi_common(vdev);
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, msi%s)\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, msix ? "x" : "");
+}
+
+static void vfio_disable_msi(VFIODevice *vdev)
 {
     int i;
 
-    vfio_disable_irqindex(vdev, msix ? VFIO_PCI_MSIX_IRQ_INDEX :
-                                       VFIO_PCI_MSI_IRQ_INDEX);
+    vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX);
 
     for (i = 0; i < vdev->nr_vectors; i++) {
         VFIOMSIVector *vector = &vdev->msi_vectors[i];
@@ -534,26 +667,13 @@ static void vfio_disable_msi_x(VFIODevice *vdev, bool msix)
                                 NULL, NULL, NULL);
         }
 
-        if (msix) {
-            msix_vector_unuse(&vdev->pdev, i);
-        }
-
         event_notifier_cleanup(&vector->interrupt);
     }
 
-    g_free(vdev->msi_vectors);
-    vdev->msi_vectors = NULL;
-    vdev->nr_vectors = 0;
-
-    if (!msix) {
-        msi_set_qsize(&vdev->pdev, 0); /* Actually still means 1 vector */
-    }
-
-    DPRINTF("%s(%04x:%02x:%02x.%x, msi%s)\n", __func__,
-            vdev->host.domain, vdev->host.bus, vdev->host.slot,
-            vdev->host.function, msix ? "x" : "");
+    vfio_disable_msi_common(vdev);
 
-    vfio_enable_intx(vdev);
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
 }
 
 /*
@@ -601,7 +721,7 @@ static void vfio_bar_write(void *opaque, target_phys_addr_t addr,
      * which access will service the interrupt, so we're potentially
      * getting quite a few host interrupts per guest interrupt.
      */
-    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
+    vfio_eoi(container_of(bar, VFIODevice, bars[bar->nr]));
 }
 
 static uint64_t vfio_bar_read(void *opaque,
@@ -641,7 +761,7 @@ static uint64_t vfio_bar_read(void *opaque,
             __func__, bar->nr, addr, size, data);
 
     /* Same as write above */
-    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
+    vfio_eoi(container_of(bar, VFIODevice, bars[bar->nr]));
 
     return data;
 }
@@ -739,7 +859,7 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
         if (!was_enabled && is_enabled) {
             vfio_enable_msi(vdev);
         } else if (was_enabled && !is_enabled) {
-            vfio_disable_msi_x(vdev, false);
+            vfio_disable_msi(vdev);
         }
     }
 
@@ -752,9 +872,9 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
         is_enabled = msix_enabled(pdev);
 
         if (!was_enabled && is_enabled) {
-            /* vfio_msix_vector_use handles this automatically */
+            vfio_enable_msix(vdev);
         } else if (was_enabled && !is_enabled) {
-            vfio_disable_msi_x(vdev, true);
+            vfio_disable_msix(vdev);
         }
     }
 }
@@ -762,45 +882,52 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
 /*
  * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
  */
-static int vfio_dma_map(VFIOContainer *container, target_phys_addr_t iova,
-                        ram_addr_t size, void *vaddr, bool readonly)
+static int vfio_dma_unmap(VFIOContainer *container,
+                          target_phys_addr_t iova, ram_addr_t size)
 {
-    struct vfio_iommu_type1_dma_map map = {
-        .argsz = sizeof(map),
-        .flags = VFIO_DMA_MAP_FLAG_READ,
-        .vaddr = (__u64)(intptr_t)vaddr,
+    struct vfio_iommu_type1_dma_unmap unmap = {
+        .argsz = sizeof(unmap),
+        .flags = 0,
         .iova = iova,
         .size = size,
     };
 
-    if (!readonly) {
-        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
-    }
-
-    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
-        DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
+    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+        DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
         return -errno;
     }
 
     return 0;
 }
 
-static int vfio_dma_unmap(VFIOContainer *container,
-                          target_phys_addr_t iova, ram_addr_t size)
+static int vfio_dma_map(VFIOContainer *container, target_phys_addr_t iova,
+                        ram_addr_t size, void *vaddr, bool readonly)
 {
-    struct vfio_iommu_type1_dma_unmap unmap = {
-        .argsz = sizeof(unmap),
-        .flags = 0,
+    struct vfio_iommu_type1_dma_map map = {
+        .argsz = sizeof(map),
+        .flags = VFIO_DMA_MAP_FLAG_READ,
+        .vaddr = (__u64)(uintptr_t)vaddr,
         .iova = iova,
         .size = size,
     };
 
-    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
-        DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
-        return -errno;
+    if (!readonly) {
+        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
     }
 
-    return 0;
+    /*
+     * Try the mapping, if it fails with EBUSY, unmap the region and try
+     * again.  This shouldn't be necessary, but we sometimes see it in
+     * the the VGA ROM space.
+     */
+    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
+        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
+         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
+        return 0;
+    }
+
+    DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
+    return -errno;
 }
 
 static void vfio_listener_dummy1(MemoryListener *listener)
@@ -942,10 +1069,10 @@ static void vfio_disable_interrupts(VFIODevice *vdev)
         vfio_disable_intx(vdev);
         break;
     case VFIO_INT_MSI:
-        vfio_disable_msi_x(vdev, false);
+        vfio_disable_msi(vdev);
         break;
     case VFIO_INT_MSIX:
-        vfio_disable_msi_x(vdev, true);
+        vfio_disable_msix(vdev);
         break;
     }
 }
@@ -956,14 +1083,6 @@ static int vfio_setup_msi(VFIODevice *vdev, int pos)
     bool msi_64bit, msi_maskbit;
     int ret, entries;
 
-    /*
-     * TODO: don't peek into msi_supported, let msi_init fail and
-     * check for ENOTSUP
-     */
-    if (!msi_supported) {
-        return 0;
-    }
-
     if (pread(vdev->fd, &ctrl, sizeof(ctrl),
               vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
         return -errno;
@@ -979,6 +1098,9 @@ static int vfio_setup_msi(VFIODevice *vdev, int pos)
 
     ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
     if (ret < 0) {
+        if (ret == -ENOTSUP) {
+            return 0;
+        }
         error_report("vfio: msi_init failed\n");
         return ret;
     }
@@ -1045,33 +1167,19 @@ static int vfio_setup_msix(VFIODevice *vdev, int pos)
 {
     int ret;
 
-    /*
-     * TODO: don't peek into msi_supported, let msix_init fail and
-     * check for ENOTSUP
-     */
-    if (!msi_supported) {
-        return 0;
-    }
-
     ret = msix_init(&vdev->pdev, vdev->msix->entries,
                     &vdev->bars[vdev->msix->table_bar].mem,
                     vdev->msix->table_bar, vdev->msix->table_offset,
                     &vdev->bars[vdev->msix->pba_bar].mem,
                     vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
     if (ret < 0) {
+        if (ret == -ENOTSUP) {
+            return 0;
+        }
         error_report("vfio: msix_init failed\n");
         return ret;
     }
 
-    ret = msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
-                                    vfio_msix_vector_release);
-    if (ret) {
-        error_report("vfio: msix_set_vector_notifiers failed %d\n", ret);
-        msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
-                    &vdev->bars[vdev->msix->pba_bar].mem);
-        return ret;
-    }
-
     return 0;
 }
 
@@ -1080,12 +1188,6 @@ static void vfio_teardown_msi(VFIODevice *vdev)
     msi_uninit(&vdev->pdev);
 
     if (vdev->msix) {
-        /* FIXME: Why can't unset just silently do nothing?? */
-        if (vdev->pdev.msix_vector_use_notifier &&
-            vdev->pdev.msix_vector_release_notifier) {
-            msix_unset_vector_notifiers(&vdev->pdev);
-        }
-
         msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
                     &vdev->bars[vdev->msix->pba_bar].mem);
     }
@@ -1766,17 +1868,8 @@ static int vfio_initfn(PCIDevice *pdev)
     }
 
     if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
-        if (vdev->intx.intx && strcmp(vdev->intx.intx, "off")) {
-            error_report("vfio: Unknown option x-intx=%s, "
-                         "valid options: \"off\".\n", vdev->intx.intx);
-            ret = -EINVAL;
-            goto out_teardown;
-        }
-
-        if (vdev->intx.intx && !strcmp(vdev->intx.intx, "off")) {
-            vdev->intx.disabled = true;
-        }
-
+        vdev->intx.mmap_timer = qemu_new_timer_ms(vm_clock,
+                                                  vfio_intx_mmap_enable, vdev);
         ret = vfio_enable_intx(vdev);
         if (ret) {
             goto out_teardown;
@@ -1802,6 +1895,9 @@ static void vfio_exitfn(PCIDevice *pdev)
 
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     vfio_disable_interrupts(vdev);
+    if (vdev->intx.mmap_timer) {
+        qemu_free_timer(vdev->intx.mmap_timer);
+    }
     vfio_teardown_msi(vdev);
     vfio_unmap_bars(vdev);
     vfio_put_device(vdev);
@@ -1812,21 +1908,37 @@ static void vfio_pci_reset(DeviceState *dev)
 {
     PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
     VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    uint16_t cmd;
 
-    if (!vdev->reset_works) {
-        return;
-    }
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    vfio_disable_interrupts(vdev);
 
-    if (ioctl(vdev->fd, VFIO_DEVICE_RESET)) {
-        error_report("vfio: Error unable to reset physical device "
-                     "(%04x:%02x:%02x.%x): %m\n", vdev->host.domain,
-                     vdev->host.bus, vdev->host.slot, vdev->host.function);
+    /*
+     * Stop any ongoing DMA by disconecting I/O, MMIO, and bus master.
+     * Also put INTx Disable in known state.
+     */
+    cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
+    cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
+             PCI_COMMAND_INTX_DISABLE);
+    vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
+
+    if (vdev->reset_works) {
+        if (ioctl(vdev->fd, VFIO_DEVICE_RESET)) {
+            error_report("vfio: Error unable to reset physical device "
+                         "(%04x:%02x:%02x.%x): %m\n", vdev->host.domain,
+                         vdev->host.bus, vdev->host.slot, vdev->host.function);
+        }
     }
+
+    vfio_enable_intx(vdev);
 }
 
 static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
-    DEFINE_PROP_STRING("x-intx", VFIODevice, intx.intx),
+    DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIODevice,
+                       intx.mmap_timeout, 1100),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
diff --git a/hw/vfio_pci_int.h b/hw/vfio_pci_int.h
deleted file mode 100644
index 3812d8d7f1..0000000000
--- a/hw/vfio_pci_int.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * vfio based device assignment support
- *
- * Copyright Red Hat, Inc. 2012
- *
- * Authors:
- *  Alex Williamson <alex.williamson@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- */
-
-#ifndef HW_VFIO_PCI_INT_H
-#define HW_VFIO_PCI_INT_H
-
-#include "qemu-common.h"
-#include "qemu-queue.h"
-#include "pci.h"
-#include "event_notifier.h"
-
-typedef struct VFIOBAR {
-    off_t fd_offset; /* offset of BAR within device fd */
-    int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
-    MemoryRegion mem; /* slow, read/write access */
-    MemoryRegion mmap_mem; /* direct mapped access */
-    void *mmap;
-    size_t size;
-    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
-    uint8_t nr; /* cache the BAR number for debug */
-} VFIOBAR;
-
-typedef struct VFIOINTx {
-    bool pending; /* interrupt pending */
-    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
-    uint8_t pin; /* which pin to pull for qemu_set_irq */
-    EventNotifier interrupt; /* eventfd triggered on interrupt */
-    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
-    PCIINTxRoute route; /* routing info for QEMU bypass */
-    bool disabled;
-    char *intx;
-} VFIOINTx;
-
-struct VFIODevice;
-
-typedef struct VFIOMSIVector {
-    EventNotifier interrupt; /* eventfd triggered on interrupt */
-    struct VFIODevice *vdev; /* back pointer to device */
-    int virq; /* KVM irqchip route for QEMU bypass */
-    bool use;
-} VFIOMSIVector;
-
-enum {
-    VFIO_INT_NONE = 0,
-    VFIO_INT_INTx = 1,
-    VFIO_INT_MSI  = 2,
-    VFIO_INT_MSIX = 3,
-};
-
-struct VFIOGroup;
-
-typedef struct VFIOContainer {
-    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
-    struct {
-        /* enable abstraction to support various iommu backends */
-        union {
-            MemoryListener listener; /* Used by type1 iommu */
-        };
-        void (*release)(struct VFIOContainer *);
-    } iommu_data;
-    QLIST_HEAD(, VFIOGroup) group_list;
-    QLIST_ENTRY(VFIOContainer) next;
-} VFIOContainer;
-
-/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
-typedef struct VFIOMSIXInfo {
-    uint8_t table_bar;
-    uint8_t pba_bar;
-    uint16_t entries;
-    uint32_t table_offset;
-    uint32_t pba_offset;
-    MemoryRegion mmap_mem;
-    void *mmap;
-} VFIOMSIXInfo;
-
-typedef struct VFIODevice {
-    PCIDevice pdev;
-    int fd;
-    VFIOINTx intx;
-    unsigned int config_size;
-    off_t config_offset; /* Offset of config space region within device fd */
-    unsigned int rom_size;
-    off_t rom_offset; /* Offset of ROM region within device fd */
-    int msi_cap_size;
-    VFIOMSIVector *msi_vectors;
-    VFIOMSIXInfo *msix;
-    int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
-    int interrupt; /* Current interrupt type */
-    VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
-    PCIHostDeviceAddress host;
-    QLIST_ENTRY(VFIODevice) next;
-    struct VFIOGroup *group;
-    bool reset_works;
-} VFIODevice;
-
-typedef struct VFIOGroup {
-    int fd;
-    int groupid;
-    VFIOContainer *container;
-    QLIST_HEAD(, VFIODevice) device_list;
-    QLIST_ENTRY(VFIOGroup) next;
-    QLIST_ENTRY(VFIOGroup) container_next;
-} VFIOGroup;
-
-#endif /* HW_VFIO_PCI_INT_H */
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 247d7bef56..8342391d90 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -973,6 +973,11 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
         }
     }
     n->mac_table.first_multi = i;
+
+    /* nc.link_down can't be migrated, so infer link_down according
+     * to link status bit in n->status */
+    n->nic->nc.link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
+
     return 0;
 }
 
diff --git a/hw/zynq_slcr.c b/hw/zynq_slcr.c
index 4f97575770..8acba01c3a 100644
--- a/hw/zynq_slcr.c
+++ b/hw/zynq_slcr.c
@@ -91,7 +91,7 @@ typedef enum {
 typedef enum {
   PSS,
   DDDR,
-  DMAC,
+  DMAC = 3,
   USB,
   GEM,
   SDIO,
diff --git a/linux-user/main.c b/linux-user/main.c
index bcaadb67cb..5d20abd3e5 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1114,6 +1114,11 @@ void cpu_loop (CPUSPARCState *env)
     while (1) {
         trapnr = cpu_sparc_exec (env);
 
+        /* Compute PSR before exposing state.  */
+        if (env->cc_op != CC_OP_FLAGS) {
+            cpu_get_psr(env);
+        }
+
         switch (trapnr) {
 #ifndef TARGET_SPARC64
         case 0x88:
@@ -2522,6 +2527,7 @@ void cpu_loop(CPUMBState *env)
         case EXCP_BREAK:
             /* Return address is 4 bytes after the call.  */
             env->regs[14] += 4;
+            env->sregs[SR_PC] = env->regs[14];
             ret = do_syscall(env, 
                              env->regs[12], 
                              env->regs[5], 
@@ -2532,7 +2538,6 @@ void cpu_loop(CPUMBState *env)
                              env->regs[10],
                              0, 0);
             env->regs[3] = ret;
-            env->sregs[SR_PC] = env->regs[14];
             break;
         case EXCP_HW_EXCP:
             env->regs[17] = env->sregs[SR_PC] + 4;
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 14a6b32ab1..e4291ed776 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6942,6 +6942,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = get_errno(do_fork(cpu_env, arg1, arg2, arg3, arg5, arg4));
 #elif defined(TARGET_CRIS)
         ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg4, arg5));
+#elif defined(TARGET_MICROBLAZE)
+        ret = get_errno(do_fork(cpu_env, arg1, arg2, arg4, arg6, arg5));
 #elif defined(TARGET_S390X)
         ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg5, arg4));
 #else
diff --git a/net.c b/net.c
index a187a7b3db..ae4bc0d431 100644
--- a/net.c
+++ b/net.c
@@ -21,17 +21,14 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "net.h"
-
 #include "config-host.h"
 
-#include "net/tap.h"
-#include "net/socket.h"
-#include "net/dump.h"
-#include "net/slirp.h"
-#include "net/vde.h"
+#include "net.h"
+#include "net/clients.h"
 #include "net/hub.h"
+#include "net/slirp.h"
 #include "net/util.h"
+
 #include "monitor.h"
 #include "qemu-common.h"
 #include "qemu_socket.h"
diff --git a/net/socket.h b/net/clients.h
index 3f8a092459..c58cc6087c 100644
--- a/net/socket.h
+++ b/net/clients.h
@@ -21,13 +21,35 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef QEMU_NET_SOCKET_H
-#define QEMU_NET_SOCKET_H
+#ifndef QEMU_NET_CLIENTS_H
+#define QEMU_NET_CLIENTS_H
 
 #include "net.h"
 #include "qapi-types.h"
 
+int net_init_dump(const NetClientOptions *opts, const char *name,
+                  NetClientState *peer);
+
+#ifdef CONFIG_SLIRP
+int net_init_slirp(const NetClientOptions *opts, const char *name,
+                   NetClientState *peer);
+#endif
+
+int net_init_hubport(const NetClientOptions *opts, const char *name,
+                     NetClientState *peer);
+
 int net_init_socket(const NetClientOptions *opts, const char *name,
                     NetClientState *peer);
 
-#endif /* QEMU_NET_SOCKET_H */
+int net_init_tap(const NetClientOptions *opts, const char *name,
+                 NetClientState *peer);
+
+int net_init_bridge(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer);
+
+#ifdef CONFIG_VDE
+int net_init_vde(const NetClientOptions *opts, const char *name,
+                 NetClientState *peer);
+#endif
+
+#endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/dump.c b/net/dump.c
index 004231d481..e0a5d74644 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 
-#include "dump.h"
+#include "clients.h"
 #include "qemu-common.h"
 #include "qemu-error.h"
 #include "qemu-log.h"
diff --git a/net/dump.h b/net/dump.h
deleted file mode 100644
index 33f152b460..0000000000
--- a/net/dump.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef QEMU_NET_DUMP_H
-#define QEMU_NET_DUMP_H
-
-#include "net.h"
-#include "qapi-types.h"
-
-int net_init_dump(const NetClientOptions *opts, const char *name,
-                  NetClientState *peer);
-
-#endif /* QEMU_NET_DUMP_H */
diff --git a/net/hub.c b/net/hub.c
index 650a8b4a40..be413012bb 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -14,6 +14,7 @@
 
 #include "monitor.h"
 #include "net.h"
+#include "clients.h"
 #include "hub.h"
 #include "iov.h"
 
diff --git a/net/hub.h b/net/hub.h
index 26a1ade1f9..4cbfdb128b 100644
--- a/net/hub.h
+++ b/net/hub.h
@@ -17,8 +17,6 @@
 
 #include "qemu-common.h"
 
-int net_init_hubport(const NetClientOptions *opts, const char *name,
-                     NetClientState *peer);
 NetClientState *net_hub_add_port(int hub_id, const char *name);
 NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
 void net_hub_info(Monitor *mon);
diff --git a/net/slirp.c b/net/slirp.c
index 8db66ea539..bf86a446c3 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -30,7 +30,8 @@
 #include <sys/wait.h>
 #endif
 #include "net.h"
-#include "net/hub.h"
+#include "clients.h"
+#include "hub.h"
 #include "monitor.h"
 #include "qemu_socket.h"
 #include "slirp/libslirp.h"
diff --git a/net/slirp.h b/net/slirp.h
index 5f685c4fb1..2ca09b65b7 100644
--- a/net/slirp.h
+++ b/net/slirp.h
@@ -31,9 +31,6 @@
 
 #ifdef CONFIG_SLIRP
 
-int net_init_slirp(const NetClientOptions *opts, const char *name,
-                   NetClientState *peer);
-
 void net_slirp_hostfwd_add(Monitor *mon, const QDict *qdict);
 void net_slirp_hostfwd_remove(Monitor *mon, const QDict *qdict);
 
diff --git a/net/socket.c b/net/socket.c
index f3d7878264..b75d567695 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -21,11 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "net/socket.h"
-
 #include "config-host.h"
 
 #include "net.h"
+#include "clients.h"
 #include "monitor.h"
 #include "qemu-char.h"
 #include "qemu-common.h"
diff --git a/net/tap-win32.c b/net/tap-win32.c
index c0ea954ca1..22dad3f8fb 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -26,9 +26,10 @@
  *  distribution); if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "net/tap.h"
+#include "tap.h"
 
 #include "qemu-common.h"
+#include "clients.h"            /* net_init_tap */
 #include "net.h"
 #include "sysemu.h"
 #include "qemu-error.h"
diff --git a/net/tap.c b/net/tap.c
index a88ae8f61a..df89caaac6 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -23,7 +23,7 @@
  * THE SOFTWARE.
  */
 
-#include "net/tap.h"
+#include "tap.h"
 
 #include "config-host.h"
 
@@ -34,6 +34,7 @@
 #include <net/if.h>
 
 #include "net.h"
+#include "clients.h"
 #include "monitor.h"
 #include "sysemu.h"
 #include "qemu-char.h"
diff --git a/net/tap.h b/net/tap.h
index 0fb018c4b7..d44d83ae73 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -32,9 +32,6 @@
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 #define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
 
-int net_init_tap(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer);
-
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
@@ -58,7 +55,4 @@ int tap_get_fd(NetClientState *nc);
 struct vhost_net;
 struct vhost_net *tap_get_vhost_net(NetClientState *nc);
 
-int net_init_bridge(const NetClientOptions *opts, const char *name,
-                    NetClientState *peer);
-
 #endif /* QEMU_NET_TAP_H */
diff --git a/net/vde.c b/net/vde.c
index b91a6c799b..275bda92c3 100644
--- a/net/vde.c
+++ b/net/vde.c
@@ -21,13 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "net/vde.h"
-
 #include "config-host.h"
 
 #include <libvdeplug.h>
 
 #include "net.h"
+#include "clients.h"
 #include "qemu-char.h"
 #include "qemu-common.h"
 #include "qemu-option.h"
diff --git a/net/vde.h b/net/vde.h
deleted file mode 100644
index 6ce6698937..0000000000
--- a/net/vde.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef QEMU_NET_VDE_H
-#define QEMU_NET_VDE_H
-
-#include "qemu-common.h"
-#include "qapi-types.h"
-
-#ifdef CONFIG_VDE
-
-int net_init_vde(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer);
-
-#endif /* CONFIG_VDE */
-
-#endif /* QEMU_NET_VDE_H */
diff --git a/qemu-ga.c b/qemu-ga.c
index b7474708f0..9b59a52461 100644
--- a/qemu-ga.c
+++ b/qemu-ga.c
@@ -40,8 +40,8 @@
 #else
 #define QGA_VIRTIO_PATH_DEFAULT "\\\\.\\Global\\org.qemu.guest_agent.0"
 #endif
-#define QGA_PIDFILE_DEFAULT "/var/run/qemu-ga.pid"
-#define QGA_STATEDIR_DEFAULT "/tmp"
+#define QGA_STATEDIR_DEFAULT CONFIG_QEMU_LOCALSTATEDIR "/run"
+#define QGA_PIDFILE_DEFAULT QGA_STATEDIR_DEFAULT "/qemu-ga.pid"
 #define QGA_SENTINEL_BYTE 0xFF
 
 struct GAState {
@@ -255,7 +255,7 @@ static bool ga_open_pidfile(const char *pidfile)
         g_critical("Failed to truncate pid file");
         goto fail;
     }
-    sprintf(pidstr, "%d", getpid());
+    snprintf(pidstr, sizeof(pidstr), "%d\n", getpid());
     if (write(pidfd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
         g_critical("Failed to write pid file");
         goto fail;
diff --git a/sparc-dis.c b/sparc-dis.c
index cdd337a738..1d017faaa3 100644
--- a/sparc-dis.c
+++ b/sparc-dis.c
@@ -3270,6 +3270,6 @@ print_insn_sparc (bfd_vma memaddr, disassemble_info *info)
     }
 
   info->insn_type = dis_noninsn;        /* Mark as non-valid instruction.  */
-  (*info->fprintf_func) (stream, _("unknown"));
+  (*info->fprintf_func) (stream, ".long %#08lx", insn);
   return sizeof (buffer);
 }
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 8bb5129d6a..9aa920d2ac 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -788,7 +788,6 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shift
     return helper_neon_qshl_u64(env, valop, shiftop);
 }
 
-/* FIXME: This is wrong.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
diff --git a/target-arm/translate.c b/target-arm/translate.c
index c6840b7832..daccb15c23 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -516,10 +516,10 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
             TCGv tmp = tcg_temp_new_i32();
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 0a7e4e3487..ee7585044b 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7768,7 +7768,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
 
         modrm = cpu_ldub_code(cpu_single_env, s->pc++);
-        reg = ((modrm >> 3) & 7);
+        reg = ((modrm >> 3) & 7) | rex_r;
 
         if (s->prefix & PREFIX_DATA)
             ot = OT_WORD;
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index 4968c244e8..88430b5057 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -345,6 +345,7 @@ static inline void cpu_clone_regs(CPUMBState *env, target_ulong newsp)
 
 static inline void cpu_set_tls(CPUMBState *env, target_ulong newtls)
 {
+    env->regs[21] = newtls;
 }
 
 static inline int cpu_interrupts_enabled(CPUMBState *env)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index ce5ddaf050..05b7730987 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -714,11 +714,13 @@ void helper_sdm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
 
 #ifndef CONFIG_USER_ONLY
 /* SMP helpers.  */
-static int mips_vpe_is_wfi(CPUMIPSState *c)
+static bool mips_vpe_is_wfi(MIPSCPU *c)
 {
+    CPUMIPSState *env = &c->env;
+
     /* If the VPE is halted but otherwise active, it means it's waiting for
        an interrupt.  */
-    return c->halted && mips_vpe_active(c);
+    return env->halted && mips_vpe_active(env);
 }
 
 static inline void mips_vpe_wake(CPUMIPSState *c)
@@ -729,27 +731,33 @@ static inline void mips_vpe_wake(CPUMIPSState *c)
     cpu_interrupt(c, CPU_INTERRUPT_WAKE);
 }
 
-static inline void mips_vpe_sleep(CPUMIPSState *c)
+static inline void mips_vpe_sleep(MIPSCPU *cpu)
 {
+    CPUMIPSState *c = &cpu->env;
+
     /* The VPE was shut off, really go to bed.
        Reset any old _WAKE requests.  */
     c->halted = 1;
     cpu_reset_interrupt(c, CPU_INTERRUPT_WAKE);
 }
 
-static inline void mips_tc_wake(CPUMIPSState *c, int tc)
+static inline void mips_tc_wake(MIPSCPU *cpu, int tc)
 {
+    CPUMIPSState *c = &cpu->env;
+
     /* FIXME: TC reschedule.  */
-    if (mips_vpe_active(c) && !mips_vpe_is_wfi(c)) {
+    if (mips_vpe_active(c) && !mips_vpe_is_wfi(cpu)) {
         mips_vpe_wake(c);
     }
 }
 
-static inline void mips_tc_sleep(CPUMIPSState *c, int tc)
+static inline void mips_tc_sleep(MIPSCPU *cpu, int tc)
 {
+    CPUMIPSState *c = &cpu->env;
+
     /* FIXME: TC reschedule.  */
     if (!mips_vpe_active(c)) {
-        mips_vpe_sleep(c);
+        mips_vpe_sleep(cpu);
     }
 }
 
@@ -1342,13 +1350,15 @@ void helper_mttc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 
 void helper_mtc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
+    MIPSCPU *cpu = mips_env_get_cpu(env);
+
     env->active_tc.CP0_TCHalt = arg1 & 0x1;
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
     if (env->active_tc.CP0_TCHalt & 1) {
-        mips_tc_sleep(env, env->current_tc);
+        mips_tc_sleep(cpu, env->current_tc);
     } else {
-        mips_tc_wake(env, env->current_tc);
+        mips_tc_wake(cpu, env->current_tc);
     }
 }
 
@@ -1356,6 +1366,7 @@ void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    MIPSCPU *other_cpu = mips_env_get_cpu(other);
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 
@@ -1365,9 +1376,9 @@ void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
         other->tcs[other_tc].CP0_TCHalt = arg1;
 
     if (arg1 & 1) {
-        mips_tc_sleep(other, other_tc);
+        mips_tc_sleep(other_cpu, other_tc);
     } else {
-        mips_tc_wake(other, other_tc);
+        mips_tc_wake(other_cpu, other_tc);
     }
 }
 
@@ -1874,35 +1885,39 @@ target_ulong helper_emt(void)
 
 target_ulong helper_dvpe(CPUMIPSState *env)
 {
-    CPUMIPSState *other_cpu = first_cpu;
+    CPUMIPSState *other_cpu_env = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
 
     do {
         /* Turn off all VPEs except the one executing the dvpe.  */
-        if (other_cpu != env) {
-            other_cpu->mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP);
+        if (other_cpu_env != env) {
+            MIPSCPU *other_cpu = mips_env_get_cpu(other_cpu_env);
+
+            other_cpu_env->mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP);
             mips_vpe_sleep(other_cpu);
         }
-        other_cpu = other_cpu->next_cpu;
-    } while (other_cpu);
+        other_cpu_env = other_cpu_env->next_cpu;
+    } while (other_cpu_env);
     return prev;
 }
 
 target_ulong helper_evpe(CPUMIPSState *env)
 {
-    CPUMIPSState *other_cpu = first_cpu;
+    CPUMIPSState *other_cpu_env = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
 
     do {
-        if (other_cpu != env
-           /* If the VPE is WFI, don't disturb its sleep.  */
-           && !mips_vpe_is_wfi(other_cpu)) {
+        MIPSCPU *other_cpu = mips_env_get_cpu(other_cpu_env);
+
+        if (other_cpu_env != env
+            /* If the VPE is WFI, don't disturb its sleep.  */
+            && !mips_vpe_is_wfi(other_cpu)) {
             /* Enable the VPE.  */
-            other_cpu->mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP);
-            mips_vpe_wake(other_cpu); /* And wake it up.  */
+            other_cpu_env->mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP);
+            mips_vpe_wake(other_cpu_env); /* And wake it up.  */
         }
-        other_cpu = other_cpu->next_cpu;
-    } while (other_cpu);
+        other_cpu_env = other_cpu_env->next_cpu;
+    } while (other_cpu_env);
     return prev;
 }
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target-sparc/int32_helper.c b/target-sparc/int32_helper.c
index 9ac5aac33a..507c355cac 100644
--- a/target-sparc/int32_helper.c
+++ b/target-sparc/int32_helper.c
@@ -62,6 +62,11 @@ void do_interrupt(CPUSPARCState *env)
 {
     int cwp, intno = env->exception_index;
 
+    /* Compute PSR before exposing state.  */
+    if (env->cc_op != CC_OP_FLAGS) {
+        cpu_get_psr(env);
+    }
+
 #ifdef DEBUG_PCALL
     if (qemu_loglevel_mask(CPU_LOG_INT)) {
         static int count;
diff --git a/target-sparc/int64_helper.c b/target-sparc/int64_helper.c
index 5d0bc6c6d2..df37aa1d14 100644
--- a/target-sparc/int64_helper.c
+++ b/target-sparc/int64_helper.c
@@ -64,6 +64,11 @@ void do_interrupt(CPUSPARCState *env)
     int intno = env->exception_index;
     trap_state *tsptr;
 
+    /* Compute PSR before exposing state.  */
+    if (env->cc_op != CC_OP_FLAGS) {
+        cpu_get_psr(env);
+    }
+
 #ifdef DEBUG_PCALL
     if (qemu_loglevel_mask(CPU_LOG_INT)) {
         static int count;
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 472eb518cd..6cef96bfa6 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -143,7 +143,7 @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
     if (src & 1) {
         return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2]));
     } else {
-        TCGv_i32 ret = tcg_temp_local_new_i32();
+        TCGv_i32 ret = tcg_temp_new_i32();
         TCGv_i64 t = tcg_temp_new_i64();
 
         tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32);
@@ -1005,14 +1005,17 @@ static inline void save_npc(DisasContext *dc)
     }
 }
 
-static inline void save_state(DisasContext *dc)
+static inline void update_psr(DisasContext *dc)
 {
-    tcg_gen_movi_tl(cpu_pc, dc->pc);
-    /* flush pending conditional evaluations before exposing cpu state */
     if (dc->cc_op != CC_OP_FLAGS) {
         dc->cc_op = CC_OP_FLAGS;
         gen_helper_compute_psr(cpu_env);
     }
+}
+
+static inline void save_state(DisasContext *dc)
+{
+    tcg_gen_movi_tl(cpu_pc, dc->pc);
     save_npc(dc);
 }
 
@@ -1050,7 +1053,7 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
                         DisasContext *dc)
 {
     static int subcc_cond[16] = {
-        -1, /* never */
+        TCG_COND_NEVER,
         TCG_COND_EQ,
         TCG_COND_LE,
         TCG_COND_LT,
@@ -1058,7 +1061,7 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
         TCG_COND_LTU,
         -1, /* neg */
         -1, /* overflow */
-        -1, /* always */
+        TCG_COND_ALWAYS,
         TCG_COND_NE,
         TCG_COND_GT,
         TCG_COND_GE,
@@ -1068,6 +1071,25 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
         -1, /* no overflow */
     };
 
+    static int logic_cond[16] = {
+        TCG_COND_NEVER,
+        TCG_COND_EQ,     /* eq:  Z */
+        TCG_COND_LE,     /* le:  Z | (N ^ V) -> Z | N */
+        TCG_COND_LT,     /* lt:  N ^ V -> N */
+        TCG_COND_EQ,     /* leu: C | Z -> Z */
+        TCG_COND_NEVER,  /* ltu: C -> 0 */
+        TCG_COND_LT,     /* neg: N */
+        TCG_COND_NEVER,  /* vs:  V -> 0 */
+        TCG_COND_ALWAYS,
+        TCG_COND_NE,     /* ne:  !Z */
+        TCG_COND_GT,     /* gt:  !(Z | (N ^ V)) -> !(Z | N) */
+        TCG_COND_GE,     /* ge:  !(N ^ V) -> !N */
+        TCG_COND_NE,     /* gtu: !(C | Z) -> !Z */
+        TCG_COND_ALWAYS, /* geu: !C -> 1 */
+        TCG_COND_GE,     /* pos: !N */
+        TCG_COND_ALWAYS, /* vc:  !V -> 1 */
+    };
+
     TCGv_i32 r_src;
     TCGv r_dst;
 
@@ -1082,28 +1104,31 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
 #endif
 
     switch (dc->cc_op) {
+    case CC_OP_LOGIC:
+        cmp->cond = logic_cond[cond];
+    do_compare_dst_0:
+        cmp->is_bool = false;
+        cmp->g2 = false;
+        cmp->c2 = tcg_const_tl(0);
+#ifdef TARGET_SPARC64
+        if (!xcc) {
+            cmp->g1 = false;
+            cmp->c1 = tcg_temp_new();
+            tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst);
+            break;
+        }
+#endif
+        cmp->g1 = true;
+        cmp->c1 = cpu_cc_dst;
+        break;
+
     case CC_OP_SUB:
         switch (cond) {
         case 6:  /* neg */
         case 14: /* pos */
             cmp->cond = (cond == 6 ? TCG_COND_LT : TCG_COND_GE);
-            cmp->is_bool = false;
-            cmp->g2 = false;
-            cmp->c2 = tcg_const_tl(0);
-#ifdef TARGET_SPARC64
-            if (!xcc) {
-                cmp->g1 = false;
-                cmp->c1 = tcg_temp_new();
-                tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst);
-                break;
-            }
-#endif
-            cmp->g1 = true;
-            cmp->c1 = cpu_cc_dst;
-            break;
+            goto do_compare_dst_0;
 
-        case 0: /* never */
-        case 8: /* always */
         case 7: /* overflow */
         case 15: /* !overflow */
             goto do_dynamic;
@@ -1120,6 +1145,7 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
                 cmp->c2 = tcg_temp_new();
                 tcg_gen_ext32s_tl(cmp->c1, cpu_cc_src);
                 tcg_gen_ext32s_tl(cmp->c2, cpu_cc_src2);
+                break;
             }
 #endif
             cmp->g1 = cmp->g2 = true;
@@ -2681,7 +2707,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
 #ifdef TARGET_SPARC64
                 case 0x2: /* V9 rdccr */
-                    gen_helper_compute_psr(cpu_env);
+                    update_psr(dc);
                     gen_helper_rdccr(cpu_dst, cpu_env);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -2760,10 +2786,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #if !defined(CONFIG_USER_ONLY)
             } else if (xop == 0x29) { /* rdpsr / UA2005 rdhpr */
 #ifndef TARGET_SPARC64
-                if (!supervisor(dc))
+                if (!supervisor(dc)) {
                     goto priv_insn;
-                gen_helper_compute_psr(cpu_env);
-                dc->cc_op = CC_OP_FLAGS;
+                }
+                update_psr(dc);
                 gen_helper_rdpsr(cpu_dst, cpu_env);
 #else
                 CHECK_IU_FEATURE(dc, HYPV);
@@ -3589,7 +3615,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         dc->cc_op = CC_OP_TSUBTV;
                         break;
                     case 0x24: /* mulscc */
-                        gen_helper_compute_psr(cpu_env);
+                        update_psr(dc);
                         gen_op_mulscc(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
@@ -3862,28 +3888,16 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 tcg_gen_mov_tl(cpu_tbr, cpu_tmp0);
                                 break;
                             case 6: // pstate
-                                {
-                                    TCGv r_tmp = tcg_temp_local_new();
-
-                                    tcg_gen_mov_tl(r_tmp, cpu_tmp0);
-                                    save_state(dc);
-                                    gen_helper_wrpstate(cpu_env, r_tmp);
-                                    tcg_temp_free(r_tmp);
-                                    dc->npc = DYNAMIC_PC;
-                                }
+                                save_state(dc);
+                                gen_helper_wrpstate(cpu_env, cpu_tmp0);
+                                dc->npc = DYNAMIC_PC;
                                 break;
                             case 7: // tl
-                                {
-                                    TCGv r_tmp = tcg_temp_local_new();
-
-                                    tcg_gen_mov_tl(r_tmp, cpu_tmp0);
-                                    save_state(dc);
-                                    tcg_gen_trunc_tl_i32(cpu_tmp32, r_tmp);
-                                    tcg_temp_free(r_tmp);
-                                    tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                                   offsetof(CPUSPARCState, tl));
-                                    dc->npc = DYNAMIC_PC;
-                                }
+                                save_state(dc);
+                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
+                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
+                                               offsetof(CPUSPARCState, tl));
+                                dc->npc = DYNAMIC_PC;
                                 break;
                             case 8: // pil
                                 gen_helper_wrpil(cpu_env, cpu_tmp0);
@@ -4640,12 +4654,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
 
-            /* flush pending conditional evaluations before exposing
-               cpu state */
-            if (dc->cc_op != CC_OP_FLAGS) {
-                dc->cc_op = CC_OP_FLAGS;
-                gen_helper_compute_psr(cpu_env);
-            }
             cpu_src1 = get_src1(insn, cpu_src1);
             if (xop == 0x3c || xop == 0x3e) { // V9 casa/casxa
                 rs2 = GET_FIELD(insn, 27, 31);
@@ -5496,9 +5504,4 @@ void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb, int pc_pos)
     } else {
         env->npc = npc;
     }
-
-    /* flush pending conditional evaluations before exposing cpu state */
-    if (CC_OP != CC_OP_FLAGS) {
-        helper_compute_psr(env);
-    }
 }
diff --git a/targphys.h b/targphys.h
index 08cade9096..50911fd12f 100644
--- a/targphys.h
+++ b/targphys.h
@@ -3,6 +3,8 @@
 #ifndef TARGPHYS_H
 #define TARGPHYS_H
 
+#ifndef CONFIG_USER_ONLY
+
 #define TARGET_PHYS_ADDR_BITS 64
 /* target_phys_addr_t is the type of a physical address (its size can
    be different from 'target_ulong').  */
@@ -18,3 +20,5 @@ typedef uint64_t target_phys_addr_t;
 #define TARGET_PRIXPHYS PRIX64
 
 #endif
+
+#endif
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 737200e5e6..e790bf04b4 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -467,6 +467,21 @@ static inline void tcg_out_movi32(TCGContext *s,
     }
 }
 
+static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
+                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
+{
+    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+     * rhs must satisfy the "rI" constraint.
+     */
+    if (rhs_is_const) {
+        int rot = encode_imm(rhs);
+        assert(rot >= 0);
+        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+    } else {
+        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+    }
+}
+
 static inline void tcg_out_mul32(TCGContext *s,
                 int cond, int rd, int rs, int rm)
 {
@@ -1557,6 +1572,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_movi_i32:
         tcg_out_movi32(s, COND_AL, args[0], args[1]);
         break;
+    case INDEX_op_movcond_i32:
+        /* Constraints mean that v2 is always in the same register as dest,
+         * so we only need to do "if condition passed, move v1 to dest".
+         */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+                       args[1], args[2], const_args[2]);
+        tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]],
+                       ARITH_MOV, args[0], 0, args[3], const_args[3]);
+        break;
     case INDEX_op_add_i32:
         c = ARITH_ADD;
         goto gen_arith;
@@ -1576,14 +1600,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         c = ARITH_EOR;
         /* Fall through.  */
     gen_arith:
-        if (const_args[2]) {
-            int rot;
-            rot = encode_imm(args[2]);
-            tcg_out_dat_imm(s, COND_AL, c,
-                            args[0], args[1], rotl(args[2], rot) | (rot << 7));
-        } else
-            tcg_out_dat_reg(s, COND_AL, c,
-                            args[0], args[1], args[2], SHIFT_IMM_LSL(0));
+        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_add2_i32:
         tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
@@ -1643,15 +1660,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_brcond_i32:
-        if (const_args[1]) {
-            int rot;
-            rot = encode_imm(args[1]);
-            tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
-                            args[0], rotl(args[1], rot) | (rot << 7));
-        } else {
-            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                            args[0], args[1], SHIFT_IMM_LSL(0));
-        }
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+                       args[0], args[1], const_args[1]);
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
         break;
     case INDEX_op_brcond2_i32:
@@ -1670,15 +1680,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
         break;
     case INDEX_op_setcond_i32:
-        if (const_args[2]) {
-            int rot;
-            rot = encode_imm(args[2]);
-            tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
-                            args[1], rotl(args[2], rot) | (rot << 7));
-        } else {
-            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                            args[1], args[2], SHIFT_IMM_LSL(0));
-        }
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+                       args[1], args[2], const_args[2]);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                         ARITH_MOV, args[0], 0, 1);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
@@ -1788,6 +1791,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
 
     { INDEX_op_brcond_i32, { "r", "rI" } },
     { INDEX_op_setcond_i32, { "r", "r", "rI" } },
+    { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } },
 
     /* TODO: "r", "r", "r", "r", "ri", "ri" */
     { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 2bc7dff846..98fa11b286 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,7 +73,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 enum {
     TCG_AREG0 = TCG_REG_R6,
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 705712f775..06570bea38 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -230,6 +230,8 @@ enum {
     OPC_CMP4_LT_A6            = 0x18400000000ull,
     OPC_CMP4_LTU_A6           = 0x1a400000000ull,
     OPC_CMP4_EQ_A6            = 0x1c400000000ull,
+    OPC_DEP_I14               = 0x0ae00000000ull,
+    OPC_DEP_I15               = 0x08000000000ull,
     OPC_DEP_Z_I12             = 0x0a600000000ull,
     OPC_EXTR_I11              = 0x0a400002000ull,
     OPC_EXTR_U_I11            = 0x0a400000000ull,
@@ -501,6 +503,30 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm,
+                                   int r3, uint64_t pos, uint64_t len)
+{
+    return opc
+           | ((imm & 0x01) << 36)
+           | ((len & 0x3f) << 27)
+           | ((r3 & 0x7f) << 20)
+           | ((pos & 0x3f) << 14)
+           | ((r1 & 0x7f) << 6)
+           | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2,
+                                   int r3, uint64_t pos, uint64_t len)
+{
+    return opc
+           | ((pos & 0x3f) << 31)
+           | ((len & 0x0f) << 27)
+           | ((r3 & 0x7f) << 20)
+           | ((r2 & 0x7f) << 13)
+           | ((r1 & 0x7f) << 6)
+           | (qp & 0x3f);
+}
+
 static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
 {
     return opc
@@ -1312,6 +1338,37 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
 }
 
+static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
+                                   TCGArg a2, int const_a2, int pos, int len)
+{
+    uint64_t i1 = 0, i2 = 0;
+    int cpos = 63 - pos, lm1 = len - 1;
+
+    if (const_a2) {
+        /* Truncate the value of a constant a2 to the width of the field.  */
+        int mask = (1u << len) - 1;
+        a2 &= mask;
+
+        if (a2 == 0 || a2 == mask) {
+            /* 1-bit signed constant inserted into register.  */
+            i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1);
+        } else {
+            /* Otherwise, load any constant into a temporary.  Do this into
+               the first I slot to help out with cross-unit delays.  */
+            i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                            TCG_REG_R2, a2, TCG_REG_R0);
+            a2 = TCG_REG_R2;
+        }
+    }
+    if (i2 == 0) {
+        i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
+    }
+    tcg_out_bundle(s, (i1 ? mII : miI),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   i2);
+}
+
 static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
                                      TCGArg arg2, int cmp4)
 {
@@ -1404,21 +1461,47 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
                    tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
 }
 
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+                                   TCGArg c1, TCGArg c2,
+                                   TCGArg v1, int const_v1,
+                                   TCGArg v2, int const_v2, int cmp4)
+{
+    uint64_t opc1, opc2;
+
+    if (const_v1) {
+        opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+    } else if (ret == v1) {
+        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    } else {
+        opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+    }
+    if (const_v2) {
+        opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+    } else if (ret == v2) {
+        opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    } else {
+        opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+    }
+
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+                   opc1,
+                   opc2);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
 
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
-   R56 is loaded with the address, zero extented on 32-bit targets. */
+   R57 is loaded with the address, zero extented on 32-bit targets. */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     int s_bits, uint64_t offset_rw,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
-                               TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                               TCG_REG_R0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1428,9 +1511,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                    tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
                                offset_rw, TCG_REG_R2),
 #if TARGET_LONG_BITS == 32
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg),
+                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
 #else
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56,
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
                               0, addr_reg),
 #endif
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
@@ -1438,12 +1521,13 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
     tcg_out_bundle(s, mII,
                    tcg_opc_m3 (TCG_REG_P0,
                                (TARGET_LONG_BITS == 32
-                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57,
+                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
                                TCG_REG_R2, offset_addend - offset_rw),
-                   tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
+                               TCG_REG_R57, 63 - s_bits,
+                               TARGET_PAGE_BITS - s_bits - 1),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
-                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
+                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1480,8 +1564,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57,
-                               mem_index, TCG_REG_R0),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1489,7 +1573,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     if (bswap && s_bits == 1) {
@@ -1513,23 +1597,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0));
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R8, TCG_REG_R8, 0xb),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1581,8 +1659,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57,
-                              0, data_reg),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[opc]));
@@ -1590,31 +1668,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
 
     if (!bswap || opc == 0) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else if (opc == 1) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15),
+                                   TCG_REG_R2, data_reg, 15, 15));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
     } else if (opc == 2) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31),
+                                   TCG_REG_R2, data_reg, 31, 31));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1622,25 +1711,18 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
     }
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R58, 0, TCG_REG_R57),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56));
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
+                               mem_index, TCG_REG_R0),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
@@ -2092,6 +2174,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_bswap64(s, args[0], args[1]);
         break;
 
+    case INDEX_op_deposit_i32:
+    case INDEX_op_deposit_i64:
+        tcg_out_deposit(s, args[0], args[1], args[2], const_args[2],
+                        args[3], args[4]);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], const_args[0],
                        args[1], const_args[1], args[3], 1);
@@ -2106,6 +2194,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_setcond_i64:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        args[3], const_args[3], args[4], const_args[4], 1);
+        break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        args[3], const_args[3], args[4], const_args[4], 0);
+        break;
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -2196,6 +2292,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
 
     { INDEX_op_brcond_i32, { "rI", "rI" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
@@ -2245,6 +2342,10 @@ static const TCGTargetOpDef ia64_op_defs[] = {
 
     { INDEX_op_brcond_i64, { "rI", "rI" } },
     { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+    { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
+
+    { INDEX_op_deposit_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_deposit_i64, { "r", "rZ", "ri" } },
 
     { INDEX_op_qemu_ld8u, { "r", "r" } },
     { INDEX_op_qemu_ld8s, { "r", "r" } },
@@ -2269,9 +2370,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     int frame_size;
 
     /* reserve some stack space */
-    frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = TCG_STATIC_CALL_ARGS_SIZE +
+                 CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* First emit adhoc function descriptor */
     *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */
@@ -2378,6 +2482,4 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
 
     tcg_add_target_add_op_defs(ia64_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index b7e01b25ae..91fe7a3b06 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -131,10 +131,13 @@ typedef enum {
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_rot_i64          1
-#define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i32      0
-#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_deposit_i64      1
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
+#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub r1, r0, r3 */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index edb2b0ea90..a06c8eb43e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int32_t)x < (int32_t)y;
+    case TCG_COND_GE:
+        return (int32_t)x >= (int32_t)y;
+    case TCG_COND_LE:
+        return (int32_t)x <= (int32_t)y;
+    case TCG_COND_GT:
+        return (int32_t)x > (int32_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int64_t)x < (int64_t)y;
+    case TCG_COND_GE:
+        return (int64_t)x >= (int64_t)y;
+    case TCG_COND_LE:
+        return (int64_t)x <= (int64_t)y;
+    case TCG_COND_GT:
+        return (int64_t)x > (int64_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GT:
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+    case TCG_COND_GTU:
+    case TCG_COND_NE:
+        return 0;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_EQ:
+        return 1;
+    default:
+        tcg_abort();
+    }
+}
+
 /* Return 2 if the condition can't be simplified, and the result
    of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
@@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
         switch (op_bits(op)) {
         case 32:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int32_t)temps[x].val < (int32_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int32_t)temps[x].val > (int32_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
+            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
         case 64:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int64_t)temps[x].val < (int64_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int64_t)temps[x].val > (int64_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
-        }
-    } else if (temps_are_copies(x, y)) {
-        switch (c) {
-        case TCG_COND_GT:
-        case TCG_COND_LTU:
-        case TCG_COND_LT:
-        case TCG_COND_GTU:
-        case TCG_COND_NE:
-            return 0;
-        case TCG_COND_GE:
-        case TCG_COND_GEU:
-        case TCG_COND_LE:
-        case TCG_COND_LEU:
-        case TCG_COND_EQ:
-            return 1;
+            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
         default:
-            break;
+            tcg_abort();
         }
+    } else if (temps_are_copies(x, y)) {
+        return do_constant_folding_cond_eq(c);
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
         case TCG_COND_LTU:
@@ -381,11 +396,73 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     } else {
         return 2;
     }
+}
 
-    fprintf(stderr,
-            "Unrecognized bitness %d or condition %d in "
-            "do_constant_folding_cond.\n", op_bits(op), c);
-    tcg_abort();
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+    TCGArg al = p1[0], ah = p1[1];
+    TCGArg bl = p2[0], bh = p2[1];
+
+    if (temps[bl].state == TCG_TEMP_CONST
+        && temps[bh].state == TCG_TEMP_CONST) {
+        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+        if (temps[al].state == TCG_TEMP_CONST
+            && temps[ah].state == TCG_TEMP_CONST) {
+            uint64_t a;
+            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+            return do_constant_folding_cond_64(a, b, c);
+        }
+        if (b == 0) {
+            switch (c) {
+            case TCG_COND_LTU:
+                return 0;
+            case TCG_COND_GEU:
+                return 1;
+            default:
+                break;
+            }
+        }
+    }
+    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+        return do_constant_folding_cond_eq(c);
+    }
+    return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+    TCGArg a1 = *p1, a2 = *p2;
+    int sum = 0;
+    sum += temps[a1].state == TCG_TEMP_CONST;
+    sum -= temps[a2].state == TCG_TEMP_CONST;
+
+    /* Prefer the constant in second argument, and then the form
+       op a, a, b, which is better handled on non-RISC hosts. */
+    if (sum > 0 || (sum == 0 && dest == a2)) {
+        *p1 = a2;
+        *p2 = a1;
+        return true;
+    }
+    return false;
+}
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+    int sum = 0;
+    sum += temps[p1[0]].state == TCG_TEMP_CONST;
+    sum += temps[p1[1]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+    if (sum > 0) {
+        TCGArg t;
+        t = p1[0], p1[0] = p2[0], p2[0] = t;
+        t = p1[1], p1[1] = p2[1], p2[1] = t;
+        return true;
+    }
+    return false;
 }
 
 /* Propagate constants and copies, fold constant expressions. */
@@ -397,7 +474,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
-    TCGCond cond;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -440,52 +516,46 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            /* Prefer the constant in second argument, and then the form
-               op a, a, b, which is better handled on non-RISC hosts. */
-            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
-                && temps[args[2]].state != TCG_TEMP_CONST)) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-            }
+            swap_commutative(args[0], &args[1], &args[2]);
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state != TCG_TEMP_CONST) {
-                tmp = args[0];
-                args[0] = args[1];
-                args[1] = tmp;
+            if (swap_commutative(-1, &args[0], &args[1])) {
                 args[2] = tcg_swap_cond(args[2]);
             }
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
+            if (swap_commutative(args[0], &args[1], &args[2])) {
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
         CASE_OP_32_64(movcond):
-            cond = args[5];
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-                cond = tcg_swap_cond(cond);
+            if (swap_commutative(-1, &args[1], &args[2])) {
+                args[5] = tcg_swap_cond(args[5]);
             }
             /* For movcond, we canonicalize the "false" input reg to match
                the destination reg so that the tcg backend can implement
                a "move if true" operation.  */
-            if (args[0] == args[3]) {
-                tmp = args[3];
-                args[3] = args[4];
-                args[4] = tmp;
-                cond = tcg_invert_cond(cond);
+            if (swap_commutative(args[0], &args[4], &args[3])) {
+                args[5] = tcg_invert_cond(args[5]);
             }
-            args[5] = cond;
+            break;
+        case INDEX_op_add2_i32:
+            swap_commutative(args[0], &args[2], &args[4]);
+            swap_commutative(args[1], &args[3], &args[5]);
+            break;
+        case INDEX_op_mulu2_i32:
+            swap_commutative(args[0], &args[2], &args[3]);
+            break;
+        case INDEX_op_brcond2_i32:
+            if (swap_commutative2(&args[0], &args[2])) {
+                args[4] = tcg_swap_cond(args[4]);
+            }
+            break;
+        case INDEX_op_setcond2_i32:
+            if (swap_commutative2(&args[1], &args[3])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            break;
         default:
             break;
         }
@@ -622,6 +692,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             gen_args += 2;
             args += 2;
             break;
+
         CASE_OP_32_64(not):
         CASE_OP_32_64(neg):
         CASE_OP_32_64(ext8s):
@@ -634,14 +705,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
             }
-            gen_args += 2;
-            args += 2;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -665,15 +734,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args += 3;
+                args += 3;
+                break;
             }
-            args += 3;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
@@ -683,33 +748,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                       | ((temps[args[2]].val & tmp) << args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args += 5;
+                args += 5;
+                break;
             }
-            args += 5;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(brcond):
             tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
             if (tmp != 2) {
@@ -721,17 +775,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 } else {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 }
-            } else {
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(movcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
             if (tmp != 2) {
@@ -746,18 +794,125 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_mulu2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST) {
+                uint32_t a = temps[args[2]].val;
+                uint32_t b = temps[args[3]].val;
+                uint64_t r = (uint64_t)a * b;
+                TCGArg rl, rh;
+
+                /* We emit the extra nop when we emit the mulu2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+                gen_args += 4;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_brcond2_i32:
+            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+            if (tmp != 2) {
+                if (tmp) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[5];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                       && temps[args[2]].state == TCG_TEMP_CONST
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[2]].val == 0
+                       && temps[args[3]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                gen_args[0] = args[1];
+                gen_args[1] = args[3];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
             } else {
-                reset_temp(args[0]);
+                goto do_default;
+            }
+            args += 6;
+            break;
+
+        case INDEX_op_setcond2_i32:
+            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+            if (tmp != 2) {
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[4]].state == TCG_TEMP_CONST
+                       && temps[args[3]].val == 0
+                       && temps[args[4]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                 gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args[5] = args[5];
-                gen_args += 6;
+                gen_args[1] = args[2];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+            } else {
+                goto do_default;
             }
             args += 6;
             break;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -776,11 +931,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 i--;
             }
             break;
+
         default:
-            /* Default case: we do know nothing about operation so no
-               propagation is done.  We trash everything if the operation
-               is the end of a basic block, otherwise we only trash the
-               output args.  */
+        do_default:
+            /* Default case: we know nothing about operation (or were unable
+               to compute the operation result) so no propagation is done.
+               We trash everything if the operation is the end of a basic
+               block, otherwise we only trash the output args.  */
             if (def->flags & TCG_OPF_BB_END) {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 0c32baa50e..f146647874 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -115,96 +115,6 @@ static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O3,
 };
 
-static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-{
-    return (val << ((sizeof(tcg_target_long) * 8 - bits))
-            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
-}
-
-static inline int check_fit_i32(uint32_t val, unsigned int bits)
-{
-    return ((val << (32 - bits)) >> (32 - bits)) == val;
-}
-
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
-{
-    value += addend;
-    switch (type) {
-    case R_SPARC_32:
-        if (value != (uint32_t)value)
-            tcg_abort();
-        *(uint32_t *)code_ptr = value;
-        break;
-    case R_SPARC_WDISP22:
-        value -= (long)code_ptr;
-        value >>= 2;
-        if (!check_fit_tl(value, 22))
-            tcg_abort();
-        *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x3fffff) | value;
-        break;
-    case R_SPARC_WDISP19:
-        value -= (long)code_ptr;
-        value >>= 2;
-        if (!check_fit_tl(value, 19))
-            tcg_abort();
-        *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x7ffff) | value;
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
-    const char *ct_str;
-
-    ct_str = *pct_str;
-    switch (ct_str[0]) {
-    case 'r':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        break;
-    case 'L': /* qemu_ld/st constraint */
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        // Helper args
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-        break;
-    case 'I':
-        ct->ct |= TCG_CT_CONST_S11;
-        break;
-    case 'J':
-        ct->ct |= TCG_CT_CONST_S13;
-        break;
-    default:
-        return -1;
-    }
-    ct_str++;
-    *pct_str = ct_str;
-    return 0;
-}
-
-/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
-{
-    int ct;
-
-    ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST)
-        return 1;
-    else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
-        return 1;
-    else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
-        return 1;
-    else
-        return 0;
-}
-
 #define INSN_OP(x)  ((x) << 30)
 #define INSN_OP2(x) ((x) << 22)
 #define INSN_OP3(x) ((x) << 19)
@@ -214,12 +124,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define INSN_RS2(x) (x)
 #define INSN_ASI(x) ((x) << 5)
 
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
 #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
+#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
-#define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
+#define INSN_COND(x) ((x) << 25)
 
-#define INSN_COND(x, a) (((x) << 25) | ((a) << 29))
 #define COND_N     0x0
 #define COND_E     0x1
 #define COND_LE    0x2
@@ -236,11 +147,26 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define COND_CC    0xd
 #define COND_POS   0xe
 #define COND_VC    0xf
-#define BA         (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
+#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
+
+#define RCOND_Z    1
+#define RCOND_LEZ  2
+#define RCOND_LZ   3
+#define RCOND_NZ   5
+#define RCOND_GZ   6
+#define RCOND_GEZ  7
 
 #define MOVCC_ICC  (1 << 18)
 #define MOVCC_XCC  (1 << 18 | 1 << 12)
 
+#define BPCC_ICC   0
+#define BPCC_XCC   (2 << 20)
+#define BPCC_PT    (1 << 19)
+#define BPCC_PN    0
+#define BPCC_A     (1 << 29)
+
+#define BPR_PT     BPCC_PT
+
 #define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
 #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
 #define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
@@ -260,6 +186,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
 #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
 #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
 
 #define SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25))
 #define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26))
@@ -313,6 +240,109 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 #define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 
+static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
+{
+    return (val << ((sizeof(tcg_target_long) * 8 - bits))
+            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
+}
+
+static inline int check_fit_i32(uint32_t val, unsigned int bits)
+{
+    return ((val << (32 - bits)) >> (32 - bits)) == val;
+}
+
+static void patch_reloc(uint8_t *code_ptr, int type,
+                        tcg_target_long value, tcg_target_long addend)
+{
+    uint32_t insn;
+    value += addend;
+    switch (type) {
+    case R_SPARC_32:
+        if (value != (uint32_t)value) {
+            tcg_abort();
+        }
+        *(uint32_t *)code_ptr = value;
+        break;
+    case R_SPARC_WDISP16:
+        value -= (long)code_ptr;
+        if (!check_fit_tl(value >> 2, 16)) {
+            tcg_abort();
+        }
+        insn = *(uint32_t *)code_ptr;
+        insn &= ~INSN_OFF16(-1);
+        insn |= INSN_OFF16(value);
+        *(uint32_t *)code_ptr = insn;
+        break;
+    case R_SPARC_WDISP19:
+        value -= (long)code_ptr;
+        if (!check_fit_tl(value >> 2, 19)) {
+            tcg_abort();
+        }
+        insn = *(uint32_t *)code_ptr;
+        insn &= ~INSN_OFF19(-1);
+        insn |= INSN_OFF19(value);
+        *(uint32_t *)code_ptr = insn;
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str;
+
+    ct_str = *pct_str;
+    switch (ct_str[0]) {
+    case 'r':
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+        break;
+    case 'L': /* qemu_ld/st constraint */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+        // Helper args
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+        break;
+    case 'I':
+        ct->ct |= TCG_CT_CONST_S11;
+        break;
+    case 'J':
+        ct->ct |= TCG_CT_CONST_S13;
+        break;
+    case 'Z':
+        ct->ct |= TCG_CT_CONST_ZERO;
+        break;
+    default:
+        return -1;
+    }
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val,
+                                         const TCGArgConstraint *arg_ct)
+{
+    int ct = arg_ct->ct;
+
+    if (ct & TCG_CT_CONST) {
+        return 1;
+    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+        return 1;
+    } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
+        return 1;
+    } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
                                  int op)
 {
@@ -337,7 +367,9 @@ static void tcg_out_arithc(TCGContext *s, int rd, int rs1,
 static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
 {
-    tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+    if (ret != arg) {
+        tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+    }
 }
 
 static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg)
@@ -479,39 +511,6 @@ static inline void tcg_out_nop(TCGContext *s)
     tcg_out_sethi(s, TCG_REG_G0, 0);
 }
 
-static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
-{
-    TCGLabel *l = &s->labels[label_index];
-    uint32_t off22;
-
-    if (l->has_value) {
-        off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
-    } else {
-        /* Make sure to preserve destinations during retranslation.  */
-        off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
-        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
-    }
-    tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
-{
-    TCGLabel *l = &s->labels[label_index];
-    uint32_t off19;
-
-    if (l->has_value) {
-        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
-    } else {
-        /* Make sure to preserve destinations during retranslation.  */
-        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
-        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
-    }
-    tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                  (0x5 << 19) | off19));
-}
-#endif
-
 static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_EQ] = COND_E,
     [TCG_COND_NE] = COND_NE,
@@ -525,70 +524,144 @@ static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_GTU] = COND_GU,
 };
 
+static const uint8_t tcg_cond_to_rcond[] = {
+    [TCG_COND_EQ] = RCOND_Z,
+    [TCG_COND_NE] = RCOND_NZ,
+    [TCG_COND_LT] = RCOND_LZ,
+    [TCG_COND_GT] = RCOND_GZ,
+    [TCG_COND_LE] = RCOND_LEZ,
+    [TCG_COND_GE] = RCOND_GEZ
+};
+
+static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
+{
+    tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
+}
+
+static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
+{
+    TCGLabel *l = &s->labels[label];
+    int off19;
+
+    if (l->has_value) {
+        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
+    } else {
+        /* Make sure to preserve destinations during retranslation.  */
+        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
+        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
+    }
+    tcg_out_bpcc0(s, scond, flags, off19);
+}
+
 static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const)
 {
     tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
 }
 
-static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond,
-                               TCGArg arg1, TCGArg arg2, int const_arg2,
-                               int label_index)
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1,
+                               TCGArg arg2, int const_arg2, int label)
 {
     tcg_out_cmp(s, arg1, arg2, const_arg2);
-    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
+    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label);
     tcg_out_nop(s);
 }
 
+static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGArg ret,
+                          TCGArg v1, int v1const)
+{
+    tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
+              | INSN_RS1(tcg_cond_to_bcond[cond])
+              | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
+                                TCGArg c1, TCGArg c2, int c2const,
+                                TCGArg v1, int v1const)
+{
+    tcg_out_cmp(s, c1, c2, c2const);
+    tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
+}
+
 #if TCG_TARGET_REG_BITS == 64
-static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond,
-                               TCGArg arg1, TCGArg arg2, int const_arg2,
-                               int label_index)
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1,
+                               TCGArg arg2, int const_arg2, int label)
 {
-    tcg_out_cmp(s, arg1, arg2, const_arg2);
-    tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index);
+    /* For 64-bit signed comparisons vs zero, we can avoid the compare.  */
+    if (arg2 == 0 && !is_unsigned_cond(cond)) {
+        TCGLabel *l = &s->labels[label];
+        int off16;
+
+        if (l->has_value) {
+            off16 = INSN_OFF16(l->u.value - (unsigned long)s->code_ptr);
+        } else {
+            /* Make sure to preserve destinations during retranslation.  */
+            off16 = *(uint32_t *)s->code_ptr & INSN_OFF16(-1);
+            tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0);
+        }
+        tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
+                  | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
+    } else {
+        tcg_out_cmp(s, arg1, arg2, const_arg2);
+        tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label);
+    }
     tcg_out_nop(s);
 }
+
+static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1,
+                         TCGArg v1, int v1const)
+{
+    tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
+              | (tcg_cond_to_rcond[cond] << 10)
+              | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
+                                TCGArg c1, TCGArg c2, int c2const,
+                                TCGArg v1, int v1const)
+{
+    /* For 64-bit signed comparisons vs zero, we can avoid the compare.
+       Note that the immediate range is one bit smaller, so we must check
+       for that as well.  */
+    if (c2 == 0 && !is_unsigned_cond(cond)
+        && (!v1const || check_fit_tl(v1, 10))) {
+        tcg_out_movr(s, cond, ret, c1, v1, v1const);
+    } else {
+        tcg_out_cmp(s, c1, c2, c2const);
+        tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
+    }
+}
 #else
 static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
                                 TCGArg al, TCGArg ah,
                                 TCGArg bl, int blconst,
                                 TCGArg bh, int bhconst, int label_dest)
 {
-    int cc, label_next = gen_new_label();
+    int scond, label_next = gen_new_label();
 
     tcg_out_cmp(s, ah, bh, bhconst);
 
     /* Note that we fill one of the delay slots with the second compare.  */
     switch (cond) {
     case TCG_COND_EQ:
-        cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
-        tcg_out_branch_i32(s, cc, label_next);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
         tcg_out_cmp(s, al, bl, blconst);
-        cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_EQ], 0);
-        tcg_out_branch_i32(s, cc, label_dest);
+        tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest);
         break;
 
     case TCG_COND_NE:
-        cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
-        tcg_out_branch_i32(s, cc, label_dest);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
         tcg_out_cmp(s, al, bl, blconst);
-        tcg_out_branch_i32(s, cc, label_dest);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
         break;
 
     default:
-        /* ??? One could fairly easily special-case 64-bit unsigned
-           compares against 32-bit zero-extended constants.  For instance,
-           we know that (unsigned)AH < 0 is false and need not emit it.
-           Similarly, (unsigned)AH > 0 being true implies AH != 0, so the
-           second branch will never be taken.  */
-        cc = INSN_COND(tcg_cond_to_bcond[cond], 0);
-        tcg_out_branch_i32(s, cc, label_dest);
+        scond = tcg_cond_to_bcond[tcg_high_cond(cond)];
+        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
         tcg_out_nop(s);
-        cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
-        tcg_out_branch_i32(s, cc, label_next);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
         tcg_out_cmp(s, al, bl, blconst);
-        cc = INSN_COND(tcg_cond_to_bcond[tcg_unsigned_cond(cond)], 0);
-        tcg_out_branch_i32(s, cc, label_dest);
+        scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
+        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
         break;
     }
     tcg_out_nop(s);
@@ -600,39 +673,42 @@ static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
 static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
                                 TCGArg c1, TCGArg c2, int c2const)
 {
-    TCGArg t;
-
     /* For 32-bit comparisons, we can play games with ADDX/SUBX.  */
     switch (cond) {
+    case TCG_COND_LTU:
+    case TCG_COND_GEU:
+        /* The result of the comparison is in the carry bit.  */
+        break;
+
     case TCG_COND_EQ:
     case TCG_COND_NE:
+        /* For equality, we can transform to inequality vs zero.  */
         if (c2 != 0) {
             tcg_out_arithc(s, ret, c1, c2, c2const, ARITH_XOR);
         }
         c1 = TCG_REG_G0, c2 = ret, c2const = 0;
-        cond = (cond == TCG_COND_EQ ? TCG_COND_LEU : TCG_COND_LTU);
+        cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
 	break;
 
     case TCG_COND_GTU:
-    case TCG_COND_GEU:
-        if (c2const && c2 != 0) {
-            tcg_out_movi_imm13(s, TCG_REG_T1, c2);
-            c2 = TCG_REG_T1;
-        }
-        t = c1, c1 = c2, c2 = t, c2const = 0;
-        cond = tcg_swap_cond(cond);
-        break;
-
-    case TCG_COND_LTU:
     case TCG_COND_LEU:
-        break;
+        /* If we don't need to load a constant into a register, we can
+           swap the operands on GTU/LEU.  There's no benefit to loading
+           the constant into a temporary register.  */
+        if (!c2const || c2 == 0) {
+            TCGArg t = c1;
+            c1 = c2;
+            c2 = t;
+            c2const = 0;
+            cond = tcg_swap_cond(cond);
+            break;
+        }
+        /* FALLTHRU */
 
     default:
         tcg_out_cmp(s, c1, c2, c2const);
         tcg_out_movi_imm13(s, ret, 0);
-        tcg_out32(s, ARITH_MOVCC | INSN_RD(ret)
-                  | INSN_RS1(tcg_cond_to_bcond[cond])
-                  | MOVCC_ICC | INSN_IMM11(1));
+        tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
         return;
     }
 
@@ -648,11 +724,16 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
                                 TCGArg c1, TCGArg c2, int c2const)
 {
-    tcg_out_cmp(s, c1, c2, c2const);
-    tcg_out_movi_imm13(s, ret, 0);
-    tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
-               | INSN_RS1(tcg_cond_to_bcond[cond])
-               | MOVCC_XCC | INSN_IMM11(1));
+    /* For 64-bit signed comparisons vs zero, we can avoid the compare
+       if the input does not overlap the output.  */
+    if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
+        tcg_out_movi_imm13(s, ret, 0);
+        tcg_out_movr(s, cond, ret, c1, 1, 1);
+    } else {
+        tcg_out_cmp(s, c1, c2, c2const);
+        tcg_out_movi_imm13(s, ret, 0);
+        tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
+    }
 }
 #else
 static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
@@ -660,35 +741,56 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
                                  TCGArg bl, int blconst,
                                  TCGArg bh, int bhconst)
 {
-    int lab;
+    int tmp = TCG_REG_T1;
+
+    /* Note that the low parts are fully consumed before tmp is set.  */
+    if (ret != ah && (bhconst || ret != bh)) {
+        tmp = ret;
+    }
 
     switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst);
-        tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst);
-        tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND);
-        break;
-
     case TCG_COND_NE:
-        tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, al, blconst);
-        tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst);
-        tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR);
+        if (bl == 0 && bh == 0) {
+            if (cond == TCG_COND_EQ) {
+                tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC);
+                tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
+            } else {
+                tcg_out_arith(s, ret, al, ah, ARITH_ORCC);
+            }
+        } else {
+            tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst);
+            tcg_out_cmp(s, ah, bh, bhconst);
+            tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
+        }
+        tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1);
         break;
 
     default:
-        lab = gen_new_label();
-
+        /* <= : ah < bh | (ah == bh && al <= bl) */
+        tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst);
         tcg_out_cmp(s, ah, bh, bhconst);
-        tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), lab);
-        tcg_out_movi_imm13(s, ret, 1);
-        tcg_out_branch_i32(s, INSN_COND(COND_NE, 1), lab);
-        tcg_out_movi_imm13(s, ret, 0);
+        tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
+        tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1);
+        tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1);
+        break;
+    }
+}
 
-        tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), ret, al, bl, blconst);
+static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh,
+                            TCGArg al, TCGArg ah, TCGArg bl, int blconst,
+                            TCGArg bh, int bhconst, int opl, int oph)
+{
+    TCGArg tmp = TCG_REG_T1;
 
-        tcg_out_label(s, lab, s->code_ptr);
-        break;
+    /* Note that the low parts are fully consumed before tmp is set.  */
+    if (rl != ah && (bhconst || rl != bh)) {
+        tmp = rl;
     }
+
+    tcg_out_arithc(s, tmp, al, bl, blconst, opl);
+    tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
+    tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
 }
 #endif
 
@@ -859,8 +961,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
 
         /* bne,pn %[xi]cc, label0 */
         label_ptr[0] = (uint32_t *)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
-                      | ((TARGET_LONG_BITS == 64) << 21)));
+        tcg_out_bpcc0(s, COND_NE, BPCC_PN
+                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
 
         /* TLB Hit.  */
         /* Load all 64-bits into an O/G register.  */
@@ -875,8 +977,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
 
         /* b,a,pt label1 */
         label_ptr[1] = (uint32_t *)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
-                      | (1 << 29) | (1 << 19)));
+        tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
     } else {
         /* The fast path is exactly one insn.  Thus we can perform the
            entire TLB Hit in the (annulled) delay slot of the branch
@@ -885,9 +986,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
         /* beq,a,pt %[xi]cc, label0 */
         label_ptr[0] = NULL;
         label_ptr[1] = (uint32_t *)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
-                      | ((TARGET_LONG_BITS == 64) << 21)
-                      | (1 << 29) | (1 << 19)));
+        tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
         /* delay slot */
         tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
     }
@@ -976,7 +1076,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
 {
     int addrlo_idx = 1, datalo, datahi, addr_reg;
 #if defined(CONFIG_SOFTMMU)
-    int memi_idx, memi, n;
+    int memi_idx, memi, n, datafull;
     uint32_t *label_ptr;
 #endif
 
@@ -993,23 +1093,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
     addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
                                 offsetof(CPUTLBEntry, addr_write));
 
+    datafull = datalo;
     if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
         /* Reconstruct the full 64-bit value.  */
         tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
         tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
         tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
-        datalo = TCG_REG_O2;
+        datafull = TCG_REG_O2;
     }
 
     /* The fast path is exactly one insn.  Thus we can perform the entire
        TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
     /* beq,a,pt %[xi]cc, label0 */
     label_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
-                  | ((TARGET_LONG_BITS == 64) << 21)
-                  | (1 << 29) | (1 << 19)));
+    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
-    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
+    tcg_out_ldst_rr(s, datafull, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
 
     /* TLB Miss.  */
 
@@ -1098,7 +1198,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_nop(s);
         break;
     case INDEX_op_br:
-        tcg_out_branch_i32(s, COND_A, args[0]);
+        tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]);
         tcg_out_nop(s);
         break;
     case INDEX_op_movi_i32:
@@ -1211,6 +1311,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_setcond_i32(s, args[3], args[0], args[1],
                             args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond_i32(s, args[5], args[0], args[1],
+                            args[2], const_args[2], args[3], const_args[3]);
+        break;
 
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_brcond2_i32:
@@ -1224,16 +1328,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                              args[4], const_args[4]);
         break;
     case INDEX_op_add2_i32:
-        tcg_out_arithc(s, args[0], args[2], args[4], const_args[4],
-                       ARITH_ADDCC);
-        tcg_out_arithc(s, args[1], args[3], args[5], const_args[5],
-                       ARITH_ADDX);
+        tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
+                        args[4], const_args[4], args[5], const_args[5],
+                        ARITH_ADDCC, ARITH_ADDX);
         break;
     case INDEX_op_sub2_i32:
-        tcg_out_arithc(s, args[0], args[2], args[4], const_args[4],
-                       ARITH_SUBCC);
-        tcg_out_arithc(s, args[1], args[3], args[5], const_args[5],
-                       ARITH_SUBX);
+        tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
+                        args[4], const_args[4], args[5], const_args[5],
+                        ARITH_SUBCC, ARITH_SUBX);
         break;
     case INDEX_op_mulu2_i32:
         tcg_out_arithc(s, args[0], args[2], args[3], const_args[3],
@@ -1346,7 +1448,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_setcond_i64(s, args[3], args[0], args[1],
                             args[2], const_args[2]);
         break;
-
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond_i64(s, args[5], args[0], args[1],
+                            args[2], const_args[2], args[3], const_args[3]);
+        break;
 #endif
     gen_arith:
         tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
@@ -1375,39 +1480,40 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_ld16u_i32, { "r", "r" } },
     { INDEX_op_ld16s_i32, { "r", "r" } },
     { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "r", "r" } },
-    { INDEX_op_st16_i32, { "r", "r" } },
-    { INDEX_op_st_i32, { "r", "r" } },
-
-    { INDEX_op_add_i32, { "r", "r", "rJ" } },
-    { INDEX_op_mul_i32, { "r", "r", "rJ" } },
-    { INDEX_op_div_i32, { "r", "r", "rJ" } },
-    { INDEX_op_divu_i32, { "r", "r", "rJ" } },
-    { INDEX_op_rem_i32, { "r", "r", "rJ" } },
-    { INDEX_op_remu_i32, { "r", "r", "rJ" } },
-    { INDEX_op_sub_i32, { "r", "r", "rJ" } },
-    { INDEX_op_and_i32, { "r", "r", "rJ" } },
-    { INDEX_op_andc_i32, { "r", "r", "rJ" } },
-    { INDEX_op_or_i32, { "r", "r", "rJ" } },
-    { INDEX_op_orc_i32, { "r", "r", "rJ" } },
-    { INDEX_op_xor_i32, { "r", "r", "rJ" } },
-
-    { INDEX_op_shl_i32, { "r", "r", "rJ" } },
-    { INDEX_op_shr_i32, { "r", "r", "rJ" } },
-    { INDEX_op_sar_i32, { "r", "r", "rJ" } },
+    { INDEX_op_st8_i32, { "rZ", "r" } },
+    { INDEX_op_st16_i32, { "rZ", "r" } },
+    { INDEX_op_st_i32, { "rZ", "r" } },
+
+    { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_rem_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_remu_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
+
+    { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
 
     { INDEX_op_neg_i32, { "r", "rJ" } },
     { INDEX_op_not_i32, { "r", "rJ" } },
 
-    { INDEX_op_brcond_i32, { "r", "rJ" } },
-    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+    { INDEX_op_brcond_i32, { "rZ", "rJ" } },
+    { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
 
 #if TCG_TARGET_REG_BITS == 32
-    { INDEX_op_brcond2_i32, { "r", "r", "rJ", "rJ" } },
-    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
-    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
-    { INDEX_op_sub2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
-    { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
+    { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
@@ -1420,27 +1526,27 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_ld32u_i64, { "r", "r" } },
     { INDEX_op_ld32s_i64, { "r", "r" } },
     { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "r", "r" } },
-    { INDEX_op_st16_i64, { "r", "r" } },
-    { INDEX_op_st32_i64, { "r", "r" } },
-    { INDEX_op_st_i64, { "r", "r" } },
-
-    { INDEX_op_add_i64, { "r", "r", "rJ" } },
-    { INDEX_op_mul_i64, { "r", "r", "rJ" } },
-    { INDEX_op_div_i64, { "r", "r", "rJ" } },
-    { INDEX_op_divu_i64, { "r", "r", "rJ" } },
-    { INDEX_op_rem_i64, { "r", "r", "rJ" } },
-    { INDEX_op_remu_i64, { "r", "r", "rJ" } },
-    { INDEX_op_sub_i64, { "r", "r", "rJ" } },
-    { INDEX_op_and_i64, { "r", "r", "rJ" } },
-    { INDEX_op_andc_i64, { "r", "r", "rJ" } },
-    { INDEX_op_or_i64, { "r", "r", "rJ" } },
-    { INDEX_op_orc_i64, { "r", "r", "rJ" } },
-    { INDEX_op_xor_i64, { "r", "r", "rJ" } },
-
-    { INDEX_op_shl_i64, { "r", "r", "rJ" } },
-    { INDEX_op_shr_i64, { "r", "r", "rJ" } },
-    { INDEX_op_sar_i64, { "r", "r", "rJ" } },
+    { INDEX_op_st8_i64, { "rZ", "r" } },
+    { INDEX_op_st16_i64, { "rZ", "r" } },
+    { INDEX_op_st32_i64, { "rZ", "r" } },
+    { INDEX_op_st_i64, { "rZ", "r" } },
+
+    { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_mul_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_div_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_divu_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_rem_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_remu_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_sub_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_and_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_andc_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_or_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_orc_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_xor_i64, { "r", "rZ", "rJ" } },
+
+    { INDEX_op_shl_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_shr_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_sar_i64, { "r", "rZ", "rJ" } },
 
     { INDEX_op_neg_i64, { "r", "rJ" } },
     { INDEX_op_not_i64, { "r", "rJ" } },
@@ -1448,8 +1554,9 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_ext32s_i64, { "r", "ri" } },
     { INDEX_op_ext32u_i64, { "r", "ri" } },
 
-    { INDEX_op_brcond_i64, { "r", "rJ" } },
-    { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+    { INDEX_op_brcond_i64, { "rZ", "rJ" } },
+    { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 4a17f1e118..0e7d398a41 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -62,8 +62,9 @@ typedef enum {
     TCG_REG_I7,
 } TCGReg;
 
-#define TCG_CT_CONST_S11 0x100
-#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_S11  0x100
+#define TCG_CT_CONST_S13  0x200
+#define TCG_CT_CONST_ZERO 0x400
 
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_O6
@@ -99,7 +100,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64          1
@@ -121,7 +122,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_movcond_i64      1
 #endif
 
 #define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 551845801d..8100a5a2e0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1018,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 
     tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
                     TCGV_LOW(arg1), TCGV_LOW(arg2));
+    /* Allow the optimizer room to replace mulu2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a171f784a4..5faaca5ebf 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1303,8 +1303,58 @@ static void tcg_liveness_analysis(TCGContext *s)
             break;
         case INDEX_op_end:
             break;
-            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            args -= 6;
+            nb_iargs = 4;
+            nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part.  The result can be optimized to a simple
+               add or sub.  This happens often for x86_64 guest when the
+               cpu mode is set to 32 bit.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                /* Create the single operation plus nop.  */
+                if (op == INDEX_op_add2_i32) {
+                    op = INDEX_op_add_i32;
+                } else {
+                    op = INDEX_op_sub_i32;
+                }
+                gen_opc_buf[op_index] = op;
+                args[1] = args[2];
+                args[2] = args[4];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+                /* Fall through and mark the single-word operation live.  */
+                nb_iargs = 2;
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
+        case INDEX_op_mulu2_i32:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            /* Likewise, test for the high part of the operation dead.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                args[1] = args[2];
+                args[2] = args[3];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                /* Fall through and mark the single-word operation live.  */
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
+            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
@@ -1318,6 +1368,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     if (!dead_temps[arg])
                         goto do_not_remove;
                 }
+            do_remove:
                 tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
                 s->del_op_count++;
diff --git a/ui/spice-core.c b/ui/spice-core.c
index ba0d0bdbc2..51473650c0 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -223,7 +223,6 @@ static void channel_event(int event, SpiceChannelEventInfo *info)
     client = qdict_new();
     server = qdict_new();
 
-#ifdef SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT
     if (info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT) {
         add_addr_info(client, (struct sockaddr *)&info->paddr_ext,
                       info->plen_ext);
@@ -232,12 +231,7 @@ static void channel_event(int event, SpiceChannelEventInfo *info)
     } else {
         error_report("spice: %s, extended address is expected",
                      __func__);
-#endif
-        add_addr_info(client, &info->paddr, info->plen);
-        add_addr_info(server, &info->laddr, info->llen);
-#ifdef SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT
     }
-#endif
 
     if (event == SPICE_CHANNEL_EVENT_INITIALIZED) {
         qdict_put(server, "auth", qstring_from_str(auth));
@@ -276,7 +270,6 @@ static SpiceCoreInterface core_interface = {
     .channel_event      = channel_event,
 };
 
-#ifdef SPICE_INTERFACE_MIGRATION
 typedef struct SpiceMigration {
     SpiceMigrateInstance sin;
     struct {
@@ -313,7 +306,6 @@ static void migrate_end_complete_cb(SpiceMigrateInstance *sin)
     monitor_protocol_event(QEVENT_SPICE_MIGRATE_COMPLETED, NULL);
     spice_migration_completed = true;
 }
-#endif
 
 /* config string parsing */
 
@@ -393,17 +385,13 @@ static SpiceChannelList *qmp_query_spice_channels(void)
         chan = g_malloc0(sizeof(*chan));
         chan->value = g_malloc0(sizeof(*chan->value));
 
-#ifdef SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT
         if (item->info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT) {
             paddr = (struct sockaddr *)&item->info->paddr_ext;
             plen = item->info->plen_ext;
         } else {
-#endif
             paddr = &item->info->paddr;
             plen = item->info->plen;
-#ifdef SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT
         }
-#endif
 
         getnameinfo(paddr, plen,
                     host, sizeof(host), port, sizeof(port),
@@ -473,13 +461,10 @@ SpiceInfo *qmp_query_spice(Error **errp)
         info->tls_port = tls_port;
     }
 
-#if SPICE_SERVER_VERSION >= 0x000a03 /* 0.10.3 */
     info->mouse_mode = spice_server_is_server_mouse(spice_server) ?
                        SPICE_QUERY_MOUSE_MODE_SERVER :
                        SPICE_QUERY_MOUSE_MODE_CLIENT;
-#else
-    info->mouse_mode = SPICE_QUERY_MOUSE_MODE_UNKNOWN;
-#endif
+
     /* for compatibility with the original command */
     info->has_channels = true;
     info->channels = qmp_query_spice_channels();
@@ -492,19 +477,11 @@ static void migration_state_notifier(Notifier *notifier, void *data)
     MigrationState *s = data;
 
     if (migration_is_active(s)) {
-#ifdef SPICE_INTERFACE_MIGRATION
         spice_server_migrate_start(spice_server);
-#endif
     } else if (migration_has_finished(s)) {
-#ifndef SPICE_INTERFACE_MIGRATION
-        spice_server_migrate_switch(spice_server);
-        monitor_protocol_event(QEVENT_SPICE_MIGRATE_COMPLETED, NULL);
-        spice_migration_completed = true;
-#else
         spice_server_migrate_end(spice_server, true);
     } else if (migration_has_failed(s)) {
         spice_server_migrate_end(spice_server, false);
-#endif
     }
 }
 
@@ -513,16 +490,11 @@ int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
                             MonitorCompletion *cb, void *opaque)
 {
     int ret;
-#ifdef SPICE_INTERFACE_MIGRATION
+
     spice_migrate.connect_complete.cb = cb;
     spice_migrate.connect_complete.opaque = opaque;
     ret = spice_server_migrate_connect(spice_server, hostname,
                                        port, tls_port, subject);
-#else
-    ret = spice_server_migrate_info(spice_server, hostname,
-                                    port, tls_port, subject);
-    cb(opaque, NULL);
-#endif
     return ret;
 }
 
@@ -561,7 +533,6 @@ static int add_channel(const char *name, const char *value, void *opaque)
 static void vm_change_state_handler(void *opaque, int running,
                                     RunState state)
 {
-#if SPICE_SERVER_VERSION >= 0x000b02 /* 0.11.2 */
     if (running) {
         qemu_spice_display_start();
         spice_server_vm_start(spice_server);
@@ -569,7 +540,6 @@ static void vm_change_state_handler(void *opaque, int running,
         spice_server_vm_stop(spice_server);
         qemu_spice_display_stop();
     }
-#endif
 }
 
 void qemu_spice_init(void)
@@ -585,9 +555,7 @@ void qemu_spice_init(void)
     int port, tls_port, len, addr_flags;
     spice_image_compression_t compression;
     spice_wan_compression_t wan_compr;
-#if SPICE_SERVER_VERSION >= 0x000b02 /* 0.11.2 */
     bool seamless_migration;
-#endif
 
     qemu_thread_get_self(&me);
 
@@ -672,16 +640,11 @@ void qemu_spice_init(void)
         spice_server_set_ticket(spice_server, password, 0, 0, 0);
     }
     if (qemu_opt_get_bool(opts, "sasl", 0)) {
-#if SPICE_SERVER_VERSION >= 0x000900 /* 0.9.0 */
         if (spice_server_set_sasl_appname(spice_server, "qemu") == -1 ||
             spice_server_set_sasl(spice_server, 1) == -1) {
             error_report("spice: failed to enable sasl");
             exit(1);
         }
-#else
-        error_report("spice: sasl is not available (spice >= 0.9 required)");
-        exit(1);
-#endif
     }
     if (qemu_opt_get_bool(opts, "disable-ticketing", 0)) {
         auth = "none";
@@ -726,15 +689,11 @@ void qemu_spice_init(void)
 
     qemu_opt_foreach(opts, add_channel, &tls_port, 0);
 
-#if SPICE_SERVER_VERSION >= 0x000a02 /* 0.10.2 */
     spice_server_set_name(spice_server, qemu_name);
     spice_server_set_uuid(spice_server, qemu_uuid);
-#endif
 
-#if SPICE_SERVER_VERSION >= 0x000b02 /* 0.11.2 */
     seamless_migration = qemu_opt_get_bool(opts, "seamless-migration", 0);
     spice_server_set_seamless_migration(spice_server, seamless_migration);
-#endif
     if (0 != spice_server_init(spice_server, &core_interface)) {
         error_report("failed to initialize spice server");
         exit(1);
@@ -743,11 +702,9 @@ void qemu_spice_init(void)
 
     migration_state.notify = migration_state_notifier;
     add_migration_state_change_notifier(&migration_state);
-#ifdef SPICE_INTERFACE_MIGRATION
     spice_migrate.sin.base.sif = &migrate_interface.base;
     spice_migrate.connect_complete.cb = NULL;
     qemu_spice_add_interface(&spice_migrate.sin.base);
-#endif
 
     qemu_spice_input_init();
     qemu_spice_audio_init();
@@ -815,15 +772,11 @@ int qemu_spice_set_pw_expire(time_t expires)
 
 int qemu_spice_display_add_client(int csock, int skipauth, int tls)
 {
-#if SPICE_SERVER_VERSION >= 0x000a01
     if (tls) {
         return spice_server_add_ssl_client(spice_server, csock, skipauth);
     } else {
         return spice_server_add_client(spice_server, csock, skipauth);
     }
-#else
-    return -1;
-#endif
 }
 
 static void spice_register_config(void)
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 50fbefb067..b61764f381 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -126,21 +126,6 @@ void qemu_spice_wakeup(SimpleSpiceDisplay *ssd)
     ssd->worker->wakeup(ssd->worker);
 }
 
-#if SPICE_SERVER_VERSION < 0x000b02 /* before 0.11.2 */
-static void qemu_spice_start(SimpleSpiceDisplay *ssd)
-{
-    trace_qemu_spice_start(ssd->qxl.id);
-    ssd->worker->start(ssd->worker);
-}
-
-static void qemu_spice_stop(SimpleSpiceDisplay *ssd)
-{
-    trace_qemu_spice_stop(ssd->qxl.id);
-    ssd->worker->stop(ssd->worker);
-}
-
-#else
-
 static int spice_display_is_running;
 
 void qemu_spice_display_start(void)
@@ -153,15 +138,9 @@ void qemu_spice_display_stop(void)
     spice_display_is_running = false;
 }
 
-#endif
-
 int qemu_spice_display_is_running(SimpleSpiceDisplay *ssd)
 {
-#if SPICE_SERVER_VERSION < 0x000b02 /* before 0.11.2 */
-    return ssd->running;
-#else
     return spice_display_is_running;
-#endif
 }
 
 static void qemu_spice_create_one_update(SimpleSpiceDisplay *ssd,
@@ -364,22 +343,6 @@ void qemu_spice_destroy_host_primary(SimpleSpiceDisplay *ssd)
     qemu_spice_destroy_primary_surface(ssd, 0, QXL_SYNC);
 }
 
-void qemu_spice_vm_change_state_handler(void *opaque, int running,
-                                        RunState state)
-{
-#if SPICE_SERVER_VERSION < 0x000b02 /* before 0.11.2 */
-    SimpleSpiceDisplay *ssd = opaque;
-
-    if (running) {
-        ssd->running = true;
-        qemu_spice_start(ssd);
-    } else {
-        qemu_spice_stop(ssd);
-        ssd->running = false;
-    }
-#endif
-}
-
 void qemu_spice_display_init_common(SimpleSpiceDisplay *ssd, DisplayState *ds)
 {
     ssd->ds = ds;
@@ -623,7 +586,6 @@ void qemu_spice_display_init(DisplayState *ds)
     qemu_spice_add_interface(&sdpy.qxl.base);
     assert(sdpy.worker);
 
-    qemu_add_vm_change_state_handler(qemu_spice_vm_change_state_handler, &sdpy);
     qemu_spice_create_host_memslot(&sdpy);
     qemu_spice_create_host_primary(&sdpy);
 }
diff --git a/ui/spice-display.h b/ui/spice-display.h
index dea41c1b71..d7669277fd 100644
--- a/ui/spice-display.h
+++ b/ui/spice-display.h
@@ -83,9 +83,6 @@ struct SimpleSpiceDisplay {
 
     QXLRect dirty;
     int notify;
-#if SPICE_SERVER_VERSION < 0x000b02 /* before 0.11.2 */
-    int running;
-#endif
 
     /*
      * All struct members below this comment can be accessed from
@@ -133,8 +130,6 @@ void qemu_spice_create_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id,
 void qemu_spice_destroy_primary_surface(SimpleSpiceDisplay *ssd,
                                         uint32_t id, qxl_async_io async);
 void qemu_spice_wakeup(SimpleSpiceDisplay *ssd);
-#if SPICE_SERVER_VERSION >= 0x000b02 /* before 0.11.2 */
 void qemu_spice_display_start(void);
 void qemu_spice_display_stop(void);
-#endif
 int qemu_spice_display_is_running(SimpleSpiceDisplay *ssd);
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 087b84d319..3c592b3f3d 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -33,21 +33,21 @@
 /*
  * Locking:
  *
- * There is three levels of locking:
+ * There are three levels of locking:
  * - jobs queue lock: for each operation on the queue (push, pop, isEmpty?)
  * - VncDisplay global lock: mainly used for framebuffer updates to avoid
  *                      screen corruption if the framebuffer is updated
- *			while the worker is doing something.
+ *                      while the worker is doing something.
  * - VncState::output lock: used to make sure the output buffer is not corrupted
- * 		   	 if two threads try to write on it at the same time
+ *                          if two threads try to write on it at the same time
  *
- * While the VNC worker thread is working, the VncDisplay global lock is hold
- * to avoid screen corruptions (this does not block vnc_refresh() because it
- * uses trylock()) but the output lock is not hold because the thread work on
+ * While the VNC worker thread is working, the VncDisplay global lock is held
+ * to avoid screen corruption (this does not block vnc_refresh() because it
+ * uses trylock()) but the output lock is not held because the thread works on
  * its own output buffer.
  * When the encoding job is done, the worker thread will hold the output lock
  * and copy its output buffer in vs->output.
-*/
+ */
 
 struct VncJobQueue {
     QemuCond cond;
@@ -62,7 +62,7 @@ typedef struct VncJobQueue VncJobQueue;
 
 /*
  * We use a single global queue, but most of the functions are
- * already reetrant, so we can easilly add more than one encoding thread
+ * already reentrant, so we can easily add more than one encoding thread
  */
 static VncJobQueue *queue;
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 01b2dafea2..66ae93010f 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -372,6 +372,10 @@ VncInfo *qmp_query_vnc(Error **errp)
             }
         }
 
+        if (vnc_display->lsock == -1) {
+            return info;
+        }
+
         if (getsockname(vnc_display->lsock, (struct sockaddr *)&sa,
                         &salen) == -1) {
             error_set(errp, QERR_UNDEFINED_ERROR);
@@ -1802,10 +1806,12 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
             vs->features |= VNC_FEATURE_TIGHT_MASK;
             vs->vnc_encoding = enc;
             break;
+#ifdef CONFIG_VNC_PNG
         case VNC_ENCODING_TIGHT_PNG:
             vs->features |= VNC_FEATURE_TIGHT_PNG_MASK;
             vs->vnc_encoding = enc;
             break;
+#endif
         case VNC_ENCODING_ZLIB:
             vs->features |= VNC_FEATURE_ZLIB_MASK;
             vs->vnc_encoding = enc;