summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS4
-rw-r--r--Makefile4
-rwxr-xr-xconfigure37
-rw-r--r--docs/specs/standard-vga.txt64
-rw-r--r--exec-all.h5
-rw-r--r--exec.c238
-rw-r--r--hw/alpha_dp264.c12
-rw-r--r--hw/an5206.c8
-rw-r--r--hw/axis_dev88.c9
-rw-r--r--hw/boards.h16
-rw-r--r--hw/collie.c9
-rw-r--r--hw/dummy_m68k.c8
-rw-r--r--hw/exynos4_boards.c16
-rw-r--r--hw/gumstix.c11
-rw-r--r--hw/highbank.c10
-rw-r--r--hw/hw.h1
-rw-r--r--hw/integratorcp.c10
-rw-r--r--hw/kzm.c10
-rw-r--r--hw/leon3.c10
-rw-r--r--hw/lm32_boards.c18
-rw-r--r--hw/mainstone.c10
-rw-r--r--hw/mcf5208.c8
-rw-r--r--hw/milkymist.c10
-rw-r--r--hw/mips_fulong2e.c9
-rw-r--r--hw/mips_jazz.c14
-rw-r--r--hw/mips_malta.c10
-rw-r--r--hw/mips_mipssim.c10
-rw-r--r--hw/mips_r4k.c10
-rw-r--r--hw/musicpal.c9
-rw-r--r--hw/nseries.c22
-rw-r--r--hw/null-machine.c7
-rw-r--r--hw/omap_sx1.c22
-rw-r--r--hw/openrisc_sim.c10
-rw-r--r--hw/palm.c9
-rw-r--r--hw/pc_piix.c54
-rw-r--r--hw/petalogix_ml605_mmu.c8
-rw-r--r--hw/petalogix_s3adsp1800_mmu.c8
-rw-r--r--hw/pl011.c11
-rw-r--r--hw/pl022.c8
-rw-r--r--hw/pl031.c16
-rw-r--r--hw/pl041.c5
-rw-r--r--hw/pl181.c18
-rw-r--r--hw/pl190.c6
-rw-r--r--hw/ppc/e500plat.c13
-rw-r--r--hw/ppc/mpc8544ds.c13
-rw-r--r--hw/ppc405_boards.c25
-rw-r--r--hw/ppc440_bamboo.c12
-rw-r--r--hw/ppc_newworld.c13
-rw-r--r--hw/ppc_oldworld.c13
-rw-r--r--hw/ppc_prep.c13
-rw-r--r--hw/puv3.c8
-rw-r--r--hw/r2d.c9
-rw-r--r--hw/realview.c44
-rw-r--r--hw/s390-virtio.c13
-rw-r--r--hw/shix.c6
-rw-r--r--hw/spapr.c13
-rw-r--r--hw/spitz.c40
-rw-r--r--hw/stellaris.c14
-rw-r--r--hw/sun4m.c133
-rw-r--r--hw/sun4u.c39
-rw-r--r--hw/tosa.c9
-rw-r--r--hw/versatilepb.c22
-rw-r--r--hw/vexpress.c26
-rw-r--r--hw/vga-isa.c2
-rw-r--r--hw/vga-pci.c138
-rw-r--r--hw/vga.c40
-rw-r--r--hw/vga_int.h30
-rw-r--r--hw/virtex_ml507.c10
-rw-r--r--hw/xen_machine_pv.c11
-rw-r--r--hw/xilinx_zynq.c9
-rw-r--r--hw/xtensa_lx60.c22
-rw-r--r--hw/xtensa_sim.c11
-rw-r--r--hw/z2.c9
-rw-r--r--linux-user/alpha/target_signal.h7
-rw-r--r--linux-user/linuxload.c8
-rw-r--r--linux-user/main.c4
-rw-r--r--linux-user/qemu.h3
-rw-r--r--linux-user/signal.c59
-rw-r--r--linux-user/syscall.c202
-rw-r--r--net/tap-win32.c1
-rw-r--r--qemu-log.c3
-rw-r--r--qemu-log.h1
-rw-r--r--qemu-options.hx38
-rw-r--r--target-arm/neon_helper.c1
-rw-r--r--target-arm/translate.c2
-rw-r--r--target-microblaze/cpu.h1
-rw-r--r--target-mips/op_helper.c63
-rw-r--r--target-sparc/translate.c1238
-rw-r--r--targphys.h4
-rw-r--r--tcg/arm/tcg-target.c56
-rw-r--r--tcg/arm/tcg-target.h4
-rw-r--r--tcg/hppa/tcg-target.h2
-rw-r--r--tcg/i386/tcg-target.h2
-rw-r--r--tcg/ia64/tcg-target.c194
-rw-r--r--tcg/ia64/tcg-target.h14
-rw-r--r--tcg/mips/tcg-target.h3
-rw-r--r--tcg/optimize.c471
-rw-r--r--tcg/ppc/tcg-target.h2
-rw-r--r--tcg/ppc64/tcg-target.h1
-rw-r--r--tcg/s390/tcg-target.h2
-rw-r--r--tcg/sparc/tcg-target.h2
-rw-r--r--tcg/tcg-op.h11
-rw-r--r--tcg/tcg.c57
-rw-r--r--tcg/tcg.h2
-rw-r--r--tcg/tci/tcg-target.h3
-rw-r--r--tests/tcg/Makefile18
-rw-r--r--tests/tcg/linux-test.c2
-rw-r--r--tests/tcg/test-i386.c3
-rw-r--r--tests/tcg/test_path.c13
-rw-r--r--ui/vnc-jobs.c16
-rw-r--r--ui/vnc.c2
-rw-r--r--vl.c9
112 files changed, 2378 insertions, 1700 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f1f925007e..8f5681fd64 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -641,7 +641,7 @@ F: monitor.c
 
 Network device layer
 M: Anthony Liguori <aliguori@us.ibm.com>
-M: Stefan Hajnoczi <stefanha@gmail.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: net/
 T: git git://github.com/stefanha/qemu.git net
@@ -661,7 +661,7 @@ F: slirp/
 T: git git://git.kiszka.org/qemu.git queues/slirp
 
 Tracing
-M: Stefan Hajnoczi <stefanha@gmail.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: trace/
 F: scripts/tracetool.py
diff --git a/Makefile b/Makefile
index a9c22bf1d9..88285a474b 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,11 @@ config-host.mak: $(SRC_PATH)/configure
 	@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
 else
 config-host.mak:
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 	@echo "Please call configure before running make!"
 	@exit 1
 endif
+endif
 
 GENERATED_HEADERS = config-host.h trace.h qemu-options.def
 ifeq ($(TRACE_BACKEND),dtrace)
@@ -403,7 +405,9 @@ qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 
 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 Makefile: $(GENERATED_HEADERS)
+endif
 
 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
diff --git a/configure b/configure
index 353d7889f7..fa5657f6bc 100755
--- a/configure
+++ b/configure
@@ -199,7 +199,7 @@ cocoa="no"
 softmmu="yes"
 linux_user="no"
 bsd_user="no"
-guest_base=""
+guest_base="yes"
 uname_release=""
 mixemu="no"
 aix="no"
@@ -871,63 +871,36 @@ for opt do
   esac
 done
 
-host_guest_base="no"
 case "$cpu" in
     sparc)
            LDFLAGS="-m32 $LDFLAGS"
            QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS"
-           host_guest_base="yes"
            ;;
     sparc64)
            LDFLAGS="-m64 $LDFLAGS"
            QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS"
-           host_guest_base="yes"
            ;;
     s390)
            QEMU_CFLAGS="-m31 -march=z990 $QEMU_CFLAGS"
            LDFLAGS="-m31 $LDFLAGS"
-           host_guest_base="yes"
            ;;
     s390x)
            QEMU_CFLAGS="-m64 -march=z990 $QEMU_CFLAGS"
            LDFLAGS="-m64 $LDFLAGS"
-           host_guest_base="yes"
            ;;
     i386)
            QEMU_CFLAGS="-m32 $QEMU_CFLAGS"
            LDFLAGS="-m32 $LDFLAGS"
            cc_i386='$(CC) -m32'
-           host_guest_base="yes"
            ;;
     x86_64)
            QEMU_CFLAGS="-m64 $QEMU_CFLAGS"
            LDFLAGS="-m64 $LDFLAGS"
            cc_i386='$(CC) -m32'
-           host_guest_base="yes"
-           ;;
-    arm*)
-           host_guest_base="yes"
-           ;;
-    ppc*)
-           host_guest_base="yes"
-           ;;
-    mips*)
-           host_guest_base="yes"
-           ;;
-    ia64*)
-           host_guest_base="yes"
-           ;;
-    hppa*)
-           host_guest_base="yes"
-           ;;
-    unicore32*)
-           host_guest_base="yes"
            ;;
+    # No special flags required for other host CPUs
 esac
 
-[ -z "$guest_base" ] && guest_base="$host_guest_base"
-
-
 default_target_list=""
 
 # these targets are portable
@@ -1323,7 +1296,7 @@ if test -z "$cross_prefix" ; then
 # big/little endian test
 cat > $TMPC << EOF
 #include <inttypes.h>
-int main(int argc, char ** argv){
+int main(void) {
         volatile uint32_t i=0x01234567;
         return (*((uint8_t*)(&i))) == 0x67;
 }
@@ -2896,7 +2869,7 @@ static int sfaa(int *ptr)
   return __sync_fetch_and_and(ptr, 0);
 }
 
-int main(int argc, char **argv)
+int main(void)
 {
   int val = 42;
   sfaa(&val);
@@ -3227,7 +3200,7 @@ echo "qemu_confdir=$qemu_confdir" >> $config_host_mak
 echo "qemu_datadir=$qemu_datadir" >> $config_host_mak
 echo "qemu_docdir=$qemu_docdir" >> $config_host_mak
 echo "qemu_localstatedir=$local_statedir" >> $config_host_mak
-echo "CONFIG_QEMU_HELPERDIR=\"$libexecdir\"" >> $config_host_mak
+echo "qemu_helperdir=$libexecdir" >> $config_host_mak
 
 echo "ARCH=$ARCH" >> $config_host_mak
 if test "$debug_tcg" = "yes" ; then
diff --git a/docs/specs/standard-vga.txt b/docs/specs/standard-vga.txt
new file mode 100644
index 0000000000..1cecccd469
--- /dev/null
+++ b/docs/specs/standard-vga.txt
@@ -0,0 +1,64 @@
+
+QEMU Standard VGA
+=================
+
+Exists in two variants, for isa and pci.
+
+command line switches:
+    -vga std            [ picks isa for -M isapc, otherwise pci ]
+    -device VGA         [ pci variant ]
+    -device isa-vga     [ isa variant ]
+
+
+PCI spec
+--------
+
+Applies to the pci variant only for obvious reasons.
+
+PCI ID: 1234:1111
+
+PCI Region 0:
+   Framebuffer memory, 16 MB in size (by default).
+   Size is tunable via vga_mem_mb property.
+
+PCI Region 1:
+   Reserved (so we have the option to make the framebuffer bar 64bit).
+
+PCI Region 2:
+   MMIO bar, 4096 bytes in size (qemu 1.3+)
+
+PCI ROM Region:
+   Holds the vgabios (qemu 0.14+).
+
+
+IO ports used
+-------------
+
+03c0 - 03df : standard vga ports
+01ce        : bochs vbe interface index port
+01cf        : bochs vbe interface data port
+
+
+Memory regions used
+-------------------
+
+0xe0000000 : Framebuffer memory, isa variant only.
+
+The pci variant used to mirror the framebuffer bar here, qemu 0.14+
+stops doing that (except when in -M pc-$old compat mode).
+
+
+MMIO area spec
+--------------
+
+Likewise applies to the pci variant only for obvious reasons.
+
+0000 - 03ff : reserved, for possible virtio extension.
+0400 - 041f : vga ioports (0x3c0 -> 0x3df), remapped 1:1.
+              word access is supported, bytes are written
+              in little endia order (aka index port first),
+              so indexed registers can be updated with a
+              single mmio write (and thus only one vmexit).
+0500 - 0515 : bochs dispi interface registers, mapped flat
+              without index/data ports.  Use (index << 1)
+              as offset for (16bit) register access.
diff --git a/exec-all.h b/exec-all.h
index 6516da071d..16caf49db7 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -121,8 +121,6 @@ static inline void tlb_flush(CPUArchState *env, int flush_global)
 #define CODE_GEN_PHYS_HASH_BITS     15
 #define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
 
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024 * 1024)
-
 /* estimated block size for TB allocation */
 /* XXX: use a per code average code fragment size and modulate it
    according to the host CPU */
@@ -296,7 +294,8 @@ extern int tb_invalidated_flag;
 #if defined(CONFIG_TCG_INTERPRETER)
 /* Alpha and SH4 user mode emulations and Softmmu call GETPC().
    For all others, GETPC remains undefined (which makes TCI a little faster. */
-# if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4)
+# if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4) \
+     || defined(TARGET_SPARC)
 extern uintptr_t tci_tb_ptr;
 #  define GETPC() tci_tb_ptr
 # endif
diff --git a/exec.c b/exec.c
index 17e8ba21d4..5b55e3e9ed 100644
--- a/exec.c
+++ b/exec.c
@@ -85,26 +85,11 @@ static int nb_tbs;
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
-#if defined(__arm__) || defined(__sparc__)
-/* The prologue must be reachable with a direct jump. ARM and Sparc64
- have limited branch ranges (possibly also PPC) so place it in a
- section close to code segment. */
-#define code_gen_section                                \
-    __attribute__((__section__(".gen_code")))           \
-    __attribute__((aligned (32)))
-#elif defined(_WIN32) && !defined(_WIN64)
-#define code_gen_section                                \
-    __attribute__((aligned (16)))
-#else
-#define code_gen_section                                \
-    __attribute__((aligned (32)))
-#endif
-
-uint8_t code_gen_prologue[1024] code_gen_section;
+uint8_t *code_gen_prologue;
 static uint8_t *code_gen_buffer;
-static unsigned long code_gen_buffer_size;
+static size_t code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
-static unsigned long code_gen_buffer_max_size;
+static size_t code_gen_buffer_max_size;
 static uint8_t *code_gen_ptr;
 
 #if !defined(CONFIG_USER_ONLY)
@@ -212,7 +197,7 @@ static int tb_flush_count;
 static int tb_phys_invalidate_count;
 
 #ifdef _WIN32
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     DWORD old_protect;
     VirtualProtect(addr, size,
@@ -220,7 +205,7 @@ static void map_exec(void *addr, long size)
     
 }
 #else
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
     
@@ -489,111 +474,142 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 #define mmap_unlock() do { } while(0)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
-
 #if defined(CONFIG_USER_ONLY)
 /* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used */
+   user mode. It will change when a dedicated libc will be used.  */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+   region in which the guest needs to run.  Revisit this.  */
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-               __attribute__((aligned (CODE_GEN_ALIGN)));
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
 #endif
 
-static void code_gen_alloc(unsigned long tb_size)
-{
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-    code_gen_buffer = static_code_gen_buffer;
-    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#else
-    code_gen_buffer_size = tb_size;
-    if (code_gen_buffer_size == 0) {
-#if defined(CONFIG_USER_ONLY)
-        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* XXX: needs adjustments */
-        code_gen_buffer_size = (unsigned long)(ram_size / 4);
-#endif
-    }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
-        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
+/* Minimum size of the code gen buffer.  This number is randomly chosen,
+   but not so small that we can't have a fair number of TB's live.  */
+#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
 
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
 #if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
 #elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 #endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
-    }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
+{
+    /* Size the buffer.  */
+    if (tb_size == 0) {
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+#else
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4);
 #endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
     }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+    }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+    }
+    code_gen_buffer_size = tb_size;
+    return tb_size;
+}
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+    __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+    map_exec(static_code_gen_buffer, code_gen_buffer_size);
+    return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0;
+    void *buf;
+
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (code_gen_buffer_size > 800u * 1024 * 1024) {
+        code_gen_buffer_size = 800u * 1024 * 1024;
+    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    return buf == MAP_FAILED ? NULL : buf;
+}
 #else
-    code_gen_buffer = g_malloc(code_gen_buffer_size);
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#endif
-#endif /* !USE_STATIC_CODE_GEN_BUFFER */
-    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(code_gen_buffer_size);
+    if (buf) {
+        map_exec(buf, code_gen_buffer_size);
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    code_gen_buffer = alloc_code_gen_buffer();
+    if (code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
+    }
+
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
+    code_gen_buffer_size -= 1024;
+
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
@@ -4136,7 +4152,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     }
     /* XXX: avoid using doubles ? */
     cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %td/%ld\n",
+    cpu_fprintf(f, "gen code size       %td/%zd\n",
                 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count            %d/%d\n", 
                 nb_tbs, code_gen_max_blocks);
diff --git a/hw/alpha_dp264.c b/hw/alpha_dp264.c
index 5ea04c75ac..8f082a6656 100644
--- a/hw/alpha_dp264.c
+++ b/hw/alpha_dp264.c
@@ -42,13 +42,13 @@ static int clipper_pci_map_irq(PCIDevice *d, int irq_num)
     return (slot + 1) * 4 + irq_num;
 }
 
-static void clipper_init(ram_addr_t ram_size,
-                         const char *boot_device,
-                         const char *kernel_filename,
-                         const char *kernel_cmdline,
-                         const char *initrd_filename,
-                         const char *cpu_model)
+static void clipper_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     CPUAlphaState *cpus[4];
     PCIBus *pci_bus;
     ISABus *isa_bus;
diff --git a/hw/an5206.c b/hw/an5206.c
index 25407c0f50..042c5fcd14 100644
--- a/hw/an5206.c
+++ b/hw/an5206.c
@@ -19,11 +19,11 @@
 
 /* Board init.  */
 
-static void an5206_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void an5206_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     CPUM68KState *env;
     int kernel_size;
     uint64_t elf_entry;
diff --git a/hw/axis_dev88.c b/hw/axis_dev88.c
index eab6327bed..2fd7356fce 100644
--- a/hw/axis_dev88.c
+++ b/hw/axis_dev88.c
@@ -242,11 +242,12 @@ static const MemoryRegionOps gpio_ops = {
 static struct cris_load_info li;
 
 static
-void axisdev88_init (ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+void axisdev88_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
     CRISCPU *cpu;
     CPUCRISState *env;
     DeviceState *dev;
diff --git a/hw/boards.h b/hw/boards.h
index a2e0a54497..813d0e5109 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -5,12 +5,16 @@
 
 #include "qdev.h"
 
-typedef void QEMUMachineInitFunc(ram_addr_t ram_size,
-                                 const char *boot_device,
-                                 const char *kernel_filename,
-                                 const char *kernel_cmdline,
-                                 const char *initrd_filename,
-                                 const char *cpu_model);
+typedef struct QEMUMachineInitArgs {
+    ram_addr_t ram_size;
+    const char *boot_device;
+    const char *kernel_filename;
+    const char *kernel_cmdline;
+    const char *initrd_filename;
+    const char *cpu_model;
+} QEMUMachineInitArgs;
+
+typedef void QEMUMachineInitFunc(QEMUMachineInitArgs *args);
 
 typedef void QEMUMachineResetFunc(void);
 
diff --git a/hw/collie.c b/hw/collie.c
index 56f89a9f2e..695982a99f 100644
--- a/hw/collie.c
+++ b/hw/collie.c
@@ -23,11 +23,12 @@ static struct arm_boot_info collie_binfo = {
     .ram_size = 0x20000000,
 };
 
-static void collie_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void collie_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     StrongARMState *s;
     DriveInfo *dinfo;
     MemoryRegion *sysmem = get_system_memory();
diff --git a/hw/dummy_m68k.c b/hw/dummy_m68k.c
index 7cc7a99bfb..f436a0cbc6 100644
--- a/hw/dummy_m68k.c
+++ b/hw/dummy_m68k.c
@@ -16,11 +16,11 @@
 
 /* Board init.  */
 
-static void dummy_m68k_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void dummy_m68k_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     CPUM68KState *env;
     MemoryRegion *address_space_mem =  get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/exynos4_boards.c b/hw/exynos4_boards.c
index 4bb0a60cb1..4951064c3f 100644
--- a/hw/exynos4_boards.c
+++ b/hw/exynos4_boards.c
@@ -130,22 +130,22 @@ static Exynos4210State *exynos4_boards_init_common(
             exynos4_board_ram_size[board_type]);
 }
 
-static void nuri_init(ram_addr_t ram_size,
-        const char *boot_device,
-        const char *kernel_filename, const char *kernel_cmdline,
-        const char *initrd_filename, const char *cpu_model)
+static void nuri_init(QEMUMachineInitArgs *args)
 {
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     exynos4_boards_init_common(kernel_filename, kernel_cmdline,
                 initrd_filename, EXYNOS4_BOARD_NURI);
 
     arm_load_kernel(arm_env_get_cpu(first_cpu), &exynos4_board_binfo);
 }
 
-static void smdkc210_init(ram_addr_t ram_size,
-        const char *boot_device,
-        const char *kernel_filename, const char *kernel_cmdline,
-        const char *initrd_filename, const char *cpu_model)
+static void smdkc210_init(QEMUMachineInitArgs *args)
 {
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     Exynos4210State *s = exynos4_boards_init_common(kernel_filename,
             kernel_cmdline, initrd_filename, EXYNOS4_BOARD_SMDKC210);
 
diff --git a/hw/gumstix.c b/hw/gumstix.c
index 13a36ea5c5..4103a88b80 100644
--- a/hw/gumstix.c
+++ b/hw/gumstix.c
@@ -45,10 +45,7 @@
 
 static const int sector_len = 128 * 1024;
 
-static void connex_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void connex_init(QEMUMachineInitArgs *args)
 {
     PXA2xxState *cpu;
     DriveInfo *dinfo;
@@ -84,11 +81,9 @@ static void connex_init(ram_addr_t ram_size,
                     qdev_get_gpio_in(cpu->gpio, 36));
 }
 
-static void verdex_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void verdex_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
     PXA2xxState *cpu;
     DriveInfo *dinfo;
     int be;
diff --git a/hw/highbank.c b/hw/highbank.c
index 11aa1312c0..15036b6390 100644
--- a/hw/highbank.c
+++ b/hw/highbank.c
@@ -187,11 +187,13 @@ static struct arm_boot_info highbank_binfo;
  * 32-bit host, set the reg value of memory to 0xf7ff00000 in the
  * device tree and pass -m 2047 to QEMU.
  */
-static void highbank_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void highbank_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     DeviceState *dev;
     SysBusDevice *busdev;
     qemu_irq *irqp;
diff --git a/hw/hw.h b/hw/hw.h
index 16101de3ce..b337ee3042 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -12,6 +12,7 @@
 #include "irq.h"
 #include "qemu-file.h"
 #include "vmstate.h"
+#include "qemu-log.h"
 
 #ifdef NEED_CPU_H
 #if TARGET_LONG_BITS == 64
diff --git a/hw/integratorcp.c b/hw/integratorcp.c
index d0e2e9068e..ac0ea83492 100644
--- a/hw/integratorcp.c
+++ b/hw/integratorcp.c
@@ -438,11 +438,13 @@ static struct arm_boot_info integrator_binfo = {
     .board_id = 0x113,
 };
 
-static void integratorcp_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void integratorcp_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/kzm.c b/hw/kzm.c
index 68cd1b48b9..d1266d9305 100644
--- a/hw/kzm.c
+++ b/hw/kzm.c
@@ -70,11 +70,13 @@ static struct arm_boot_info kzm_binfo = {
     .board_id = 1722,
 };
 
-static void kzm_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void kzm_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/leon3.c b/hw/leon3.c
index 7a9729dc28..774273828f 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -94,13 +94,11 @@ static void leon3_set_pil_in(void *opaque, uint32_t pil_in)
     }
 }
 
-static void leon3_generic_hw_init(ram_addr_t  ram_size,
-                                  const char *boot_device,
-                                  const char *kernel_filename,
-                                  const char *kernel_cmdline,
-                                  const char *initrd_filename,
-                                  const char *cpu_model)
+static void leon3_generic_hw_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     SPARCCPU *cpu;
     CPUSPARCState   *env;
     MemoryRegion *address_space_mem = get_system_memory();
diff --git a/hw/lm32_boards.c b/hw/lm32_boards.c
index b76d8008be..c5a62c8264 100644
--- a/hw/lm32_boards.c
+++ b/hw/lm32_boards.c
@@ -69,12 +69,10 @@ static void main_cpu_reset(void *opaque)
     env->deba = reset_info->flash_base;
 }
 
-static void lm32_evr_init(ram_addr_t ram_size_not_used,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+static void lm32_evr_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     LM32CPU *cpu;
     CPULM32State *env;
     DriveInfo *dinfo;
@@ -159,12 +157,12 @@ static void lm32_evr_init(ram_addr_t ram_size_not_used,
     qemu_register_reset(main_cpu_reset, reset_info);
 }
 
-static void lm32_uclinux_init(ram_addr_t ram_size_not_used,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+static void lm32_uclinux_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     LM32CPU *cpu;
     CPULM32State *env;
     DriveInfo *dinfo;
diff --git a/hw/mainstone.c b/hw/mainstone.c
index 97687b6eeb..c0d6034147 100644
--- a/hw/mainstone.c
+++ b/hw/mainstone.c
@@ -171,11 +171,13 @@ static void mainstone_common_init(MemoryRegion *address_space_mem,
     arm_load_kernel(mpu->cpu, &mainstone_binfo);
 }
 
-static void mainstone_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void mainstone_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     mainstone_common_init(get_system_memory(), ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, mainstone, 0x196);
 }
diff --git a/hw/mcf5208.c b/hw/mcf5208.c
index ee25b1b387..688bc3c1a6 100644
--- a/hw/mcf5208.c
+++ b/hw/mcf5208.c
@@ -187,11 +187,11 @@ static void mcf5208_sys_init(MemoryRegion *address_space, qemu_irq *pic)
     }
 }
 
-static void mcf5208evb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void mcf5208evb_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     CPUM68KState *env;
     int kernel_size;
     uint64_t elf_entry;
diff --git a/hw/milkymist.c b/hw/milkymist.c
index 2e7235b4b3..ca9ed43d99 100644
--- a/hw/milkymist.c
+++ b/hw/milkymist.c
@@ -73,12 +73,12 @@ static void main_cpu_reset(void *opaque)
 }
 
 static void
-milkymist_init(ram_addr_t ram_size_not_used,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+milkymist_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     LM32CPU *cpu;
     CPULM32State *env;
     int kernel_size;
diff --git a/hw/mips_fulong2e.c b/hw/mips_fulong2e.c
index d4a8672f23..fb50a1f67b 100644
--- a/hw/mips_fulong2e.c
+++ b/hw/mips_fulong2e.c
@@ -256,10 +256,13 @@ static void cpu_request_exit(void *opaque, int irq, int level)
     }
 }
 
-static void mips_fulong2e_init(ram_addr_t ram_size, const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void mips_fulong2e_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index db927f14d0..14df4d7745 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -302,21 +302,19 @@ static void mips_jazz_init(MemoryRegion *address_space,
 }
 
 static
-void mips_magnum_init (ram_addr_t ram_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+void mips_magnum_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
         mips_jazz_init(get_system_memory(), get_system_io(),
                        ram_size, cpu_model, JAZZ_MAGNUM);
 }
 
 static
-void mips_pica61_init (ram_addr_t ram_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+void mips_pica61_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
     mips_jazz_init(get_system_memory(), get_system_io(),
                    ram_size, cpu_model, JAZZ_PICA61);
 }
diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index 632b466e32..ad4910ff66 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -775,11 +775,13 @@ static void cpu_request_exit(void *opaque, int irq, int level)
 }
 
 static
-void mips_malta_init (ram_addr_t ram_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+void mips_malta_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     pflash_t *fl;
     MemoryRegion *system_memory = get_system_memory();
diff --git a/hw/mips_mipssim.c b/hw/mips_mipssim.c
index 830f635597..a1d3945cbb 100644
--- a/hw/mips_mipssim.c
+++ b/hw/mips_mipssim.c
@@ -131,11 +131,13 @@ static void mipsnet_init(int base, qemu_irq irq, NICInfo *nd)
 }
 
 static void
-mips_mipssim_init (ram_addr_t ram_size,
-                   const char *boot_device,
-                   const char *kernel_filename, const char *kernel_cmdline,
-                   const char *initrd_filename, const char *cpu_model)
+mips_mipssim_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/mips_r4k.c b/hw/mips_r4k.c
index 967a76e533..b73cdc38ec 100644
--- a/hw/mips_r4k.c
+++ b/hw/mips_r4k.c
@@ -151,11 +151,13 @@ static void main_cpu_reset(void *opaque)
 
 static const int sector_len = 32 * 1024;
 static
-void mips_r4k_init (ram_addr_t ram_size,
-                    const char *boot_device,
-                    const char *kernel_filename, const char *kernel_cmdline,
-                    const char *initrd_filename, const char *cpu_model)
+void mips_r4k_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/musicpal.c b/hw/musicpal.c
index f305e21038..f06814c83b 100644
--- a/hw/musicpal.c
+++ b/hw/musicpal.c
@@ -1508,11 +1508,12 @@ static struct arm_boot_info musicpal_binfo = {
     .board_id = 0x20e,
 };
 
-static void musicpal_init(ram_addr_t ram_size,
-               const char *boot_device,
-               const char *kernel_filename, const char *kernel_cmdline,
-               const char *initrd_filename, const char *cpu_model)
+static void musicpal_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     qemu_irq *cpu_pic;
     qemu_irq pic[32];
diff --git a/hw/nseries.c b/hw/nseries.c
index 6df71ebb48..7ada90d280 100644
--- a/hw/nseries.c
+++ b/hw/nseries.c
@@ -1397,21 +1397,27 @@ static struct arm_boot_info n810_binfo = {
     .atag_board = n810_atag_setup,
 };
 
-static void n800_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void n800_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     return n8x0_init(ram_size, boot_device,
                     kernel_filename, kernel_cmdline, initrd_filename,
                     cpu_model, &n800_binfo, 800);
 }
 
-static void n810_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void n810_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     return n8x0_init(ram_size, boot_device,
                     kernel_filename, kernel_cmdline, initrd_filename,
                     cpu_model, &n810_binfo, 810);
diff --git a/hw/null-machine.c b/hw/null-machine.c
index 69910d399c..d813c089e7 100644
--- a/hw/null-machine.c
+++ b/hw/null-machine.c
@@ -15,12 +15,7 @@
 #include "hw/hw.h"
 #include "hw/boards.h"
 
-static void machine_none_init(ram_addr_t ram_size,
-                              const char *boot_device,
-                              const char *kernel_filename,
-                              const char *kernel_cmdline,
-                              const char *initrd_filename,
-                              const char *cpu_model)
+static void machine_none_init(QEMUMachineInitArgs *args)
 {
 }
 
diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c
index abca341926..ad17487fd2 100644
--- a/hw/omap_sx1.c
+++ b/hw/omap_sx1.c
@@ -209,20 +209,26 @@ static void sx1_init(ram_addr_t ram_size,
     //~ qemu_console_resize(ds, 640, 480);
 }
 
-static void sx1_init_v1(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void sx1_init_v1(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sx1_init(ram_size, boot_device, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, 1);
 }
 
-static void sx1_init_v2(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void sx1_init_v2(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sx1_init(ram_size, boot_device, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, 2);
 }
diff --git a/hw/openrisc_sim.c b/hw/openrisc_sim.c
index 55e97f0959..e96a94492b 100644
--- a/hw/openrisc_sim.c
+++ b/hw/openrisc_sim.c
@@ -90,13 +90,11 @@ static void cpu_openrisc_load_kernel(ram_addr_t ram_size,
     cpu->env.pc = entry;
 }
 
-static void openrisc_sim_init(ram_addr_t ram_size,
-                              const char *boot_device,
-                              const char *kernel_filename,
-                              const char *kernel_cmdline,
-                              const char *initrd_filename,
-                              const char *cpu_model)
+static void openrisc_sim_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
    OpenRISCCPU *cpu = NULL;
     MemoryRegion *ram;
     int n;
diff --git a/hw/palm.c b/hw/palm.c
index bacdc90d4a..032b8d6510 100644
--- a/hw/palm.c
+++ b/hw/palm.c
@@ -190,11 +190,12 @@ static struct arm_boot_info palmte_binfo = {
     .board_id = 0x331,
 };
 
-static void palmte_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void palmte_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     MemoryRegion *address_space_mem = get_system_memory();
     struct omap_mpu_state_s *mpu;
     int flash_size = 0x00800000;
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 82364ab0d5..bf04a42da5 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -287,13 +287,14 @@ static void pc_init1(MemoryRegion *system_memory,
     }
 }
 
-static void pc_init_pci(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename,
-                        const char *cpu_model)
+static void pc_init_pci(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -301,13 +302,14 @@ static void pc_init_pci(ram_addr_t ram_size,
              initrd_filename, cpu_model, 1, 1);
 }
 
-static void pc_init_pci_no_kvmclock(ram_addr_t ram_size,
-                                    const char *boot_device,
-                                    const char *kernel_filename,
-                                    const char *kernel_cmdline,
-                                    const char *initrd_filename,
-                                    const char *cpu_model)
+static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -315,13 +317,14 @@ static void pc_init_pci_no_kvmclock(ram_addr_t ram_size,
              initrd_filename, cpu_model, 1, 0);
 }
 
-static void pc_init_isa(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename,
-                        const char *cpu_model)
+static void pc_init_isa(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (cpu_model == NULL)
         cpu_model = "486";
     pc_init1(get_system_memory(),
@@ -332,19 +335,12 @@ static void pc_init_isa(ram_addr_t ram_size,
 }
 
 #ifdef CONFIG_XEN
-static void pc_xen_hvm_init(ram_addr_t ram_size,
-                            const char *boot_device,
-                            const char *kernel_filename,
-                            const char *kernel_cmdline,
-                            const char *initrd_filename,
-                            const char *cpu_model)
+static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
 {
     if (xen_hvm_init() != 0) {
         hw_error("xen hardware virtual machine initialisation failed");
     }
-    pc_init_pci_no_kvmclock(ram_size, boot_device,
-                            kernel_filename, kernel_cmdline,
-                            initrd_filename, cpu_model);
+    pc_init_pci_no_kvmclock(args);
     xen_vcpu_init();
 }
 #endif
@@ -379,6 +375,10 @@ static QEMUMachine pc_machine_v1_3 = {
             .driver   = "qxl-vga",\
             .property = "revision",\
             .value    = stringify(3),\
+        },{\
+            .driver   = "VGA",\
+            .property = "mmio",\
+            .value    = "off",\
         }
 
 static QEMUMachine pc_machine_v1_2 = {
diff --git a/hw/petalogix_ml605_mmu.c b/hw/petalogix_ml605_mmu.c
index b9bfbed4c4..39df25175b 100644
--- a/hw/petalogix_ml605_mmu.c
+++ b/hw/petalogix_ml605_mmu.c
@@ -73,12 +73,10 @@ static void machine_cpu_reset(MicroBlazeCPU *cpu)
 }
 
 static void
-petalogix_ml605_init(ram_addr_t ram_size,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+petalogix_ml605_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
     MemoryRegion *address_space_mem = get_system_memory();
     DeviceState *dev, *dma, *eth0;
     MicroBlazeCPU *cpu;
diff --git a/hw/petalogix_s3adsp1800_mmu.c b/hw/petalogix_s3adsp1800_mmu.c
index 2cf68828ed..71c32ce889 100644
--- a/hw/petalogix_s3adsp1800_mmu.c
+++ b/hw/petalogix_s3adsp1800_mmu.c
@@ -57,12 +57,10 @@ static void machine_cpu_reset(MicroBlazeCPU *cpu)
 }
 
 static void
-petalogix_s3adsp1800_init(ram_addr_t ram_size,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+petalogix_s3adsp1800_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
     DeviceState *dev;
     MicroBlazeCPU *cpu;
     CPUMBState *env;
diff --git a/hw/pl011.c b/hw/pl011.c
index 3245702df0..fb22736b6a 100644
--- a/hw/pl011.c
+++ b/hw/pl011.c
@@ -107,7 +107,8 @@ static uint64_t pl011_read(void *opaque, target_phys_addr_t offset,
     case 18: /* UARTDMACR */
         return s->dmacr;
     default:
-        hw_error("pl011_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl011_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -178,11 +179,13 @@ static void pl011_write(void *opaque, target_phys_addr_t offset,
         break;
     case 18: /* UARTDMACR */
         s->dmacr = value;
-        if (value & 3)
-            hw_error("PL011: DMA not implemented\n");
+        if (value & 3) {
+            qemu_log_mask(LOG_UNIMP, "pl011: DMA not implemented\n");
+        }
         break;
     default:
-        hw_error("pl011_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl011_write: Bad offset %x\n", (int)offset);
     }
 }
 
diff --git a/hw/pl022.c b/hw/pl022.c
index 60e35daeb5..e2ae315efb 100644
--- a/hw/pl022.c
+++ b/hw/pl022.c
@@ -168,7 +168,8 @@ static uint64_t pl022_read(void *opaque, target_phys_addr_t offset,
         /* Not implemented.  */
         return 0;
     default:
-        hw_error("pl022_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl022_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -211,11 +212,12 @@ static void pl022_write(void *opaque, target_phys_addr_t offset,
         break;
     case 0x20: /* DMACR */
         if (value) {
-            hw_error("pl022: DMA not implemented\n");
+            qemu_log_mask(LOG_UNIMP, "pl022: DMA not implemented\n");
         }
         break;
     default:
-        hw_error("pl022_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl022_write: Bad offset %x\n", (int)offset);
     }
 }
 
diff --git a/hw/pl031.c b/hw/pl031.c
index 9602664da6..6cbaf2386f 100644
--- a/hw/pl031.c
+++ b/hw/pl031.c
@@ -120,11 +120,13 @@ static uint64_t pl031_read(void *opaque, target_phys_addr_t offset,
     case RTC_MIS:
         return s->is & s->im;
     case RTC_ICR:
-        fprintf(stderr, "qemu: pl031_read: Unexpected offset 0x%x\n",
-                (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031: read of write-only register at offset 0x%x\n",
+                      (int)offset);
         break;
     default:
-        hw_error("pl031_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031_read: Bad offset 0x%x\n", (int)offset);
         break;
     }
 
@@ -167,12 +169,14 @@ static void pl031_write(void * opaque, target_phys_addr_t offset,
     case RTC_DR:
     case RTC_MIS:
     case RTC_RIS:
-        fprintf(stderr, "qemu: pl031_write: Unexpected offset 0x%x\n",
-                (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031: write to read-only register at offset 0x%x\n",
+                      (int)offset);
         break;
 
     default:
-        hw_error("pl031_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031_write: Bad offset 0x%x\n", (int)offset);
         break;
     }
 }
diff --git a/hw/pl041.c b/hw/pl041.c
index b6723be0a9..9a6db1b6e2 100644
--- a/hw/pl041.c
+++ b/hw/pl041.c
@@ -536,8 +536,9 @@ static int pl041_init(SysBusDevice *dev)
     default:
         /* NC FIFO depth of 16 is not allowed because its id bits in
            AACIPERIPHID3 overlap with the id for the default NC FIFO depth */
-        fprintf(stderr, "pl041: unsupported non-compact fifo depth [%i]\n",
-                s->fifo_depth);
+        qemu_log_mask(LOG_UNIMP,
+                      "pl041: unsupported non-compact fifo depth [%i]\n",
+                      s->fifo_depth);
         return -1;
     }
 
diff --git a/hw/pl181.c b/hw/pl181.c
index 7d91fbba1d..5a734735f0 100644
--- a/hw/pl181.c
+++ b/hw/pl181.c
@@ -352,7 +352,7 @@ static uint64_t pl181_read(void *opaque, target_phys_addr_t offset,
     case 0xa0: case 0xa4: case 0xa8: case 0xac:
     case 0xb0: case 0xb4: case 0xb8: case 0xbc:
         if (s->fifo_len == 0) {
-            fprintf(stderr, "pl181: Unexpected FIFO read\n");
+            qemu_log_mask(LOG_GUEST_ERROR, "pl181: Unexpected FIFO read\n");
             return 0;
         } else {
             uint32_t value;
@@ -363,7 +363,8 @@ static uint64_t pl181_read(void *opaque, target_phys_addr_t offset,
             return value;
         }
     default:
-        hw_error("pl181_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl181_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -387,11 +388,11 @@ static void pl181_write(void *opaque, target_phys_addr_t offset,
         s->cmd = value;
         if (s->cmd & PL181_CMD_ENABLE) {
             if (s->cmd & PL181_CMD_INTERRUPT) {
-                fprintf(stderr, "pl181: Interrupt mode not implemented\n");
-                abort();
+                qemu_log_mask(LOG_UNIMP,
+                              "pl181: Interrupt mode not implemented\n");
             } if (s->cmd & PL181_CMD_PENDING) {
-                fprintf(stderr, "pl181: Pending commands not implemented\n");
-                abort();
+                qemu_log_mask(LOG_UNIMP,
+                              "pl181: Pending commands not implemented\n");
             } else {
                 pl181_send_command(s);
                 pl181_fifo_run(s);
@@ -427,14 +428,15 @@ static void pl181_write(void *opaque, target_phys_addr_t offset,
     case 0xa0: case 0xa4: case 0xa8: case 0xac:
     case 0xb0: case 0xb4: case 0xb8: case 0xbc:
         if (s->datacnt == 0) {
-            fprintf(stderr, "pl181: Unexpected FIFO write\n");
+            qemu_log_mask(LOG_GUEST_ERROR, "pl181: Unexpected FIFO write\n");
         } else {
             pl181_fifo_push(s, value);
             pl181_fifo_run(s);
         }
         break;
     default:
-        hw_error("pl181_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl181_write: Bad offset %x\n", (int)offset);
     }
     pl181_update(s);
 }
diff --git a/hw/pl190.c b/hw/pl190.c
index 7332f4dbae..961da5b3af 100644
--- a/hw/pl190.c
+++ b/hw/pl190.c
@@ -143,7 +143,8 @@ static uint64_t pl190_read(void *opaque, target_phys_addr_t offset,
     case 13: /* DEFVECTADDR */
         return s->vect_addr[16];
     default:
-        hw_error("pl190_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl190_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -202,7 +203,8 @@ static void pl190_write(void *opaque, target_phys_addr_t offset,
         }
         break;
     default:
-        hw_error("pl190_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                     "pl190_write: Bad offset %x\n", (int)offset);
         return;
     }
     pl190_update(s);
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 60a5cb3bd0..4cfb94061a 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -25,13 +25,14 @@ static void e500plat_fixup_devtree(PPCE500Params *params, void *fdt)
                          sizeof(compatible));
 }
 
-static void e500plat_init(ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void e500plat_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *boot_device = args->boot_device;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     PPCE500Params params = {
         .ram_size = ram_size,
         .boot_device = boot_device,
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index 984d21cbf5..e651661941 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -25,13 +25,14 @@ static void mpc8544ds_fixup_devtree(PPCE500Params *params, void *fdt)
                          sizeof(compatible));
 }
 
-static void mpc8544ds_init(ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void mpc8544ds_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *boot_device = args->boot_device;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     PPCE500Params params = {
         .ram_size = ram_size,
         .boot_device = boot_device,
diff --git a/hw/ppc405_boards.c b/hw/ppc405_boards.c
index 476775d05b..e848cb0b5c 100644
--- a/hw/ppc405_boards.c
+++ b/hw/ppc405_boards.c
@@ -158,7 +158,7 @@ static void ref405ep_fpga_reset (void *opaque)
     fpga->reg1 = 0x0F;
 }
 
-static void ref405ep_fpga_init (MemoryRegion *sysmem, uint32_t base)
+static void ref405ep_fpga_init(MemoryRegion *sysmem, uint32_t base)
 {
     ref405ep_fpga_t *fpga;
     MemoryRegion *fpga_memory = g_new(MemoryRegion, 1);
@@ -170,13 +170,12 @@ static void ref405ep_fpga_init (MemoryRegion *sysmem, uint32_t base)
     qemu_register_reset(&ref405ep_fpga_reset, fpga);
 }
 
-static void ref405ep_init (ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void ref405ep_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     ppc4xx_bd_info_t bd;
     CPUPPCState *env;
@@ -484,7 +483,7 @@ static void taihu_cpld_reset (void *opaque)
     cpld->reg1 = 0x80;
 }
 
-static void taihu_cpld_init (MemoryRegion *sysmem, uint32_t base)
+static void taihu_cpld_init(MemoryRegion *sysmem, uint32_t base)
 {
     taihu_cpld_t *cpld;
     MemoryRegion *cpld_memory = g_new(MemoryRegion, 1);
@@ -495,13 +494,11 @@ static void taihu_cpld_init (MemoryRegion *sysmem, uint32_t base)
     qemu_register_reset(&taihu_cpld_reset, cpld);
 }
 
-static void taihu_405ep_init(ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
+static void taihu_405ep_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *kernel_filename = args->kernel_filename;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     qemu_irq *pic;
     MemoryRegion *sysmem = get_system_memory();
diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index c198071170..78e7985305 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -157,13 +157,13 @@ static void main_cpu_reset(void *opaque)
     mmubooke_create_initial_mapping(env, 0, 0);
 }
 
-static void bamboo_init(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename,
-                        const char *cpu_model)
+static void bamboo_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 };
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram_memories
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index b8d3c9c988..a265445b70 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -128,13 +128,14 @@ static void ppc_core99_reset(void *opaque)
 }
 
 /* PowerPC Mac99 hardware initialisation */
-static void ppc_core99_init (ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
+static void ppc_core99_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
     char *filename;
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 2c4a47813f..de334080ff 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -71,13 +71,14 @@ static void ppc_heathrow_reset(void *opaque)
     cpu_reset(CPU(cpu));
 }
 
-static void ppc_heathrow_init (ram_addr_t ram_size,
-                               const char *boot_device,
-                               const char *kernel_filename,
-                               const char *kernel_cmdline,
-                               const char *initrd_filename,
-                               const char *cpu_model)
+static void ppc_heathrow_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     MemoryRegion *sysmem = get_system_memory();
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 15444303e4..b426891e29 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -447,13 +447,14 @@ static void ppc_prep_reset(void *opaque)
 }
 
 /* PowerPC PREP hardware initialisation */
-static void ppc_prep_init (ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void ppc_prep_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     MemoryRegion *sysmem = get_system_memory();
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
diff --git a/hw/puv3.c b/hw/puv3.c
index 43f7216e4e..764799cff4 100644
--- a/hw/puv3.c
+++ b/hw/puv3.c
@@ -91,10 +91,12 @@ static void puv3_load_kernel(const char *kernel_filename)
     graphic_console_init(NULL, NULL, NULL, NULL, NULL);
 }
 
-static void puv3_init(ram_addr_t ram_size, const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void puv3_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *initrd_filename = args->initrd_filename;
     CPUUniCore32State *env;
 
     if (initrd_filename) {
diff --git a/hw/r2d.c b/hw/r2d.c
index 1bc191ff3e..3cb6942056 100644
--- a/hw/r2d.c
+++ b/hw/r2d.c
@@ -219,11 +219,12 @@ static struct QEMU_PACKED
     char kernel_cmdline[256];
 } boot_params;
 
-static void r2d_init(ram_addr_t ram_size,
-              const char *boot_device,
-	      const char *kernel_filename, const char *kernel_cmdline,
-	      const char *initrd_filename, const char *cpu_model)
+static void r2d_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     SuperHCPU *cpu;
     CPUSH4State *env;
     ResetData *reset_info;
diff --git a/hw/realview.c b/hw/realview.c
index 19db4d026b..8dc4be6ae0 100644
--- a/hw/realview.c
+++ b/hw/realview.c
@@ -330,11 +330,14 @@ static void realview_init(ram_addr_t ram_size,
     arm_load_kernel(arm_env_get_cpu(first_cpu), &realview_binfo);
 }
 
-static void realview_eb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_eb_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "arm926";
     }
@@ -342,11 +345,14 @@ static void realview_eb_init(ram_addr_t ram_size,
                   initrd_filename, cpu_model, BOARD_EB);
 }
 
-static void realview_eb_mpcore_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_eb_mpcore_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "arm11mpcore";
     }
@@ -354,11 +360,14 @@ static void realview_eb_mpcore_init(ram_addr_t ram_size,
                   initrd_filename, cpu_model, BOARD_EB_MPCORE);
 }
 
-static void realview_pb_a8_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_pb_a8_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "cortex-a8";
     }
@@ -366,11 +375,14 @@ static void realview_pb_a8_init(ram_addr_t ram_size,
                   initrd_filename, cpu_model, BOARD_PB_A8);
 }
 
-static void realview_pbx_a9_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_pbx_a9_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "cortex-a9";
     }
diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 47eed35da3..39ff17828b 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -151,13 +151,14 @@ unsigned s390_del_running_cpu(CPUS390XState *env)
 }
 
 /* PC hardware initialisation */
-static void s390_init(ram_addr_t my_ram_size,
-                      const char *boot_device,
-                      const char *kernel_filename,
-                      const char *kernel_cmdline,
-                      const char *initrd_filename,
-                      const char *cpu_model)
+static void s390_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t my_ram_size = args->ram_size;
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     CPUS390XState *env = NULL;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/shix.c b/hw/shix.c
index dd9ce174f9..b56dd54f75 100644
--- a/hw/shix.c
+++ b/hw/shix.c
@@ -37,11 +37,9 @@
 #define BIOS_FILENAME "shix_bios.bin"
 #define BIOS_ADDRESS 0xA0000000
 
-static void shix_init(ram_addr_t ram_size,
-               const char *boot_device,
-	       const char *kernel_filename, const char *kernel_cmdline,
-	       const char *initrd_filename, const char *cpu_model)
+static void shix_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
     int ret;
     CPUSH4State *env;
     struct SH7750State *s;
diff --git a/hw/spapr.c b/hw/spapr.c
index 09b8e99221..637b3fb718 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -665,13 +665,14 @@ static int spapr_vga_init(PCIBus *pci_bus)
 }
 
 /* pSeries LPAR / sPAPR hardware init */
-static void ppc_spapr_init(ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void ppc_spapr_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     PowerPCCPU *cpu;
     CPUPPCState *env;
     PCIHostState *phb;
diff --git a/hw/spitz.c b/hw/spitz.c
index 24346dcd5a..29426266b1 100644
--- a/hw/spitz.c
+++ b/hw/spitz.c
@@ -936,38 +936,46 @@ static void spitz_common_init(ram_addr_t ram_size,
     sl_bootparam_write(SL_PXA_PARAM_BASE);
 }
 
-static void spitz_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void spitz_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, spitz, 0x2c9);
 }
 
-static void borzoi_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void borzoi_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, borzoi, 0x33f);
 }
 
-static void akita_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void akita_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, akita, 0x2e8);
 }
 
-static void terrier_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void terrier_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, terrier, 0x33f);
 }
diff --git a/hw/stellaris.c b/hw/stellaris.c
index 353ca4c046..bfb18b014e 100644
--- a/hw/stellaris.c
+++ b/hw/stellaris.c
@@ -1313,19 +1313,17 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
 }
 
 /* FIXME: Figure out how to generate these from stellaris_boards.  */
-static void lm3s811evb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void lm3s811evb_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     stellaris_init(kernel_filename, cpu_model, &stellaris_boards[0]);
 }
 
-static void lm3s6965evb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void lm3s6965evb_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     stellaris_init(kernel_filename, cpu_model, &stellaris_boards[1]);
 }
 
diff --git a/hw/sun4m.c b/hw/sun4m.c
index a04b485eda..dbe93f9eb9 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -1306,92 +1306,118 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = {
 };
 
 /* SPARCstation 5 hardware initialisation */
-static void ss5_init(ram_addr_t RAM_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void ss5_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[0], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation 10 hardware initialisation */
-static void ss10_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void ss10_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[1], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCserver 600MP hardware initialisation */
-static void ss600mp_init(ram_addr_t RAM_size,
-                         const char *boot_device,
-                         const char *kernel_filename,
-                         const char *kernel_cmdline,
-                         const char *initrd_filename, const char *cpu_model)
+static void ss600mp_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[2], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation 20 hardware initialisation */
-static void ss20_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void ss20_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[3], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation Voyager hardware initialisation */
-static void vger_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void vger_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[4], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation LX hardware initialisation */
-static void ss_lx_init(ram_addr_t RAM_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+static void ss_lx_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[5], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation 4 hardware initialisation */
-static void ss4_init(ram_addr_t RAM_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void ss4_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[6], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCClassic hardware initialisation */
-static void scls_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void scls_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[7], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCbook hardware initialisation */
-static void sbook_init(ram_addr_t RAM_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+static void sbook_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[8], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
@@ -1654,21 +1680,27 @@ static void sun4d_hw_init(const struct sun4d_hwdef *hwdef, ram_addr_t RAM_size,
 }
 
 /* SPARCserver 1000 hardware initialisation */
-static void ss1000_init(ram_addr_t RAM_size,
-                        const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void ss1000_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4d_hw_init(&sun4d_hwdefs[0], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCcenter 2000 hardware initialisation */
-static void ss2000_init(ram_addr_t RAM_size,
-                        const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void ss2000_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4d_hw_init(&sun4d_hwdefs[1], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
@@ -1848,11 +1880,14 @@ static void sun4c_hw_init(const struct sun4c_hwdef *hwdef, ram_addr_t RAM_size,
 }
 
 /* SPARCstation 2 hardware initialisation */
-static void ss2_init(ram_addr_t RAM_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void ss2_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4c_hw_init(&sun4c_hwdefs[0], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
diff --git a/hw/sun4u.c b/hw/sun4u.c
index 940db3348a..abf68cf50f 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -933,31 +933,40 @@ static const struct hwdef hwdefs[] = {
 };
 
 /* Sun4u hardware initialisation */
-static void sun4u_init(ram_addr_t RAM_size,
-                       const char *boot_devices,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
-{
+static void sun4u_init(QEMUMachineInitArgs *args)
+{
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_devices = args->boot_device;
     sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, &hwdefs[0]);
 }
 
 /* Sun4v hardware initialisation */
-static void sun4v_init(ram_addr_t RAM_size,
-                       const char *boot_devices,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
-{
+static void sun4v_init(QEMUMachineInitArgs *args)
+{
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_devices = args->boot_device;
     sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, &hwdefs[1]);
 }
 
 /* Niagara hardware initialisation */
-static void niagara_init(ram_addr_t RAM_size,
-                         const char *boot_devices,
-                         const char *kernel_filename, const char *kernel_cmdline,
-                         const char *initrd_filename, const char *cpu_model)
-{
+static void niagara_init(QEMUMachineInitArgs *args)
+{
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_devices = args->boot_device;
     sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, &hwdefs[2]);
 }
diff --git a/hw/tosa.c b/hw/tosa.c
index 297a8c2ed0..512278c241 100644
--- a/hw/tosa.c
+++ b/hw/tosa.c
@@ -205,11 +205,12 @@ static struct arm_boot_info tosa_binfo = {
     .ram_size = 0x04000000,
 };
 
-static void tosa_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void tosa_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *rom = g_new(MemoryRegion, 1);
     PXA2xxState *mpu;
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 7b1b0256aa..756ec29da5 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -348,22 +348,28 @@ static void versatile_init(ram_addr_t ram_size,
     arm_load_kernel(cpu, &versatile_binfo);
 }
 
-static void vpb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void vpb_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     versatile_init(ram_size,
                    boot_device,
                    kernel_filename, kernel_cmdline,
                    initrd_filename, cpu_model, 0x183);
 }
 
-static void vab_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void vab_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     versatile_init(ram_size,
                    boot_device,
                    kernel_filename, kernel_cmdline,
diff --git a/hw/vexpress.c b/hw/vexpress.c
index 3596d1e33f..36503d69fa 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -467,25 +467,27 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
     arm_load_kernel(arm_env_get_cpu(first_cpu), &vexpress_binfo);
 }
 
-static void vexpress_a9_init(ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
+static void vexpress_a9_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     vexpress_common_init(&a9_daughterboard,
                          ram_size, boot_device, kernel_filename,
                          kernel_cmdline, initrd_filename, cpu_model);
 }
 
-static void vexpress_a15_init(ram_addr_t ram_size,
-                              const char *boot_device,
-                              const char *kernel_filename,
-                              const char *kernel_cmdline,
-                              const char *initrd_filename,
-                              const char *cpu_model)
+static void vexpress_a15_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     vexpress_common_init(&a15_daughterboard,
                          ram_size, boot_device, kernel_filename,
                          kernel_cmdline, initrd_filename, cpu_model);
diff --git a/hw/vga-isa.c b/hw/vga-isa.c
index d2904737bc..046602b3d2 100644
--- a/hw/vga-isa.c
+++ b/hw/vga-isa.c
@@ -1,6 +1,8 @@
 /*
  * QEMU ISA VGA Emulator.
  *
+ * see docs/specs/standard-vga.txt for virtual hardware specs.
+ *
  * Copyright (c) 2003 Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 996d47f23a..5c4daee397 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -1,6 +1,8 @@
 /*
  * QEMU PCI VGA Emulator.
  *
+ * see docs/specs/standard-vga.txt for virtual hardware specs.
+ *
  * Copyright (c) 2003 Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -29,9 +31,23 @@
 #include "qemu-timer.h"
 #include "loader.h"
 
+#define PCI_VGA_IOPORT_OFFSET 0x400
+#define PCI_VGA_IOPORT_SIZE   (0x3e0 - 0x3c0)
+#define PCI_VGA_BOCHS_OFFSET  0x500
+#define PCI_VGA_BOCHS_SIZE    (0x0b * 2)
+#define PCI_VGA_MMIO_SIZE     0x1000
+
+enum vga_pci_flags {
+    PCI_VGA_FLAG_ENABLE_MMIO = 1,
+};
+
 typedef struct PCIVGAState {
     PCIDevice dev;
     VGACommonState vga;
+    uint32_t flags;
+    MemoryRegion mmio;
+    MemoryRegion ioport;
+    MemoryRegion bochs;
 } PCIVGAState;
 
 static const VMStateDescription vmstate_vga_pci = {
@@ -46,31 +62,125 @@ static const VMStateDescription vmstate_vga_pci = {
     }
 };
 
+static uint64_t pci_vga_ioport_read(void *ptr, target_phys_addr_t addr,
+                                    unsigned size)
+{
+    PCIVGAState *d = ptr;
+    uint64_t ret = 0;
+
+    switch (size) {
+    case 1:
+        ret = vga_ioport_read(&d->vga, addr);
+        break;
+    case 2:
+        ret  = vga_ioport_read(&d->vga, addr);
+        ret |= vga_ioport_read(&d->vga, addr+1) << 8;
+        break;
+    }
+    return ret;
+}
+
+static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr,
+                                 uint64_t val, unsigned size)
+{
+    PCIVGAState *d = ptr;
+    switch (size) {
+    case 1:
+        vga_ioport_write(&d->vga, addr, val);
+        break;
+    case 2:
+        /*
+         * Update bytes in little endian order.  Allows to update
+         * indexed registers with a single word write because the
+         * index byte is updated first.
+         */
+        vga_ioport_write(&d->vga, addr, val & 0xff);
+        vga_ioport_write(&d->vga, addr+1, (val >> 8) & 0xff);
+        break;
+    }
+}
+
+static const MemoryRegionOps pci_vga_ioport_ops = {
+    .read = pci_vga_ioport_read,
+    .write = pci_vga_ioport_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 2,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t pci_vga_bochs_read(void *ptr, target_phys_addr_t addr,
+                                   unsigned size)
+{
+    PCIVGAState *d = ptr;
+    int index = addr >> 1;
+
+    vbe_ioport_write_index(&d->vga, 0, index);
+    return vbe_ioport_read_data(&d->vga, 0);
+}
+
+static void pci_vga_bochs_write(void *ptr, target_phys_addr_t addr,
+                                uint64_t val, unsigned size)
+{
+    PCIVGAState *d = ptr;
+    int index = addr >> 1;
+
+    vbe_ioport_write_index(&d->vga, 0, index);
+    vbe_ioport_write_data(&d->vga, 0, val);
+}
+
+static const MemoryRegionOps pci_vga_bochs_ops = {
+    .read = pci_vga_bochs_read,
+    .write = pci_vga_bochs_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 2,
+    .impl.max_access_size = 2,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static int pci_std_vga_initfn(PCIDevice *dev)
 {
-     PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
-     VGACommonState *s = &d->vga;
+    PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
+    VGACommonState *s = &d->vga;
+
+    /* vga + console init */
+    vga_common_init(s);
+    vga_init(s, pci_address_space(dev), pci_address_space_io(dev), true);
 
-     // vga + console init
-     vga_common_init(s);
-     vga_init(s, pci_address_space(dev), pci_address_space_io(dev), true);
+    s->ds = graphic_console_init(s->update, s->invalidate,
+                                 s->screen_dump, s->text_update, s);
 
-     s->ds = graphic_console_init(s->update, s->invalidate,
-                                  s->screen_dump, s->text_update, s);
+    /* XXX: VGA_RAM_SIZE must be a power of two */
+    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram);
 
-     /* XXX: VGA_RAM_SIZE must be a power of two */
-     pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram);
+    /* mmio bar for vga register access */
+    if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_MMIO)) {
+        memory_region_init(&d->mmio, "vga.mmio", 4096);
+        memory_region_init_io(&d->ioport, &pci_vga_ioport_ops, d,
+                              "vga ioports remapped", PCI_VGA_IOPORT_SIZE);
+        memory_region_init_io(&d->bochs, &pci_vga_bochs_ops, d,
+                              "bochs dispi interface", PCI_VGA_BOCHS_SIZE);
 
-     if (!dev->rom_bar) {
-         /* compatibility with pc-0.13 and older */
-         vga_init_vbe(s, pci_address_space(dev));
-     }
+        memory_region_add_subregion(&d->mmio, PCI_VGA_IOPORT_OFFSET,
+                                    &d->ioport);
+        memory_region_add_subregion(&d->mmio, PCI_VGA_BOCHS_OFFSET,
+                                    &d->bochs);
+        pci_register_bar(&d->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
+    }
+
+    if (!dev->rom_bar) {
+        /* compatibility with pc-0.13 and older */
+        vga_init_vbe(s, pci_address_space(dev));
+    }
 
-     return 0;
+    return 0;
 }
 
 static Property vga_pci_properties[] = {
     DEFINE_PROP_UINT32("vgamem_mb", PCIVGAState, vga.vram_size_mb, 16),
+    DEFINE_PROP_BIT("mmio", PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_MMIO, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/vga.c b/hw/vga.c
index afaef0d711..a07a6fb1ca 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -582,7 +582,6 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-#ifdef CONFIG_BOCHS_VBE
 static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr)
 {
     VGACommonState *s = opaque;
@@ -591,7 +590,7 @@ static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr)
     return val;
 }
 
-static uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
+uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
 {
     VGACommonState *s = opaque;
     uint32_t val;
@@ -627,13 +626,13 @@ static uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
     return val;
 }
 
-static void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val)
+void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val)
 {
     VGACommonState *s = opaque;
     s->vbe_index = val;
 }
 
-static void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
+void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
 {
     VGACommonState *s = opaque;
 
@@ -784,7 +783,6 @@ static void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
         }
     }
 }
-#endif
 
 /* called for accesses between 0xa0000 and 0xc0000 */
 uint32_t vga_mem_readb(VGACommonState *s, target_phys_addr_t addr)
@@ -1129,14 +1127,12 @@ static void vga_get_offsets(VGACommonState *s,
                             uint32_t *pline_compare)
 {
     uint32_t start_addr, line_offset, line_compare;
-#ifdef CONFIG_BOCHS_VBE
+
     if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
         line_offset = s->vbe_line_offset;
         start_addr = s->vbe_start_addr;
         line_compare = 65535;
-    } else
-#endif
-    {
+    } else {
         /* compute line_offset in bytes */
         line_offset = s->cr[VGA_CRTC_OFFSET];
         line_offset <<= 3;
@@ -1572,12 +1568,10 @@ static vga_draw_line_func * const vga_draw_line_table[NB_DEPTHS * VGA_DRAW_LINE_
 static int vga_get_bpp(VGACommonState *s)
 {
     int ret;
-#ifdef CONFIG_BOCHS_VBE
+
     if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
         ret = s->vbe_regs[VBE_DISPI_INDEX_BPP];
-    } else
-#endif
-    {
+    } else {
         ret = 0;
     }
     return ret;
@@ -1587,13 +1581,10 @@ static void vga_get_resolution(VGACommonState *s, int *pwidth, int *pheight)
 {
     int width, height;
 
-#ifdef CONFIG_BOCHS_VBE
     if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
         width = s->vbe_regs[VBE_DISPI_INDEX_XRES];
         height = s->vbe_regs[VBE_DISPI_INDEX_YRES];
-    } else
-#endif
-    {
+    } else {
         width = (s->cr[VGA_CRTC_H_DISP] + 1) * 8;
         height = s->cr[VGA_CRTC_V_DISP_END] |
             ((s->cr[VGA_CRTC_OVERFLOW] & 0x02) << 7) |
@@ -1948,14 +1939,12 @@ void vga_common_reset(VGACommonState *s)
     s->dac_8bit = 0;
     memset(s->palette, '\0', sizeof(s->palette));
     s->bank_offset = 0;
-#ifdef CONFIG_BOCHS_VBE
     s->vbe_index = 0;
     memset(s->vbe_regs, '\0', sizeof(s->vbe_regs));
     s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
     s->vbe_start_addr = 0;
     s->vbe_line_offset = 0;
     s->vbe_bank_mask = (s->vram_size >> 16) - 1;
-#endif
     memset(s->font_offsets, '\0', sizeof(s->font_offsets));
     s->graphic_mode = -1; /* force full update */
     s->shift_control = 0;
@@ -2229,13 +2218,11 @@ const VMStateDescription vmstate_vga_common = {
 
         VMSTATE_INT32(bank_offset, VGACommonState),
         VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState),
-#ifdef CONFIG_BOCHS_VBE
         VMSTATE_UINT16(vbe_index, VGACommonState),
         VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, VBE_DISPI_INDEX_NB),
         VMSTATE_UINT32(vbe_start_addr, VGACommonState),
         VMSTATE_UINT32(vbe_line_offset, VGACommonState),
         VMSTATE_UINT32(vbe_bank_mask, VGACommonState),
-#endif
         VMSTATE_END_OF_LIST()
     }
 };
@@ -2275,11 +2262,7 @@ void vga_common_init(VGACommonState *s)
     }
     s->vram_size_mb = s->vram_size >> 20;
 
-#ifdef CONFIG_BOCHS_VBE
     s->is_vbe_vmstate = 1;
-#else
-    s->is_vbe_vmstate = 0;
-#endif
     memory_region_init_ram(&s->vram, "vga.vram", s->vram_size);
     vmstate_register_ram_global(&s->vram);
     xen_register_framebuffer(&s->vram);
@@ -2314,7 +2297,6 @@ static const MemoryRegionPortio vga_portio_list[] = {
     PORTIO_END_OF_LIST(),
 };
 
-#ifdef CONFIG_BOCHS_VBE
 static const MemoryRegionPortio vbe_portio_list[] = {
     { 0, 1, 2, .read = vbe_ioport_read_index, .write = vbe_ioport_write_index },
 # ifdef TARGET_I386
@@ -2324,7 +2306,6 @@ static const MemoryRegionPortio vbe_portio_list[] = {
 # endif
     PORTIO_END_OF_LIST(),
 };
-#endif /* CONFIG_BOCHS_VBE */
 
 /* Used by both ISA and PCI */
 MemoryRegion *vga_init_io(VGACommonState *s,
@@ -2334,10 +2315,7 @@ MemoryRegion *vga_init_io(VGACommonState *s,
     MemoryRegion *vga_mem;
 
     *vga_ports = vga_portio_list;
-    *vbe_ports = NULL;
-#ifdef CONFIG_BOCHS_VBE
     *vbe_ports = vbe_portio_list;
-#endif
 
     vga_mem = g_malloc(sizeof(*vga_mem));
     memory_region_init_io(vga_mem, &vga_mem_ops, s,
@@ -2379,7 +2357,6 @@ void vga_init(VGACommonState *s, MemoryRegion *address_space,
 
 void vga_init_vbe(VGACommonState *s, MemoryRegion *system_memory)
 {
-#ifdef CONFIG_BOCHS_VBE
     /* With pc-0.12 and below we map both the PCI BAR and the fixed VBE region,
      * so use an alias to avoid double-mapping the same region.
      */
@@ -2390,7 +2367,6 @@ void vga_init_vbe(VGACommonState *s, MemoryRegion *system_memory)
                                 VBE_DISPI_LFB_PHYSICAL_ADDRESS,
                                 &s->vram_vbe);
     s->vbe_mapped = 1;
-#endif 
 }
 /********************************************************/
 /* vga screen dump */
diff --git a/hw/vga_int.h b/hw/vga_int.h
index 330a32f77d..144e7d3c35 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -29,9 +29,6 @@
 #define ST01_V_RETRACE      0x08
 #define ST01_DISP_ENABLE    0x01
 
-/* bochs VBE support */
-#define CONFIG_BOCHS_VBE
-
 #define VBE_DISPI_MAX_XRES              16000
 #define VBE_DISPI_MAX_YRES              12000
 #define VBE_DISPI_MAX_BPP               32
@@ -65,21 +62,6 @@
 
 #define VBE_DISPI_LFB_PHYSICAL_ADDRESS  0xE0000000
 
-#ifdef CONFIG_BOCHS_VBE
-
-#define VGA_STATE_COMMON_BOCHS_VBE              \
-    uint16_t vbe_index;                         \
-    uint16_t vbe_regs[VBE_DISPI_INDEX_NB];      \
-    uint32_t vbe_start_addr;                    \
-    uint32_t vbe_line_offset;                   \
-    uint32_t vbe_bank_mask;			\
-    int vbe_mapped;
-#else
-
-#define VGA_STATE_COMMON_BOCHS_VBE
-
-#endif /* !CONFIG_BOCHS_VBE */
-
 #define CH_ATTR_SIZE (160 * 100)
 #define VGA_MAX_HEIGHT 2048
 
@@ -140,7 +122,13 @@ typedef struct VGACommonState {
     void (*get_resolution)(struct VGACommonState *s,
                         int *pwidth,
                         int *pheight);
-    VGA_STATE_COMMON_BOCHS_VBE
+    /* bochs vbe state */
+    uint16_t vbe_index;
+    uint16_t vbe_regs[VBE_DISPI_INDEX_NB];
+    uint32_t vbe_start_addr;
+    uint32_t vbe_line_offset;
+    uint32_t vbe_bank_mask;
+    int vbe_mapped;
     /* display refresh support */
     DisplayState *ds;
     uint32_t font_offsets[2];
@@ -208,7 +196,11 @@ void vga_invalidate_scanlines(VGACommonState *s, int y1, int y2);
 void ppm_save(const char *filename, struct DisplaySurface *ds, Error **errp);
 
 int vga_ioport_invalid(VGACommonState *s, uint32_t addr);
+
 void vga_init_vbe(VGACommonState *s, MemoryRegion *address_space);
+uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr);
+void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val);
+void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val);
 
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
diff --git a/hw/virtex_ml507.c b/hw/virtex_ml507.c
index 79bc0d10ee..a09b27a346 100644
--- a/hw/virtex_ml507.c
+++ b/hw/virtex_ml507.c
@@ -183,12 +183,12 @@ static int xilinx_load_device_tree(target_phys_addr_t addr,
     return fdt_size;
 }
 
-static void virtex_init(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void virtex_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
     MemoryRegion *address_space_mem = get_system_memory();
     DeviceState *dev;
     PowerPCCPU *cpu;
diff --git a/hw/xen_machine_pv.c b/hw/xen_machine_pv.c
index 4b72aa7557..426470351e 100644
--- a/hw/xen_machine_pv.c
+++ b/hw/xen_machine_pv.c
@@ -29,13 +29,12 @@
 #include "xen_domainbuild.h"
 #include "blockdev.h"
 
-static void xen_init_pv(ram_addr_t ram_size,
-			const char *boot_device,
-			const char *kernel_filename,
-			const char *kernel_cmdline,
-			const char *initrd_filename,
-			const char *cpu_model)
+static void xen_init_pv(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     X86CPU *cpu;
     CPUX86State *env;
     DriveInfo *dinfo;
diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index fd46ba2527..c55dafba6b 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -77,10 +77,13 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq)
 
 }
 
-static void zynq_init(ram_addr_t ram_size, const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void zynq_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ext_ram = g_new(MemoryRegion, 1);
diff --git a/hw/xtensa_lx60.c b/hw/xtensa_lx60.c
index 3653f65b1e..1fd2c47ac5 100644
--- a/hw/xtensa_lx60.c
+++ b/hw/xtensa_lx60.c
@@ -268,11 +268,14 @@ static void lx_init(const LxBoardDesc *board,
     }
 }
 
-static void xtensa_lx60_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void xtensa_lx60_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     static const LxBoardDesc lx60_board = {
         .flash_size = 0x400000,
         .flash_sector_size = 0x10000,
@@ -283,11 +286,14 @@ static void xtensa_lx60_init(ram_addr_t ram_size,
             initrd_filename, cpu_model);
 }
 
-static void xtensa_lx200_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void xtensa_lx200_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     static const LxBoardDesc lx200_board = {
         .flash_size = 0x1000000,
         .flash_sector_size = 0x20000,
diff --git a/hw/xtensa_sim.c b/hw/xtensa_sim.c
index 831460b7c4..2e846d8aa6 100644
--- a/hw/xtensa_sim.c
+++ b/hw/xtensa_sim.c
@@ -96,11 +96,14 @@ static void sim_init(ram_addr_t ram_size,
     }
 }
 
-static void xtensa_sim_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void xtensa_sim_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = XTENSA_DEFAULT_CPU_MODEL;
     }
diff --git a/hw/z2.c b/hw/z2.c
index 076fad20c6..f62b8067bf 100644
--- a/hw/z2.c
+++ b/hw/z2.c
@@ -295,11 +295,12 @@ static TypeInfo aer915_info = {
     .class_init    = aer915_class_init,
 };
 
-static void z2_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void z2_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     MemoryRegion *address_space_mem = get_system_memory();
     uint32_t sector_len = 0x10000;
     PXA2xxState *mpu;
diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h
index 94f15f612f..d3822da60e 100644
--- a/linux-user/alpha/target_signal.h
+++ b/linux-user/alpha/target_signal.h
@@ -6,9 +6,10 @@
 /* this struct defines a stack used during syscall handling */
 
 typedef struct target_sigaltstack {
-	abi_ulong ss_sp;
-	abi_long ss_flags;
-	abi_ulong ss_size;
+    abi_ulong ss_sp;
+    int32_t ss_flags;
+    int32_t dummy;
+    abi_ulong ss_size;
 } target_stack_t;
 
 
diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
index b47025f08a..381ab8914b 100644
--- a/linux-user/linuxload.c
+++ b/linux-user/linuxload.c
@@ -140,8 +140,9 @@ int loader_exec(const char * filename, char ** argv, char ** envp,
     bprm->p = TARGET_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int);
     memset(bprm->page, 0, sizeof(bprm->page));
     retval = open(filename, O_RDONLY);
-    if (retval < 0)
-        return retval;
+    if (retval < 0) {
+        return -errno;
+    }
     bprm->fd = retval;
     bprm->filename = (char *)filename;
     bprm->argc = count(argv);
@@ -165,8 +166,7 @@ int loader_exec(const char * filename, char ** argv, char ** envp,
             retval = load_flt_binary(bprm,regs,infop);
 #endif
         } else {
-            fprintf(stderr, "Unknown binary format\n");
-            return -1;
+            return -ENOEXEC;
         }
     }
 
diff --git a/linux-user/main.c b/linux-user/main.c
index f4bbe697f7..5d20abd3e5 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2527,6 +2527,7 @@ void cpu_loop(CPUMBState *env)
         case EXCP_BREAK:
             /* Return address is 4 bytes after the call.  */
             env->regs[14] += 4;
+            env->sregs[SR_PC] = env->regs[14];
             ret = do_syscall(env, 
                              env->regs[12], 
                              env->regs[5], 
@@ -2537,7 +2538,6 @@ void cpu_loop(CPUMBState *env)
                              env->regs[10],
                              0, 0);
             env->regs[3] = ret;
-            env->sregs[SR_PC] = env->regs[14];
             break;
         case EXCP_HW_EXCP:
             env->regs[17] = env->sregs[SR_PC] + 4;
@@ -3574,7 +3574,7 @@ int main(int argc, char **argv, char **envp)
     ret = loader_exec(filename, target_argv, target_environ, regs,
         info, &bprm);
     if (ret != 0) {
-        printf("Error %d while loading %s\n", ret, filename);
+        printf("Error while loading %s: %s\n", filename, strerror(-ret));
         _exit(1);
     }
 
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index fc4cc00b9f..5e53dca09e 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -219,6 +219,9 @@ unsigned long init_guest_space(unsigned long host_start,
 
 #include "qemu-log.h"
 
+/* syscall.c */
+int host_to_target_waitstatus(int status);
+
 /* strace.c */
 void print_syscall(int num,
                    abi_long arg1, abi_long arg2, abi_long arg3,
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 15bc4e8f62..95e2ffa007 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -202,46 +202,67 @@ void target_to_host_old_sigset(sigset_t *sigset,
 static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
                                                  const siginfo_t *info)
 {
-    int sig;
-    sig = host_to_target_signal(info->si_signo);
+    int sig = host_to_target_signal(info->si_signo);
     tinfo->si_signo = sig;
     tinfo->si_errno = 0;
     tinfo->si_code = info->si_code;
-    if (sig == SIGILL || sig == SIGFPE || sig == SIGSEGV ||
-        sig == SIGBUS || sig == SIGTRAP) {
-        /* should never come here, but who knows. The information for
-           the target is irrelevant */
+
+    if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV
+        || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) {
+        /* Should never come here, but who knows. The information for
+           the target is irrelevant.  */
         tinfo->_sifields._sigfault._addr = 0;
-    } else if (sig == SIGIO) {
+    } else if (sig == TARGET_SIGIO) {
+        tinfo->_sifields._sigpoll._band = info->si_band;
 	tinfo->_sifields._sigpoll._fd = info->si_fd;
+    } else if (sig == TARGET_SIGCHLD) {
+        tinfo->_sifields._sigchld._pid = info->si_pid;
+        tinfo->_sifields._sigchld._uid = info->si_uid;
+        tinfo->_sifields._sigchld._status
+            = host_to_target_waitstatus(info->si_status);
+        tinfo->_sifields._sigchld._utime = info->si_utime;
+        tinfo->_sifields._sigchld._stime = info->si_stime;
     } else if (sig >= TARGET_SIGRTMIN) {
         tinfo->_sifields._rt._pid = info->si_pid;
         tinfo->_sifields._rt._uid = info->si_uid;
         /* XXX: potential problem if 64 bit */
-        tinfo->_sifields._rt._sigval.sival_ptr =
-            (abi_ulong)(unsigned long)info->si_value.sival_ptr;
+        tinfo->_sifields._rt._sigval.sival_ptr
+            = (abi_ulong)(unsigned long)info->si_value.sival_ptr;
     }
 }
 
 static void tswap_siginfo(target_siginfo_t *tinfo,
                           const target_siginfo_t *info)
 {
-    int sig;
-    sig = info->si_signo;
+    int sig = info->si_signo;
     tinfo->si_signo = tswap32(sig);
     tinfo->si_errno = tswap32(info->si_errno);
     tinfo->si_code = tswap32(info->si_code);
-    if (sig == SIGILL || sig == SIGFPE || sig == SIGSEGV ||
-        sig == SIGBUS || sig == SIGTRAP) {
-        tinfo->_sifields._sigfault._addr =
-            tswapal(info->_sifields._sigfault._addr);
-    } else if (sig == SIGIO) {
-	tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd);
+
+    if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV
+        || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) {
+        tinfo->_sifields._sigfault._addr
+            = tswapal(info->_sifields._sigfault._addr);
+    } else if (sig == TARGET_SIGIO) {
+        tinfo->_sifields._sigpoll._band
+            = tswap32(info->_sifields._sigpoll._band);
+        tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd);
+    } else if (sig == TARGET_SIGCHLD) {
+        tinfo->_sifields._sigchld._pid
+            = tswap32(info->_sifields._sigchld._pid);
+        tinfo->_sifields._sigchld._uid
+            = tswap32(info->_sifields._sigchld._uid);
+        tinfo->_sifields._sigchld._status
+            = tswap32(info->_sifields._sigchld._status);
+        tinfo->_sifields._sigchld._utime
+            = tswapal(info->_sifields._sigchld._utime);
+        tinfo->_sifields._sigchld._stime
+            = tswapal(info->_sifields._sigchld._stime);
     } else if (sig >= TARGET_SIGRTMIN) {
         tinfo->_sifields._rt._pid = tswap32(info->_sifields._rt._pid);
         tinfo->_sifields._rt._uid = tswap32(info->_sifields._rt._uid);
-        tinfo->_sifields._rt._sigval.sival_ptr =
-            tswapal(info->_sifields._rt._sigval.sival_ptr);
+        tinfo->_sifields._rt._sigval.sival_ptr
+            = tswapal(info->_sifields._rt._sigval.sival_ptr);
     }
 }
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 471d0605f7..e4291ed776 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -587,12 +587,17 @@ extern int setfsgid(int);
 extern int setgroups(int, gid_t *);
 
 /* ARM EABI and MIPS expect 64bit types aligned even on pairs or registers */
-#ifdef TARGET_ARM 
+#ifdef TARGET_ARM
 static inline int regpairs_aligned(void *cpu_env) {
     return ((((CPUARMState *)cpu_env)->eabi) == 1) ;
 }
 #elif defined(TARGET_MIPS)
 static inline int regpairs_aligned(void *cpu_env) { return 1; }
+#elif defined(TARGET_PPC) && !defined(TARGET_PPC64)
+/* SysV AVI for PPC32 expects 64bit parameters to be passed on odd/even pairs
+ * of registers which translates to the same as ARM/MIPS, because we start with
+ * r3 as arg1 */
+static inline int regpairs_aligned(void *cpu_env) { return 1; }
 #else
 static inline int regpairs_aligned(void *cpu_env) { return 0; }
 #endif
@@ -1744,55 +1749,96 @@ static abi_long do_getsockopt(int sockfd, int level, int optname,
     return ret;
 }
 
-/* FIXME
- * lock_iovec()/unlock_iovec() have a return code of 0 for success where
- * other lock functions have a return code of 0 for failure.
- */
-static abi_long lock_iovec(int type, struct iovec *vec, abi_ulong target_addr,
-                           int count, int copy)
+static struct iovec *lock_iovec(int type, abi_ulong target_addr,
+                                int count, int copy)
 {
     struct target_iovec *target_vec;
-    abi_ulong base;
+    struct iovec *vec;
+    abi_ulong total_len, max_len;
     int i;
 
-    target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1);
-    if (!target_vec)
-        return -TARGET_EFAULT;
-    for(i = 0;i < count; i++) {
-        base = tswapal(target_vec[i].iov_base);
-        vec[i].iov_len = tswapal(target_vec[i].iov_len);
-        if (vec[i].iov_len != 0) {
-            vec[i].iov_base = lock_user(type, base, vec[i].iov_len, copy);
-            /* Don't check lock_user return value. We must call writev even
-               if a element has invalid base address. */
+    if (count == 0) {
+        errno = 0;
+        return NULL;
+    }
+    if (count > IOV_MAX) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    vec = calloc(count, sizeof(struct iovec));
+    if (vec == NULL) {
+        errno = ENOMEM;
+        return NULL;
+    }
+
+    target_vec = lock_user(VERIFY_READ, target_addr,
+                           count * sizeof(struct target_iovec), 1);
+    if (target_vec == NULL) {
+        errno = EFAULT;
+        goto fail2;
+    }
+
+    /* ??? If host page size > target page size, this will result in a
+       value larger than what we can actually support.  */
+    max_len = 0x7fffffff & TARGET_PAGE_MASK;
+    total_len = 0;
+
+    for (i = 0; i < count; i++) {
+        abi_ulong base = tswapal(target_vec[i].iov_base);
+        abi_long len = tswapal(target_vec[i].iov_len);
+
+        if (len < 0) {
+            errno = EINVAL;
+            goto fail;
+        } else if (len == 0) {
+            /* Zero length pointer is ignored.  */
+            vec[i].iov_base = 0;
         } else {
-            /* zero length pointer is ignored */
-            vec[i].iov_base = NULL;
+            vec[i].iov_base = lock_user(type, base, len, copy);
+            if (!vec[i].iov_base) {
+                errno = EFAULT;
+                goto fail;
+            }
+            if (len > max_len - total_len) {
+                len = max_len - total_len;
+            }
         }
+        vec[i].iov_len = len;
+        total_len += len;
     }
-    unlock_user (target_vec, target_addr, 0);
-    return 0;
+
+    unlock_user(target_vec, target_addr, 0);
+    return vec;
+
+ fail:
+    free(vec);
+ fail2:
+    unlock_user(target_vec, target_addr, 0);
+    return NULL;
 }
 
-static abi_long unlock_iovec(struct iovec *vec, abi_ulong target_addr,
-                             int count, int copy)
+static void unlock_iovec(struct iovec *vec, abi_ulong target_addr,
+                         int count, int copy)
 {
     struct target_iovec *target_vec;
-    abi_ulong base;
     int i;
 
-    target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1);
-    if (!target_vec)
-        return -TARGET_EFAULT;
-    for(i = 0;i < count; i++) {
-        if (target_vec[i].iov_base) {
-            base = tswapal(target_vec[i].iov_base);
+    target_vec = lock_user(VERIFY_READ, target_addr,
+                           count * sizeof(struct target_iovec), 1);
+    if (target_vec) {
+        for (i = 0; i < count; i++) {
+            abi_ulong base = tswapal(target_vec[i].iov_base);
+            abi_long len = tswapal(target_vec[i].iov_base);
+            if (len < 0) {
+                break;
+            }
             unlock_user(vec[i].iov_base, base, copy ? vec[i].iov_len : 0);
         }
+        unlock_user(target_vec, target_addr, 0);
     }
-    unlock_user (target_vec, target_addr, 0);
 
-    return 0;
+    free(vec);
 }
 
 /* do_socket() Must return target values and target errnos. */
@@ -1888,8 +1934,7 @@ static abi_long do_sendrecvmsg(int fd, abi_ulong target_msg,
         ret = target_to_host_sockaddr(msg.msg_name, tswapal(msgp->msg_name),
                                 msg.msg_namelen);
         if (ret) {
-            unlock_user_struct(msgp, target_msg, send ? 0 : 1);
-            return ret;
+            goto out2;
         }
     } else {
         msg.msg_name = NULL;
@@ -1900,9 +1945,13 @@ static abi_long do_sendrecvmsg(int fd, abi_ulong target_msg,
     msg.msg_flags = tswap32(msgp->msg_flags);
 
     count = tswapal(msgp->msg_iovlen);
-    vec = alloca(count * sizeof(struct iovec));
     target_vec = tswapal(msgp->msg_iov);
-    lock_iovec(send ? VERIFY_READ : VERIFY_WRITE, vec, target_vec, count, send);
+    vec = lock_iovec(send ? VERIFY_READ : VERIFY_WRITE,
+                     target_vec, count, send);
+    if (vec == NULL) {
+        ret = -host_to_target_errno(errno);
+        goto out2;
+    }
     msg.msg_iovlen = count;
     msg.msg_iov = vec;
 
@@ -1932,6 +1981,7 @@ static abi_long do_sendrecvmsg(int fd, abi_ulong target_msg,
 
 out:
     unlock_iovec(vec, target_vec, count, !send);
+out2:
     unlock_user_struct(msgp, target_msg, send ? 0 : 1);
     return ret;
 }
@@ -4873,7 +4923,7 @@ static int do_futex(target_ulong uaddr, int op, int val, target_ulong timeout,
 
 /* Map host to target signal numbers for the wait family of syscalls.
    Assume all other status bits are the same.  */
-static int host_to_target_waitstatus(int status)
+int host_to_target_waitstatus(int status)
 {
     if (WIFSIGNALED(status)) {
         return host_to_target_signal(WTERMSIG(status)) | (status & ~0x7f);
@@ -4962,8 +5012,8 @@ static int open_self_maps(void *cpu_env, int fd)
 #if defined(TARGET_ARM) || defined(TARGET_M68K) || defined(TARGET_UNICORE32)
     dprintf(fd, "%08llx-%08llx rw-p %08llx 00:00 0          [stack]\n",
                 (unsigned long long)ts->info->stack_limit,
-                (unsigned long long)(ts->stack_base + (TARGET_PAGE_SIZE - 1))
-                                     & TARGET_PAGE_MASK,
+                (unsigned long long)(ts->info->start_stack +
+                                     (TARGET_PAGE_SIZE - 1)) & TARGET_PAGE_MASK,
                 (unsigned long long)0);
 #endif
 
@@ -6529,6 +6579,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             __put_user(stfs.f_fsid.__val[0], &target_stfs->f_fsid.val[0]);
             __put_user(stfs.f_fsid.__val[1], &target_stfs->f_fsid.val[1]);
             __put_user(stfs.f_namelen, &target_stfs->f_namelen);
+            __put_user(stfs.f_frsize, &target_stfs->f_frsize);
+            memset(target_stfs->f_spare, 0, sizeof(target_stfs->f_spare));
             unlock_user_struct(target_stfs, arg2, 1);
         }
         break;
@@ -6557,6 +6609,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             __put_user(stfs.f_fsid.__val[0], &target_stfs->f_fsid.val[0]);
             __put_user(stfs.f_fsid.__val[1], &target_stfs->f_fsid.val[1]);
             __put_user(stfs.f_namelen, &target_stfs->f_namelen);
+            __put_user(stfs.f_frsize, &target_stfs->f_frsize);
+            memset(target_stfs->f_spare, 0, sizeof(target_stfs->f_spare));
             unlock_user_struct(target_stfs, arg3, 1);
         }
         break;
@@ -6888,6 +6942,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = get_errno(do_fork(cpu_env, arg1, arg2, arg3, arg5, arg4));
 #elif defined(TARGET_CRIS)
         ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg4, arg5));
+#elif defined(TARGET_MICROBLAZE)
+        ret = get_errno(do_fork(cpu_env, arg1, arg2, arg4, arg6, arg5));
 #elif defined(TARGET_S390X)
         ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg5, arg4));
 #else
@@ -7184,26 +7240,24 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
     case TARGET_NR_readv:
         {
-            int count = arg3;
-            struct iovec *vec;
-
-            vec = alloca(count * sizeof(struct iovec));
-            if (lock_iovec(VERIFY_WRITE, vec, arg2, count, 0) < 0)
-                goto efault;
-            ret = get_errno(readv(arg1, vec, count));
-            unlock_iovec(vec, arg2, count, 1);
+            struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0);
+            if (vec != NULL) {
+                ret = get_errno(readv(arg1, vec, arg3));
+                unlock_iovec(vec, arg2, arg3, 1);
+            } else {
+                ret = -host_to_target_errno(errno);
+            }
         }
         break;
     case TARGET_NR_writev:
         {
-            int count = arg3;
-            struct iovec *vec;
-
-            vec = alloca(count * sizeof(struct iovec));
-            if (lock_iovec(VERIFY_READ, vec, arg2, count, 1) < 0)
-                goto efault;
-            ret = get_errno(writev(arg1, vec, count));
-            unlock_iovec(vec, arg2, count, 0);
+            struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
+            if (vec != NULL) {
+                ret = get_errno(writev(arg1, vec, arg3));
+                unlock_iovec(vec, arg2, arg3, 0);
+            } else {
+                ret = -host_to_target_errno(errno);
+            }
         }
         break;
     case TARGET_NR_getsid:
@@ -7415,12 +7469,20 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_pread64
     case TARGET_NR_pread64:
+        if (regpairs_aligned(cpu_env)) {
+            arg4 = arg5;
+            arg5 = arg6;
+        }
         if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0)))
             goto efault;
         ret = get_errno(pread64(arg1, p, arg3, target_offset64(arg4, arg5)));
         unlock_user(p, arg2, ret);
         break;
     case TARGET_NR_pwrite64:
+        if (regpairs_aligned(cpu_env)) {
+            arg4 = arg5;
+            arg5 = arg6;
+        }
         if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1)))
             goto efault;
         ret = get_errno(pwrite64(arg1, p, arg3, target_offset64(arg4, arg5)));
@@ -8628,14 +8690,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #ifdef TARGET_NR_vmsplice
 	case TARGET_NR_vmsplice:
         {
-            int count = arg3;
-            struct iovec *vec;
-
-            vec = alloca(count * sizeof(struct iovec));
-            if (lock_iovec(VERIFY_READ, vec, arg2, count, 1) < 0)
-                goto efault;
-            ret = get_errno(vmsplice(arg1, vec, count, arg4));
-            unlock_iovec(vec, arg2, count, 0);
+            struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
+            if (vec != NULL) {
+                ret = get_errno(vmsplice(arg1, vec, arg3, arg4));
+                unlock_iovec(vec, arg2, arg3, 0);
+            } else {
+                ret = -host_to_target_errno(errno);
+            }
         }
         break;
 #endif
@@ -8822,6 +8883,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
     }
 #endif
+#ifdef TARGET_NR_gethostname
+    case TARGET_NR_gethostname:
+    {
+        char *name = lock_user(VERIFY_WRITE, arg1, arg2, 0);
+        if (name) {
+            ret = get_errno(gethostname(name, arg2));
+            unlock_user(name, arg1, arg2);
+        } else {
+            ret = -TARGET_EFAULT;
+        }
+        break;
+    }
+#endif
     default:
     unimplemented:
         gemu_log("qemu: Unsupported syscall: %d\n", num);
diff --git a/net/tap-win32.c b/net/tap-win32.c
index f1801e22d2..22dad3f8fb 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -29,6 +29,7 @@
 #include "tap.h"
 
 #include "qemu-common.h"
+#include "clients.h"            /* net_init_tap */
 #include "net.h"
 #include "sysemu.h"
 #include "qemu-error.h"
diff --git a/qemu-log.c b/qemu-log.c
index 396aafdf62..a4c3d1f2e3 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -116,6 +116,9 @@ const CPULogItem cpu_log_items[] = {
       "show all i/o ports accesses" },
     { LOG_UNIMP, "unimp",
       "log unimplemented functionality" },
+    { LOG_GUEST_ERROR, "guest_errors",
+      "log when the guest OS does something invalid (eg accessing a\n"
+      "non-existent register)" },
     { 0, NULL, NULL },
 };
 
diff --git a/qemu-log.h b/qemu-log.h
index 5ccecf30af..ce6bb095b3 100644
--- a/qemu-log.h
+++ b/qemu-log.h
@@ -35,6 +35,7 @@ static inline bool qemu_log_enabled(void)
 #define CPU_LOG_TB_CPU     (1 << 8)
 #define CPU_LOG_RESET      (1 << 9)
 #define LOG_UNIMP          (1 << 10)
+#define LOG_GUEST_ERROR    (1 << 11)
 
 /* Returns true if a bit is set in the current loglevel mask
  */
diff --git a/qemu-options.hx b/qemu-options.hx
index 7d97f96928..46f0539182 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -6,10 +6,6 @@ HXCOMM construct option structures, enums and help message for specified
 HXCOMM architectures.
 HXCOMM HXCOMM can be used for comments, discarded from both texi and C
 
-HXCOMM TODO : when we are able to change -help output without breaking
-HXCOMM libvirt we should update the help options which refer to -cpu ?,
-HXCOMM -driver ?, etc to use the preferred -cpu help etc instead.
-
 DEFHEADING(Standard options:)
 STEXI
 @table @option
@@ -33,7 +29,7 @@ ETEXI
 
 DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "-machine [type=]name[,prop[=value][,...]]\n"
-    "                selects emulated machine (-machine ? for list)\n"
+    "                selects emulated machine ('-machine help' for list)\n"
     "                property accel=accel1[:accel2[:...]] selects accelerator\n"
     "                supported accelerators are kvm, xen, tcg (default: tcg)\n"
     "                kernel_irqchip=on|off controls accelerated irqchip support\n"
@@ -44,7 +40,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
 STEXI
 @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
 @findex -machine
-Select the emulated machine by @var{name}. Use @code{-machine ?} to list
+Select the emulated machine by @var{name}. Use @code{-machine help} to list
 available machines. Supported machine properties are:
 @table @option
 @item accel=@var{accels1}[:@var{accels2}[:...]]
@@ -69,11 +65,11 @@ HXCOMM Deprecated by -machine
 DEF("M", HAS_ARG, QEMU_OPTION_M, "", QEMU_ARCH_ALL)
 
 DEF("cpu", HAS_ARG, QEMU_OPTION_cpu,
-    "-cpu cpu        select CPU (-cpu ? for list)\n", QEMU_ARCH_ALL)
+    "-cpu cpu        select CPU ('-cpu help' for list)\n", QEMU_ARCH_ALL)
 STEXI
 @item -cpu @var{model}
 @findex -cpu
-Select CPU model (-cpu ? for list and additional feature selection)
+Select CPU model (@code{-cpu help} for list and additional feature selection)
 ETEXI
 
 DEF("smp", HAS_ARG, QEMU_OPTION_smp,
@@ -463,12 +459,12 @@ ETEXI
 DEF("soundhw", HAS_ARG, QEMU_OPTION_soundhw,
     "-soundhw c1,... enable audio support\n"
     "                and only specified sound cards (comma separated list)\n"
-    "                use -soundhw ? to get the list of supported cards\n"
-    "                use -soundhw all to enable all of them\n", QEMU_ARCH_ALL)
+    "                use '-soundhw help' to get the list of supported cards\n"
+    "                use '-soundhw all' to enable all of them\n", QEMU_ARCH_ALL)
 STEXI
 @item -soundhw @var{card1}[,@var{card2},...] or -soundhw all
 @findex -soundhw
-Enable audio and selected sound hardware. Use ? to print all
+Enable audio and selected sound hardware. Use 'help' to print all
 available sound hardware.
 
 @example
@@ -477,7 +473,7 @@ qemu-system-i386 -soundhw es1370 disk.img
 qemu-system-i386 -soundhw ac97 disk.img
 qemu-system-i386 -soundhw hda disk.img
 qemu-system-i386 -soundhw all disk.img
-qemu-system-i386 -soundhw ?
+qemu-system-i386 -soundhw help
 @end example
 
 Note that Linux's i810_audio OSS kernel (for AC97) module might
@@ -566,16 +562,16 @@ DEF("device", HAS_ARG, QEMU_OPTION_device,
     "-device driver[,prop[=value][,...]]\n"
     "                add device (based on driver)\n"
     "                prop=value,... sets driver properties\n"
-    "                use -device ? to print all possible drivers\n"
-    "                use -device driver,? to print all possible properties\n",
+    "                use '-device help' to print all possible drivers\n"
+    "                use '-device driver,help' to print all possible properties\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -device @var{driver}[,@var{prop}[=@var{value}][,...]]
 @findex -device
 Add device @var{driver}.  @var{prop}=@var{value} sets driver
 properties.  Valid properties depend on the driver.  To get help on
-possible drivers and properties, use @code{-device ?} and
-@code{-device @var{driver},?}.
+possible drivers and properties, use @code{-device help} and
+@code{-device @var{driver},help}.
 ETEXI
 
 DEFHEADING()
@@ -1365,7 +1361,7 @@ Valid values for @var{type} are
 @code{virtio}, @code{i82551}, @code{i82557b}, @code{i82559er},
 @code{ne2k_pci}, @code{ne2k_isa}, @code{pcnet}, @code{rtl8139},
 @code{e1000}, @code{smc91c111}, @code{lance} and @code{mcf_fec}.
-Not all devices are supported on all targets.  Use -net nic,model=?
+Not all devices are supported on all targets.  Use @code{-net nic,model=help}
 for a list of available devices for your target.
 
 @item -netdev user,id=@var{id}[,@var{option}][,@var{option}][,...]
@@ -2398,7 +2394,7 @@ Shorthand for -gdb tcp::1234, i.e. open a gdbserver on TCP port 1234
 ETEXI
 
 DEF("d", HAS_ARG, QEMU_OPTION_d, \
-    "-d item1,...    output log to /tmp/qemu.log (use -d ? for a list of log items)\n",
+    "-d item1,...    output log to /tmp/qemu.log (use '-d help' for a list of log items)\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -d
@@ -2533,13 +2529,13 @@ ETEXI
 
 DEF("clock", HAS_ARG, QEMU_OPTION_clock, \
     "-clock          force the use of the given methods for timer alarm.\n" \
-    "                To see what timers are available use -clock ?\n",
+    "                To see what timers are available use '-clock help'\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -clock @var{method}
 @findex -clock
 Force the use of the given methods for timer alarm. To see what timers
-are available use -clock ?.
+are available use @code{-clock help}.
 ETEXI
 
 HXCOMM Options deprecated by -rtc
@@ -2608,7 +2604,7 @@ watchdog with a single timer, or @code{i6300esb} (Intel 6300ESB I/O
 controller hub) which is a much more featureful PCI-based dual-timer
 watchdog.  Choose a model for which your guest has drivers.
 
-Use @code{-watchdog ?} to list available hardware models.  Only one
+Use @code{-watchdog help} to list available hardware models.  Only one
 watchdog can be enabled for a guest.
 ETEXI
 
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 8bb5129d6a..9aa920d2ac 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -788,7 +788,6 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shift
     return helper_neon_qshl_u64(env, valop, shiftop);
 }
 
-/* FIXME: This is wrong.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
diff --git a/target-arm/translate.c b/target-arm/translate.c
index c6840b7832..daccb15c23 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -516,10 +516,10 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
             TCGv tmp = tcg_temp_new_i32();
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index 4968c244e8..88430b5057 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -345,6 +345,7 @@ static inline void cpu_clone_regs(CPUMBState *env, target_ulong newsp)
 
 static inline void cpu_set_tls(CPUMBState *env, target_ulong newtls)
 {
+    env->regs[21] = newtls;
 }
 
 static inline int cpu_interrupts_enabled(CPUMBState *env)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index ce5ddaf050..05b7730987 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -714,11 +714,13 @@ void helper_sdm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
 
 #ifndef CONFIG_USER_ONLY
 /* SMP helpers.  */
-static int mips_vpe_is_wfi(CPUMIPSState *c)
+static bool mips_vpe_is_wfi(MIPSCPU *c)
 {
+    CPUMIPSState *env = &c->env;
+
     /* If the VPE is halted but otherwise active, it means it's waiting for
        an interrupt.  */
-    return c->halted && mips_vpe_active(c);
+    return env->halted && mips_vpe_active(env);
 }
 
 static inline void mips_vpe_wake(CPUMIPSState *c)
@@ -729,27 +731,33 @@ static inline void mips_vpe_wake(CPUMIPSState *c)
     cpu_interrupt(c, CPU_INTERRUPT_WAKE);
 }
 
-static inline void mips_vpe_sleep(CPUMIPSState *c)
+static inline void mips_vpe_sleep(MIPSCPU *cpu)
 {
+    CPUMIPSState *c = &cpu->env;
+
     /* The VPE was shut off, really go to bed.
        Reset any old _WAKE requests.  */
     c->halted = 1;
     cpu_reset_interrupt(c, CPU_INTERRUPT_WAKE);
 }
 
-static inline void mips_tc_wake(CPUMIPSState *c, int tc)
+static inline void mips_tc_wake(MIPSCPU *cpu, int tc)
 {
+    CPUMIPSState *c = &cpu->env;
+
     /* FIXME: TC reschedule.  */
-    if (mips_vpe_active(c) && !mips_vpe_is_wfi(c)) {
+    if (mips_vpe_active(c) && !mips_vpe_is_wfi(cpu)) {
         mips_vpe_wake(c);
     }
 }
 
-static inline void mips_tc_sleep(CPUMIPSState *c, int tc)
+static inline void mips_tc_sleep(MIPSCPU *cpu, int tc)
 {
+    CPUMIPSState *c = &cpu->env;
+
     /* FIXME: TC reschedule.  */
     if (!mips_vpe_active(c)) {
-        mips_vpe_sleep(c);
+        mips_vpe_sleep(cpu);
     }
 }
 
@@ -1342,13 +1350,15 @@ void helper_mttc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 
 void helper_mtc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
+    MIPSCPU *cpu = mips_env_get_cpu(env);
+
     env->active_tc.CP0_TCHalt = arg1 & 0x1;
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
     if (env->active_tc.CP0_TCHalt & 1) {
-        mips_tc_sleep(env, env->current_tc);
+        mips_tc_sleep(cpu, env->current_tc);
     } else {
-        mips_tc_wake(env, env->current_tc);
+        mips_tc_wake(cpu, env->current_tc);
     }
 }
 
@@ -1356,6 +1366,7 @@ void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
+    MIPSCPU *other_cpu = mips_env_get_cpu(other);
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 
@@ -1365,9 +1376,9 @@ void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
         other->tcs[other_tc].CP0_TCHalt = arg1;
 
     if (arg1 & 1) {
-        mips_tc_sleep(other, other_tc);
+        mips_tc_sleep(other_cpu, other_tc);
     } else {
-        mips_tc_wake(other, other_tc);
+        mips_tc_wake(other_cpu, other_tc);
     }
 }
 
@@ -1874,35 +1885,39 @@ target_ulong helper_emt(void)
 
 target_ulong helper_dvpe(CPUMIPSState *env)
 {
-    CPUMIPSState *other_cpu = first_cpu;
+    CPUMIPSState *other_cpu_env = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
 
     do {
         /* Turn off all VPEs except the one executing the dvpe.  */
-        if (other_cpu != env) {
-            other_cpu->mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP);
+        if (other_cpu_env != env) {
+            MIPSCPU *other_cpu = mips_env_get_cpu(other_cpu_env);
+
+            other_cpu_env->mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP);
             mips_vpe_sleep(other_cpu);
         }
-        other_cpu = other_cpu->next_cpu;
-    } while (other_cpu);
+        other_cpu_env = other_cpu_env->next_cpu;
+    } while (other_cpu_env);
     return prev;
 }
 
 target_ulong helper_evpe(CPUMIPSState *env)
 {
-    CPUMIPSState *other_cpu = first_cpu;
+    CPUMIPSState *other_cpu_env = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
 
     do {
-        if (other_cpu != env
-           /* If the VPE is WFI, don't disturb its sleep.  */
-           && !mips_vpe_is_wfi(other_cpu)) {
+        MIPSCPU *other_cpu = mips_env_get_cpu(other_cpu_env);
+
+        if (other_cpu_env != env
+            /* If the VPE is WFI, don't disturb its sleep.  */
+            && !mips_vpe_is_wfi(other_cpu)) {
             /* Enable the VPE.  */
-            other_cpu->mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP);
-            mips_vpe_wake(other_cpu); /* And wake it up.  */
+            other_cpu_env->mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP);
+            mips_vpe_wake(other_cpu_env); /* And wake it up.  */
         }
-        other_cpu = other_cpu->next_cpu;
-    } while (other_cpu);
+        other_cpu_env = other_cpu_env->next_cpu;
+    } while (other_cpu_env);
     return prev;
 }
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 6cef96bfa6..4321393688 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -48,7 +48,7 @@ static TCGv cpu_y;
 #ifndef CONFIG_USER_ONLY
 static TCGv cpu_tbr;
 #endif
-static TCGv cpu_cond, cpu_dst, cpu_addr, cpu_val;
+static TCGv cpu_cond;
 #ifdef TARGET_SPARC64
 static TCGv_i32 cpu_xcc, cpu_asi, cpu_fprs;
 static TCGv cpu_gsr;
@@ -58,10 +58,6 @@ static TCGv_i32 cpu_softint;
 #else
 static TCGv cpu_wim;
 #endif
-/* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0;
-static TCGv_i32 cpu_tmp32;
-static TCGv_i64 cpu_tmp64;
 /* Floating point registers */
 static TCGv_i64 cpu_fpr[TARGET_DPREGS];
 
@@ -83,7 +79,9 @@ typedef struct DisasContext {
     struct TranslationBlock *tb;
     sparc_def_t *def;
     TCGv_i32 t32[3];
+    TCGv ttl[5];
     int n_t32;
+    int n_ttl;
 } DisasContext;
 
 typedef struct {
@@ -123,6 +121,22 @@ static int sign_extend(int x, int len)
 
 #define IS_IMM (insn & (1<<13))
 
+static inline TCGv_i32 get_temp_i32(DisasContext *dc)
+{
+    TCGv_i32 t;
+    assert(dc->n_t32 < ARRAY_SIZE(dc->t32));
+    dc->t32[dc->n_t32++] = t = tcg_temp_new_i32();
+    return t;
+}
+
+static inline TCGv get_temp_tl(DisasContext *dc)
+{
+    TCGv t;
+    assert(dc->n_ttl < ARRAY_SIZE(dc->ttl));
+    dc->ttl[dc->n_ttl++] = t = tcg_temp_new();
+    return t;
+}
+
 static inline void gen_update_fprs_dirty(int rd)
 {
 #if defined(TARGET_SPARC64)
@@ -143,16 +157,13 @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
     if (src & 1) {
         return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2]));
     } else {
-        TCGv_i32 ret = tcg_temp_new_i32();
+        TCGv_i32 ret = get_temp_i32(dc);
         TCGv_i64 t = tcg_temp_new_i64();
 
         tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32);
         tcg_gen_trunc_i64_i32(ret, t);
         tcg_temp_free_i64(t);
 
-        dc->t32[dc->n_t32++] = ret;
-        assert(dc->n_t32 <= ARRAY_SIZE(dc->t32));
-
         return ret;
     }
 #endif
@@ -174,9 +185,9 @@ static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
     gen_update_fprs_dirty(dst);
 }
 
-static TCGv_i32 gen_dest_fpr_F(void)
+static TCGv_i32 gen_dest_fpr_F(DisasContext *dc)
 {
-    return cpu_tmp32;
+    return get_temp_i32(dc);
 }
 
 static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
@@ -192,9 +203,9 @@ static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v)
     gen_update_fprs_dirty(dst);
 }
 
-static TCGv_i64 gen_dest_fpr_D(void)
+static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst)
 {
-    return cpu_tmp64;
+    return cpu_fpr[DFPREG(dst) / 2];
 }
 
 static void gen_op_load_fpr_QT0(unsigned int src)
@@ -263,25 +274,38 @@ static inline void gen_address_mask(DisasContext *dc, TCGv addr)
 #endif
 }
 
-static inline void gen_movl_reg_TN(int reg, TCGv tn)
+static inline TCGv gen_load_gpr(DisasContext *dc, int reg)
+{
+    if (reg == 0 || reg >= 8) {
+        TCGv t = get_temp_tl(dc);
+        if (reg == 0) {
+            tcg_gen_movi_tl(t, 0);
+        } else {
+            tcg_gen_ld_tl(t, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+        }
+        return t;
+    } else {
+        return cpu_gregs[reg];
+    }
+}
+
+static inline void gen_store_gpr(DisasContext *dc, int reg, TCGv v)
 {
-    if (reg == 0)
-        tcg_gen_movi_tl(tn, 0);
-    else if (reg < 8)
-        tcg_gen_mov_tl(tn, cpu_gregs[reg]);
-    else {
-        tcg_gen_ld_tl(tn, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+    if (reg > 0) {
+        if (reg < 8) {
+            tcg_gen_mov_tl(cpu_gregs[reg], v);
+        } else {
+            tcg_gen_st_tl(v, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+        }
     }
 }
 
-static inline void gen_movl_TN_reg(int reg, TCGv tn)
+static inline TCGv gen_dest_gpr(DisasContext *dc, int reg)
 {
-    if (reg == 0)
-        return;
-    else if (reg < 8)
-        tcg_gen_mov_tl(cpu_gregs[reg], tn);
-    else {
-        tcg_gen_st_tl(tn, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+    if (reg == 0 || reg >= 8) {
+        return get_temp_tl(dc);
+    } else {
+        return cpu_gregs[reg];
     }
 }
 
@@ -582,9 +606,10 @@ static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
 
 static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
-    TCGv r_temp, zero;
+    TCGv r_temp, zero, t0;
 
     r_temp = tcg_temp_new();
+    t0 = tcg_temp_new();
 
     /* old op:
     if (!(env->y & 1))
@@ -602,22 +627,23 @@ static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
     // env->y = (b2 << 31) | (env->y >> 1);
     tcg_gen_andi_tl(r_temp, cpu_cc_src, 0x1);
     tcg_gen_shli_tl(r_temp, r_temp, 31);
-    tcg_gen_shri_tl(cpu_tmp0, cpu_y, 1);
-    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0x7fffffff);
-    tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, r_temp);
-    tcg_gen_andi_tl(cpu_y, cpu_tmp0, 0xffffffff);
+    tcg_gen_shri_tl(t0, cpu_y, 1);
+    tcg_gen_andi_tl(t0, t0, 0x7fffffff);
+    tcg_gen_or_tl(t0, t0, r_temp);
+    tcg_gen_andi_tl(cpu_y, t0, 0xffffffff);
 
     // b1 = N ^ V;
-    gen_mov_reg_N(cpu_tmp0, cpu_psr);
+    gen_mov_reg_N(t0, cpu_psr);
     gen_mov_reg_V(r_temp, cpu_psr);
-    tcg_gen_xor_tl(cpu_tmp0, cpu_tmp0, r_temp);
+    tcg_gen_xor_tl(t0, t0, r_temp);
     tcg_temp_free(r_temp);
 
     // T0 = (b1 << 31) | (T0 >> 1);
     // src1 = T0;
-    tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, 31);
+    tcg_gen_shli_tl(t0, t0, 31);
     tcg_gen_shri_tl(cpu_cc_src, cpu_cc_src, 1);
-    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+    tcg_temp_free(t0);
 
     tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
 
@@ -649,9 +675,9 @@ static inline void gen_op_multiply(TCGv dst, TCGv src1, TCGv src2, int sign_ext)
     tcg_gen_mul_i64(r_temp2, r_temp, r_temp2);
 
     tcg_gen_shri_i64(r_temp, r_temp2, 32);
-    tcg_gen_trunc_i64_tl(cpu_tmp0, r_temp);
+    tcg_gen_trunc_i64_tl(cpu_y, r_temp);
     tcg_temp_free_i64(r_temp);
-    tcg_gen_andi_tl(cpu_y, cpu_tmp0, 0xffffffff);
+    tcg_gen_andi_tl(cpu_y, cpu_y, 0xffffffff);
 
     tcg_gen_trunc_i64_tl(dst, r_temp2);
 
@@ -688,27 +714,33 @@ static inline void gen_op_eval_be(TCGv dst, TCGv_i32 src)
 // Z | (N ^ V)
 static inline void gen_op_eval_ble(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_N(cpu_tmp0, src);
+    TCGv t0 = tcg_temp_new();
+    gen_mov_reg_N(t0, src);
     gen_mov_reg_V(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
-    gen_mov_reg_Z(cpu_tmp0, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    tcg_gen_xor_tl(dst, dst, t0);
+    gen_mov_reg_Z(t0, src);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // N ^ V
 static inline void gen_op_eval_bl(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_V(cpu_tmp0, src);
+    TCGv t0 = tcg_temp_new();
+    gen_mov_reg_V(t0, src);
     gen_mov_reg_N(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    tcg_gen_xor_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // C | Z
 static inline void gen_op_eval_bleu(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_Z(cpu_tmp0, src);
+    TCGv t0 = tcg_temp_new();
+    gen_mov_reg_Z(t0, src);
     gen_mov_reg_C(dst, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // C
@@ -745,29 +777,21 @@ static inline void gen_op_eval_bne(TCGv dst, TCGv_i32 src)
 // !(Z | (N ^ V))
 static inline void gen_op_eval_bg(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_N(cpu_tmp0, src);
-    gen_mov_reg_V(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
-    gen_mov_reg_Z(cpu_tmp0, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_op_eval_ble(dst, src);
     tcg_gen_xori_tl(dst, dst, 0x1);
 }
 
 // !(N ^ V)
 static inline void gen_op_eval_bge(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_V(cpu_tmp0, src);
-    gen_mov_reg_N(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    gen_op_eval_bl(dst, src);
     tcg_gen_xori_tl(dst, dst, 0x1);
 }
 
 // !(C | Z)
 static inline void gen_op_eval_bgu(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_Z(cpu_tmp0, src);
-    gen_mov_reg_C(dst, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_op_eval_bleu(dst, src);
     tcg_gen_xori_tl(dst, dst, 0x1);
 }
 
@@ -817,18 +841,22 @@ static inline void gen_mov_reg_FCC1(TCGv reg, TCGv src,
 static inline void gen_op_eval_fbne(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 1 or 2: FCC0 ^ FCC1
 static inline void gen_op_eval_fblg(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_xor_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 1 or 3: FCC0
@@ -842,10 +870,11 @@ static inline void gen_op_eval_fbul(TCGv dst, TCGv src,
 static inline void gen_op_eval_fbl(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xori_tl(cpu_tmp0, cpu_tmp0, 0x1);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 2 or 3: FCC1
@@ -859,39 +888,46 @@ static inline void gen_op_eval_fbug(TCGv dst, TCGv src,
 static inline void gen_op_eval_fbg(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, t0, dst);
+    tcg_temp_free(t0);
 }
 
 // 3: FCC0 & FCC1
 static inline void gen_op_eval_fbu(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_and_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 0: !(FCC0 | FCC1)
 static inline void gen_op_eval_fbe(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_or_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // 0 or 3: !(FCC0 ^ FCC1)
 static inline void gen_op_eval_fbue(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_xor_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // 0 or 2: !FCC0
@@ -906,11 +942,12 @@ static inline void gen_op_eval_fbge(TCGv dst, TCGv src,
 static inline void gen_op_eval_fbuge(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xori_tl(cpu_tmp0, cpu_tmp0, 0x1);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // 0 or 1: !FCC1
@@ -925,21 +962,24 @@ static inline void gen_op_eval_fble(TCGv dst, TCGv src,
 static inline void gen_op_eval_fbule(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, t0, dst);
     tcg_gen_xori_tl(dst, dst, 0x1);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
-    tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // !3: !(FCC0 & FCC1)
 static inline void gen_op_eval_fbo(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_and_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 static inline void gen_branch2(DisasContext *dc, target_ulong pc1,
@@ -1675,7 +1715,7 @@ static inline void gen_fop_FF(DisasContext *dc, int rd, int rs,
     TCGv_i32 dst, src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env, src);
 
@@ -1688,7 +1728,7 @@ static inline void gen_ne_fop_FF(DisasContext *dc, int rd, int rs,
     TCGv_i32 dst, src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, src);
 
@@ -1702,7 +1742,7 @@ static inline void gen_fop_FFF(DisasContext *dc, int rd, int rs1, int rs2,
 
     src1 = gen_load_fpr_F(dc, rs1);
     src2 = gen_load_fpr_F(dc, rs2);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env, src1, src2);
 
@@ -1717,7 +1757,7 @@ static inline void gen_ne_fop_FFF(DisasContext *dc, int rd, int rs1, int rs2,
 
     src1 = gen_load_fpr_F(dc, rs1);
     src2 = gen_load_fpr_F(dc, rs2);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, src1, src2);
 
@@ -1731,7 +1771,7 @@ static inline void gen_fop_DD(DisasContext *dc, int rd, int rs,
     TCGv_i64 dst, src;
 
     src = gen_load_fpr_D(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src);
 
@@ -1745,7 +1785,7 @@ static inline void gen_ne_fop_DD(DisasContext *dc, int rd, int rs,
     TCGv_i64 dst, src;
 
     src = gen_load_fpr_D(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, src);
 
@@ -1760,7 +1800,7 @@ static inline void gen_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
 
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src1, src2);
 
@@ -1775,7 +1815,7 @@ static inline void gen_ne_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
 
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, src1, src2);
 
@@ -1789,7 +1829,7 @@ static inline void gen_gsr_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
 
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_gsr, src1, src2);
 
@@ -1804,7 +1844,7 @@ static inline void gen_ne_fop_DDDD(DisasContext *dc, int rd, int rs1, int rs2,
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
     src0 = gen_load_fpr_D(dc, rd);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, src0, src1, src2);
 
@@ -1856,7 +1896,7 @@ static inline void gen_fop_DFF(DisasContext *dc, int rd, int rs1, int rs2,
 
     src1 = gen_load_fpr_F(dc, rs1);
     src2 = gen_load_fpr_F(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src1, src2);
 
@@ -1885,7 +1925,7 @@ static inline void gen_fop_DF(DisasContext *dc, int rd, int rs,
     TCGv_i32 src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src);
 
@@ -1900,7 +1940,7 @@ static inline void gen_ne_fop_DF(DisasContext *dc, int rd, int rs,
     TCGv_i32 src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src);
 
@@ -1914,7 +1954,7 @@ static inline void gen_fop_FD(DisasContext *dc, int rd, int rs,
     TCGv_i64 src;
 
     src = gen_load_fpr_D(dc, rs);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env, src);
 
@@ -1927,7 +1967,7 @@ static inline void gen_fop_FQ(DisasContext *dc, int rd, int rs,
     TCGv_i32 dst;
 
     gen_op_load_fpr_QT1(QFPREG(rs));
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env);
 
@@ -1940,7 +1980,7 @@ static inline void gen_fop_DQ(DisasContext *dc, int rd, int rs,
     TCGv_i64 dst;
 
     gen_op_load_fpr_QT1(QFPREG(rs));
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env);
 
@@ -2041,22 +2081,25 @@ static inline void gen_stf_asi(TCGv addr, int insn, int size, int rd)
     tcg_temp_free_i32(r_asi);
 }
 
-static inline void gen_swap_asi(TCGv dst, TCGv addr, int insn)
+static inline void gen_swap_asi(TCGv dst, TCGv src, TCGv addr, int insn)
 {
     TCGv_i32 r_asi, r_size, r_sign;
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
     r_asi = gen_get_asi(insn, addr);
     r_size = tcg_const_i32(4);
     r_sign = tcg_const_i32(0);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
     tcg_temp_free_i32(r_sign);
-    gen_helper_st_asi(cpu_env, addr, dst, r_asi, r_size);
+    gen_helper_st_asi(cpu_env, addr, src, r_asi, r_size);
     tcg_temp_free_i32(r_size);
     tcg_temp_free_i32(r_asi);
-    tcg_gen_trunc_i64_tl(dst, cpu_tmp64);
+    tcg_gen_trunc_i64_tl(dst, t64);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_ldda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_ldda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_rd;
 
@@ -2067,42 +2110,44 @@ static inline void gen_ldda_asi(TCGv hi, TCGv addr, int insn, int rd)
     tcg_temp_free_i32(r_asi);
 }
 
-static inline void gen_stda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_size;
+    TCGv lo = gen_load_gpr(dc, rd + 1);
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
-    gen_movl_reg_TN(rd + 1, cpu_tmp0);
-    tcg_gen_concat_tl_i64(cpu_tmp64, cpu_tmp0, hi);
+    tcg_gen_concat_tl_i64(t64, lo, hi);
     r_asi = gen_get_asi(insn, addr);
     r_size = tcg_const_i32(8);
-    gen_helper_st_asi(cpu_env, addr, cpu_tmp64, r_asi, r_size);
+    gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_size);
     tcg_temp_free_i32(r_size);
     tcg_temp_free_i32(r_asi);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_cas_asi(TCGv dst, TCGv addr, TCGv val2, int insn,
-                               int rd)
+static inline void gen_cas_asi(DisasContext *dc, TCGv addr,
+                               TCGv val2, int insn, int rd)
 {
-    TCGv r_val1;
-    TCGv_i32 r_asi;
+    TCGv val1 = gen_load_gpr(dc, rd);
+    TCGv dst = gen_dest_gpr(dc, rd);
+    TCGv_i32 r_asi = gen_get_asi(insn, addr);
 
-    r_val1 = tcg_temp_new();
-    gen_movl_reg_TN(rd, r_val1);
-    r_asi = gen_get_asi(insn, addr);
-    gen_helper_cas_asi(dst, cpu_env, addr, r_val1, val2, r_asi);
+    gen_helper_cas_asi(dst, cpu_env, addr, val1, val2, r_asi);
     tcg_temp_free_i32(r_asi);
-    tcg_temp_free(r_val1);
+    gen_store_gpr(dc, rd, dst);
 }
 
-static inline void gen_casx_asi(TCGv dst, TCGv addr, TCGv val2, int insn,
-                                int rd)
+static inline void gen_casx_asi(DisasContext *dc, TCGv addr,
+                                TCGv val2, int insn, int rd)
 {
-    TCGv_i32 r_asi;
+    TCGv val1 = gen_load_gpr(dc, rd);
+    TCGv dst = gen_dest_gpr(dc, rd);
+    TCGv_i32 r_asi = gen_get_asi(insn, addr);
 
-    gen_movl_reg_TN(rd, cpu_tmp64);
-    r_asi = gen_get_asi(insn, addr);
-    gen_helper_casx_asi(dst, cpu_env, addr, cpu_tmp64, val2, r_asi);
+    gen_helper_casx_asi(dst, cpu_env, addr, val1, val2, r_asi);
     tcg_temp_free_i32(r_asi);
+    gen_store_gpr(dc, rd, dst);
 }
 
 #elif !defined(CONFIG_USER_ONLY)
@@ -2111,77 +2156,94 @@ static inline void gen_ld_asi(TCGv dst, TCGv addr, int insn, int size,
                               int sign)
 {
     TCGv_i32 r_asi, r_size, r_sign;
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(size);
     r_sign = tcg_const_i32(sign);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
-    tcg_temp_free(r_sign);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
-    tcg_gen_trunc_i64_tl(dst, cpu_tmp64);
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
+    tcg_temp_free_i32(r_sign);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_gen_trunc_i64_tl(dst, t64);
+    tcg_temp_free_i64(t64);
 }
 
 static inline void gen_st_asi(TCGv src, TCGv addr, int insn, int size)
 {
     TCGv_i32 r_asi, r_size;
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
-    tcg_gen_extu_tl_i64(cpu_tmp64, src);
+    tcg_gen_extu_tl_i64(t64, src);
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(size);
-    gen_helper_st_asi(cpu_env, addr, cpu_tmp64, r_asi, r_size);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
+    gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_size);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_swap_asi(TCGv dst, TCGv addr, int insn)
+static inline void gen_swap_asi(TCGv dst, TCGv src, TCGv addr, int insn)
 {
     TCGv_i32 r_asi, r_size, r_sign;
-    TCGv_i64 r_val;
+    TCGv_i64 r_val, t64;
 
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(4);
     r_sign = tcg_const_i32(0);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
+    t64 = tcg_temp_new_i64();
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
     tcg_temp_free(r_sign);
     r_val = tcg_temp_new_i64();
-    tcg_gen_extu_tl_i64(r_val, dst);
+    tcg_gen_extu_tl_i64(r_val, src);
     gen_helper_st_asi(cpu_env, addr, r_val, r_asi, r_size);
     tcg_temp_free_i64(r_val);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
-    tcg_gen_trunc_i64_tl(dst, cpu_tmp64);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_gen_trunc_i64_tl(dst, t64);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_ldda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_ldda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_size, r_sign;
+    TCGv t;
+    TCGv_i64 t64;
 
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(8);
     r_sign = tcg_const_i32(0);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
-    tcg_temp_free(r_sign);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
-    tcg_gen_trunc_i64_tl(cpu_tmp0, cpu_tmp64);
-    gen_movl_TN_reg(rd + 1, cpu_tmp0);
-    tcg_gen_shri_i64(cpu_tmp64, cpu_tmp64, 32);
-    tcg_gen_trunc_i64_tl(hi, cpu_tmp64);
-    gen_movl_TN_reg(rd, hi);
+    t64 = tcg_temp_new_i64();
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
+    tcg_temp_free_i32(r_sign);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+
+    t = gen_dest_gpr(dc, rd + 1);
+    tcg_gen_trunc_i64_tl(t, t64);
+    gen_store_gpr(dc, rd + 1, t);
+
+    tcg_gen_shri_i64(t64, t64, 32);
+    tcg_gen_trunc_i64_tl(hi, t64);
+    tcg_temp_free_i64(t64);
+    gen_store_gpr(dc, rd, hi);
 }
 
-static inline void gen_stda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_size;
+    TCGv lo = gen_load_gpr(dc, rd + 1);
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
-    gen_movl_reg_TN(rd + 1, cpu_tmp0);
-    tcg_gen_concat_tl_i64(cpu_tmp64, cpu_tmp0, hi);
+    tcg_gen_concat_tl_i64(t64, lo, hi);
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(8);
-    gen_helper_st_asi(cpu_env, addr, cpu_tmp64, r_asi, r_size);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
+    gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_size);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_temp_free_i64(t64);
 }
 #endif
 
@@ -2203,40 +2265,23 @@ static inline void gen_ldstub_asi(TCGv dst, TCGv addr, int insn)
 }
 #endif
 
-static inline TCGv get_src1(unsigned int insn, TCGv def)
+static TCGv get_src1(DisasContext *dc, unsigned int insn)
 {
-    TCGv r_rs1 = def;
-    unsigned int rs1;
-
-    rs1 = GET_FIELD(insn, 13, 17);
-    if (rs1 == 0) {
-        tcg_gen_movi_tl(def, 0);
-    } else if (rs1 < 8) {
-        r_rs1 = cpu_gregs[rs1];
-    } else {
-        tcg_gen_ld_tl(def, cpu_regwptr, (rs1 - 8) * sizeof(target_ulong));
-    }
-    return r_rs1;
+    unsigned int rs1 = GET_FIELD(insn, 13, 17);
+    return gen_load_gpr(dc, rs1);
 }
 
-static inline TCGv get_src2(unsigned int insn, TCGv def)
+static TCGv get_src2(DisasContext *dc, unsigned int insn)
 {
-    TCGv r_rs2 = def;
-
     if (IS_IMM) { /* immediate */
         target_long simm = GET_FIELDs(insn, 19, 31);
-        tcg_gen_movi_tl(def, simm);
-    } else { /* register */
+        TCGv t = get_temp_tl(dc);
+        tcg_gen_movi_tl(t, simm);
+        return t;
+    } else {      /* register */
         unsigned int rs2 = GET_FIELD(insn, 27, 31);
-        if (rs2 == 0) {
-            tcg_gen_movi_tl(def, 0);
-        } else if (rs2 < 8) {
-            r_rs2 = cpu_gregs[rs2];
-        } else {
-            tcg_gen_ld_tl(def, cpu_regwptr, (rs2 - 8) * sizeof(target_ulong));
-        }
+        return gen_load_gpr(dc, rs2);
     }
-    return r_rs2;
 }
 
 #ifdef TARGET_SPARC64
@@ -2259,7 +2304,7 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
 
     s1 = gen_load_fpr_F(dc, rs);
     s2 = gen_load_fpr_F(dc, rd);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
     zero = tcg_const_i32(0);
 
     tcg_gen_movcond_i32(TCG_COND_NE, dst, c32, zero, s1, s2);
@@ -2271,7 +2316,7 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
 
 static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
 {
-    TCGv_i64 dst = gen_dest_fpr_D();
+    TCGv_i64 dst = gen_dest_fpr_D(dc, rd);
     tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, cmp->c2,
                         gen_load_fpr_D(dc, rs),
                         gen_load_fpr_D(dc, rd));
@@ -2470,7 +2515,7 @@ static void gen_faligndata(TCGv dst, TCGv gsr, TCGv s1, TCGv s2)
 static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 {
     unsigned int opc, rs1, rs2, rd;
-    TCGv cpu_src1, cpu_src2, cpu_tmp1, cpu_tmp2;
+    TCGv cpu_src1, cpu_src2;
     TCGv_i32 cpu_src1_32, cpu_src2_32, cpu_dst_32;
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
@@ -2480,12 +2525,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     }
 
     opc = GET_FIELD(insn, 0, 1);
-
     rd = GET_FIELD(insn, 2, 6);
 
-    cpu_tmp1 = cpu_src1 = tcg_temp_new();
-    cpu_tmp2 = cpu_src2 = tcg_temp_new();
-
     switch (opc) {
     case 0:                     /* branches/sethi */
         {
@@ -2515,7 +2556,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         (GET_FIELD_SP(insn, 20, 21) << 14);
                     target = sign_extend(target, 16);
                     target <<= 2;
-                    cpu_src1 = get_src1(insn, cpu_src1);
+                    cpu_src1 = get_src1(dc, insn);
                     do_branch_reg(dc, target, insn, cpu_src1);
                     goto jmp_insn;
                 }
@@ -2557,13 +2598,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     goto jmp_insn;
                 }
             case 0x4:           /* SETHI */
-                if (rd) { // nop
+                /* Special-case %g0 because that's the canonical nop.  */
+                if (rd) {
                     uint32_t value = GET_FIELD(insn, 10, 31);
-                    TCGv r_const;
-
-                    r_const = tcg_const_tl(value << 10);
-                    gen_movl_TN_reg(rd, r_const);
-                    tcg_temp_free(r_const);
+                    TCGv t = gen_dest_gpr(dc, rd);
+                    tcg_gen_movi_tl(t, value << 10);
+                    gen_store_gpr(dc, rd, t);
                 }
                 break;
             case 0x0:           /* UNIMPL */
@@ -2576,11 +2616,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     case 1:                     /*CALL*/
         {
             target_long target = GET_FIELDs(insn, 2, 31) << 2;
-            TCGv r_const;
+            TCGv o7 = gen_dest_gpr(dc, 15);
 
-            r_const = tcg_const_tl(dc->pc);
-            gen_movl_TN_reg(15, r_const);
-            tcg_temp_free(r_const);
+            tcg_gen_movi_tl(o7, dc->pc);
+            gen_store_gpr(dc, 15, o7);
             target += dc->pc;
             gen_mov_pc_npc(dc);
 #ifdef TARGET_SPARC64
@@ -2594,6 +2633,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     case 2:                     /* FPU & Logical Operations */
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
+            TCGv cpu_dst = gen_dest_gpr(dc, rd);
+            TCGv cpu_tmp0;
+
             if (xop == 0x3a) {  /* generate trap */
                 int cond = GET_FIELD(insn, 3, 6);
                 TCGv_i32 trap;
@@ -2644,22 +2686,17 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         /* Signal that the trap value is fully constant.  */
                         mask = 0;
                     } else {
-                        TCGv t1 = tcg_temp_new();
-                        gen_movl_reg_TN(rs1, t1);
+                        TCGv t1 = gen_load_gpr(dc, rs1);
                         tcg_gen_trunc_tl_i32(trap, t1);
-                        tcg_temp_free(t1);
                         tcg_gen_addi_i32(trap, trap, rs2);
                     }
                 } else {
-                    TCGv t1 = tcg_temp_new();
-                    TCGv t2 = tcg_temp_new();
+                    TCGv t1, t2;
                     rs2 = GET_FIELD_SP(insn, 0, 4);
-                    gen_movl_reg_TN(rs1, t1);
-                    gen_movl_reg_TN(rs2, t2);
+                    t1 = gen_load_gpr(dc, rs1);
+                    t2 = gen_load_gpr(dc, rs2);
                     tcg_gen_add_tl(t1, t1, t2);
                     tcg_gen_trunc_tl_i32(trap, t1);
-                    tcg_temp_free(t1);
-                    tcg_temp_free(t2);
                 }
                 if (mask != 0) {
                     tcg_gen_andi_i32(trap, trap, mask);
@@ -2693,27 +2730,24 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                        microSPARC II */
                     /* Read Asr17 */
                     if (rs1 == 0x11 && dc->def->features & CPU_FEATURE_ASR17) {
-                        TCGv r_const;
-
+                        TCGv t = gen_dest_gpr(dc, rd);
                         /* Read Asr17 for a Leon3 monoprocessor */
-                        r_const = tcg_const_tl((1 << 8)
-                                               | (dc->def->nwindows - 1));
-                        gen_movl_TN_reg(rd, r_const);
-                        tcg_temp_free(r_const);
+                        tcg_gen_movi_tl(t, (1 << 8) | (dc->def->nwindows - 1));
+                        gen_store_gpr(dc, rd, t);
                         break;
                     }
 #endif
-                    gen_movl_TN_reg(rd, cpu_y);
+                    gen_store_gpr(dc, rd, cpu_y);
                     break;
 #ifdef TARGET_SPARC64
                 case 0x2: /* V9 rdccr */
                     update_psr(dc);
                     gen_helper_rdccr(cpu_dst, cpu_env);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x3: /* V9 rdasi */
                     tcg_gen_ext_i32_tl(cpu_dst, cpu_asi);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x4: /* V9 rdtick */
                     {
@@ -2724,25 +2758,23 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                        offsetof(CPUSPARCState, tick));
                         gen_helper_tick_get_count(cpu_dst, r_tickptr);
                         tcg_temp_free_ptr(r_tickptr);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                     }
                     break;
                 case 0x5: /* V9 rdpc */
                     {
-                        TCGv r_const;
-
+                        TCGv t = gen_dest_gpr(dc, rd);
                         if (unlikely(AM_CHECK(dc))) {
-                            r_const = tcg_const_tl(dc->pc & 0xffffffffULL);
+                            tcg_gen_movi_tl(t, dc->pc & 0xffffffffULL);
                         } else {
-                           r_const = tcg_const_tl(dc->pc);
+                            tcg_gen_movi_tl(t, dc->pc);
                         }
-                        gen_movl_TN_reg(rd, r_const);
-                        tcg_temp_free(r_const);
+                        gen_store_gpr(dc, rd, t);
                     }
                     break;
                 case 0x6: /* V9 rdfprs */
                     tcg_gen_ext_i32_tl(cpu_dst, cpu_fprs);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0xf: /* V9 membar */
                     break; /* no effect */
@@ -2750,14 +2782,14 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    gen_movl_TN_reg(rd, cpu_gsr);
+                    gen_store_gpr(dc, rd, cpu_gsr);
                     break;
                 case 0x16: /* Softint */
                     tcg_gen_ext_i32_tl(cpu_dst, cpu_softint);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x17: /* Tick compare */
-                    gen_movl_TN_reg(rd, cpu_tick_cmpr);
+                    gen_store_gpr(dc, rd, cpu_tick_cmpr);
                     break;
                 case 0x18: /* System tick */
                     {
@@ -2768,11 +2800,11 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                        offsetof(CPUSPARCState, stick));
                         gen_helper_tick_get_count(cpu_dst, r_tickptr);
                         tcg_temp_free_ptr(r_tickptr);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                     }
                     break;
                 case 0x19: /* System tick compare */
-                    gen_movl_TN_reg(rd, cpu_stick_cmpr);
+                    gen_store_gpr(dc, rd, cpu_stick_cmpr);
                     break;
                 case 0x10: /* Performance Control */
                 case 0x11: /* Performance Instrumentation Counter */
@@ -2819,11 +2851,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     goto illegal_insn;
                 }
 #endif
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
                 break;
             } else if (xop == 0x2a) { /* rdwim / V9 rdpr */
-                if (!supervisor(dc))
+                if (!supervisor(dc)) {
                     goto priv_insn;
+                }
+                cpu_tmp0 = get_temp_tl(dc);
 #ifdef TARGET_SPARC64
                 rs1 = GET_FIELD(insn, 13, 17);
                 switch (rs1) {
@@ -2862,14 +2896,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
                 case 3: // tt
                     {
-                        TCGv_ptr r_tsptr;
+                        TCGv_ptr r_tsptr = tcg_temp_new_ptr();
 
-                        r_tsptr = tcg_temp_new_ptr();
                         gen_load_trap_state_at_tl(r_tsptr, cpu_env);
-                        tcg_gen_ld_i32(cpu_tmp32, r_tsptr,
-                                       offsetof(trap_state, tt));
+                        tcg_gen_ld32s_tl(cpu_tmp0, r_tsptr,
+                                         offsetof(trap_state, tt));
                         tcg_temp_free_ptr(r_tsptr);
-                        tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
                     }
                     break;
                 case 4: // tick
@@ -2880,7 +2912,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         tcg_gen_ld_ptr(r_tickptr, cpu_env,
                                        offsetof(CPUSPARCState, tick));
                         gen_helper_tick_get_count(cpu_tmp0, r_tickptr);
-                        gen_movl_TN_reg(rd, cpu_tmp0);
                         tcg_temp_free_ptr(r_tickptr);
                     }
                     break;
@@ -2888,53 +2919,44 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     tcg_gen_mov_tl(cpu_tmp0, cpu_tbr);
                     break;
                 case 6: // pstate
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, pstate));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, pstate));
                     break;
                 case 7: // tl
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, tl));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, tl));
                     break;
                 case 8: // pil
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, psrpil));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, psrpil));
                     break;
                 case 9: // cwp
                     gen_helper_rdcwp(cpu_tmp0, cpu_env);
                     break;
                 case 10: // cansave
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, cansave));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, cansave));
                     break;
                 case 11: // canrestore
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, canrestore));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, canrestore));
                     break;
                 case 12: // cleanwin
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, cleanwin));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, cleanwin));
                     break;
                 case 13: // otherwin
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, otherwin));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, otherwin));
                     break;
                 case 14: // wstate
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, wstate));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, wstate));
                     break;
                 case 16: // UA2005 gl
                     CHECK_IU_FEATURE(dc, GL);
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, gl));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, gl));
                     break;
                 case 26: // UA2005 strand status
                     CHECK_IU_FEATURE(dc, HYPV);
@@ -2952,7 +2974,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #else
                 tcg_gen_ext_i32_tl(cpu_tmp0, cpu_wim);
 #endif
-                gen_movl_TN_reg(rd, cpu_tmp0);
+                gen_store_gpr(dc, rd, cpu_tmp0);
                 break;
             } else if (xop == 0x2b) { /* rdtbr / V9 flushw */
 #ifdef TARGET_SPARC64
@@ -2961,7 +2983,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #else
                 if (!supervisor(dc))
                     goto priv_insn;
-                gen_movl_TN_reg(rd, cpu_tbr);
+                gen_store_gpr(dc, rd, cpu_tbr);
 #endif
                 break;
 #endif
@@ -3154,8 +3176,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #define FMOVR(sz)                                                  \
                 do {                                               \
                     DisasCompare cmp;                              \
-                    cond = GET_FIELD_SP(insn, 14, 17);             \
-                    cpu_src1 = get_src1(insn, cpu_src1);           \
+                    cond = GET_FIELD_SP(insn, 10, 12);             \
+                    cpu_src1 = get_src1(dc, insn);                 \
                     gen_compare_reg(&cmp, cond, cpu_src1);         \
                     gen_fmov##sz(dc, &cmp, rd, rs2);               \
                     free_compare(&cmp);                            \
@@ -3293,43 +3315,45 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         goto illegal_insn;
                 }
             } else if (xop == 0x2) {
-                // clr/mov shortcut
-
+                TCGv dst = gen_dest_gpr(dc, rd);
                 rs1 = GET_FIELD(insn, 13, 17);
                 if (rs1 == 0) {
-                    // or %g0, x, y -> mov T0, x; mov y, T0
+                    /* clr/mov shortcut : or %g0, x, y -> mov x, y */
                     if (IS_IMM) {       /* immediate */
-                        TCGv r_const;
-
                         simm = GET_FIELDs(insn, 19, 31);
-                        r_const = tcg_const_tl(simm);
-                        gen_movl_TN_reg(rd, r_const);
-                        tcg_temp_free(r_const);
+                        tcg_gen_movi_tl(dst, simm);
+                        gen_store_gpr(dc, rd, dst);
                     } else {            /* register */
                         rs2 = GET_FIELD(insn, 27, 31);
-                        gen_movl_reg_TN(rs2, cpu_dst);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        if (rs2 == 0) {
+                            tcg_gen_movi_tl(dst, 0);
+                            gen_store_gpr(dc, rd, dst);
+                        } else {
+                            cpu_src2 = gen_load_gpr(dc, rs2);
+                            gen_store_gpr(dc, rd, cpu_src2);
+                        }
                     }
                 } else {
-                    cpu_src1 = get_src1(insn, cpu_src1);
+                    cpu_src1 = get_src1(dc, insn);
                     if (IS_IMM) {       /* immediate */
                         simm = GET_FIELDs(insn, 19, 31);
-                        tcg_gen_ori_tl(cpu_dst, cpu_src1, simm);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        tcg_gen_ori_tl(dst, cpu_src1, simm);
+                        gen_store_gpr(dc, rd, dst);
                     } else {            /* register */
-                        // or x, %g0, y -> mov T1, x; mov y, T1
                         rs2 = GET_FIELD(insn, 27, 31);
-                        if (rs2 != 0) {
-                            gen_movl_reg_TN(rs2, cpu_src2);
-                            tcg_gen_or_tl(cpu_dst, cpu_src1, cpu_src2);
-                            gen_movl_TN_reg(rd, cpu_dst);
-                        } else
-                            gen_movl_TN_reg(rd, cpu_src1);
+                        if (rs2 == 0) {
+                            /* mov shortcut:  or x, %g0, y -> mov x, y */
+                            gen_store_gpr(dc, rd, cpu_src1);
+                        } else {
+                            cpu_src2 = gen_load_gpr(dc, rs2);
+                            tcg_gen_or_tl(dst, cpu_src1, cpu_src2);
+                            gen_store_gpr(dc, rd, dst);
+                        }
                     }
                 }
 #ifdef TARGET_SPARC64
             } else if (xop == 0x25) { /* sll, V9 sllx */
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 20, 31);
                     if (insn & (1 << 12)) {
@@ -3339,7 +3363,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     }
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    cpu_tmp0 = get_temp_tl(dc);
                     if (insn & (1 << 12)) {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x3f);
                     } else {
@@ -3347,9 +3372,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     }
                     tcg_gen_shl_i64(cpu_dst, cpu_src1, cpu_tmp0);
                 }
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
             } else if (xop == 0x26) { /* srl, V9 srlx */
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 20, 31);
                     if (insn & (1 << 12)) {
@@ -3360,7 +3385,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     }
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    cpu_tmp0 = get_temp_tl(dc);
                     if (insn & (1 << 12)) {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x3f);
                         tcg_gen_shr_i64(cpu_dst, cpu_src1, cpu_tmp0);
@@ -3370,65 +3396,48 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         tcg_gen_shr_i64(cpu_dst, cpu_dst, cpu_tmp0);
                     }
                 }
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
             } else if (xop == 0x27) { /* sra, V9 srax */
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 20, 31);
                     if (insn & (1 << 12)) {
                         tcg_gen_sari_i64(cpu_dst, cpu_src1, simm & 0x3f);
                     } else {
-                        tcg_gen_andi_i64(cpu_dst, cpu_src1, 0xffffffffULL);
-                        tcg_gen_ext32s_i64(cpu_dst, cpu_dst);
+                        tcg_gen_ext32s_i64(cpu_dst, cpu_src1);
                         tcg_gen_sari_i64(cpu_dst, cpu_dst, simm & 0x1f);
                     }
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    cpu_tmp0 = get_temp_tl(dc);
                     if (insn & (1 << 12)) {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x3f);
                         tcg_gen_sar_i64(cpu_dst, cpu_src1, cpu_tmp0);
                     } else {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x1f);
-                        tcg_gen_andi_i64(cpu_dst, cpu_src1, 0xffffffffULL);
-                        tcg_gen_ext32s_i64(cpu_dst, cpu_dst);
+                        tcg_gen_ext32s_i64(cpu_dst, cpu_src1);
                         tcg_gen_sar_i64(cpu_dst, cpu_dst, cpu_tmp0);
                     }
                 }
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
 #endif
             } else if (xop < 0x36) {
                 if (xop < 0x20) {
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    cpu_src2 = get_src2(insn, cpu_src2);
+                    cpu_src1 = get_src1(dc, insn);
+                    cpu_src2 = get_src2(dc, insn);
                     switch (xop & ~0x10) {
                     case 0x0: /* add */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            if (xop & 0x10) {
-                                gen_op_addi_cc(cpu_dst, cpu_src1, simm);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
-                                dc->cc_op = CC_OP_ADD;
-                            } else {
-                                tcg_gen_addi_tl(cpu_dst, cpu_src1, simm);
-                            }
+                        if (xop & 0x10) {
+                            gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
+                            tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
+                            dc->cc_op = CC_OP_ADD;
                         } else {
-                            if (xop & 0x10) {
-                                gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
-                                dc->cc_op = CC_OP_ADD;
-                            } else {
-                                tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                            }
+                            tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
                         }
                         break;
                     case 0x1: /* and */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_andi_tl(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_and_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_and_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3436,12 +3445,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         }
                         break;
                     case 0x2: /* or */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_ori_tl(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_or_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_or_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3449,12 +3453,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         }
                         break;
                     case 0x3: /* xor */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_xori_tl(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3462,30 +3461,16 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         }
                         break;
                     case 0x4: /* sub */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            if (xop & 0x10) {
-                                gen_op_subi_cc(cpu_dst, cpu_src1, simm, dc);
-                            } else {
-                                tcg_gen_subi_tl(cpu_dst, cpu_src1, simm);
-                            }
+                        if (xop & 0x10) {
+                            gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
+                            tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB);
+                            dc->cc_op = CC_OP_SUB;
                         } else {
-                            if (xop & 0x10) {
-                                gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB);
-                                dc->cc_op = CC_OP_SUB;
-                            } else {
-                                tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_src2);
-                            }
+                            tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_src2);
                         }
                         break;
                     case 0x5: /* andn */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_andi_tl(cpu_dst, cpu_src1, ~simm);
-                        } else {
-                            tcg_gen_andc_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_andc_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3493,12 +3478,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         }
                         break;
                     case 0x6: /* orn */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_ori_tl(cpu_dst, cpu_src1, ~simm);
-                        } else {
-                            tcg_gen_orc_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_orc_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3506,13 +3486,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         }
                         break;
                     case 0x7: /* xorn */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_xori_tl(cpu_dst, cpu_src1, ~simm);
-                        } else {
-                            tcg_gen_not_tl(cpu_tmp0, cpu_src2);
-                            tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_tmp0);
-                        }
+                        tcg_gen_eqv_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3525,12 +3499,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         break;
 #ifdef TARGET_SPARC64
                     case 0x9: /* V9 mulx */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_muli_i64(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_mul_i64(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_mul_i64(cpu_dst, cpu_src1, cpu_src2);
                         break;
 #endif
                     case 0xa: /* umul */
@@ -3585,39 +3554,39 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     default:
                         goto illegal_insn;
                     }
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                 } else {
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    cpu_src2 = get_src2(insn, cpu_src2);
+                    cpu_src1 = get_src1(dc, insn);
+                    cpu_src2 = get_src2(dc, insn);
                     switch (xop) {
                     case 0x20: /* taddcc */
                         gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADD);
                         dc->cc_op = CC_OP_TADD;
                         break;
                     case 0x21: /* tsubcc */
                         gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUB);
                         dc->cc_op = CC_OP_TSUB;
                         break;
                     case 0x22: /* taddcctv */
                         gen_helper_taddcctv(cpu_dst, cpu_env,
                                             cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         dc->cc_op = CC_OP_TADDTV;
                         break;
                     case 0x23: /* tsubcctv */
                         gen_helper_tsubcctv(cpu_dst, cpu_env,
                                             cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         dc->cc_op = CC_OP_TSUBTV;
                         break;
                     case 0x24: /* mulscc */
                         update_psr(dc);
                         gen_op_mulscc(cpu_dst, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
                         dc->cc_op = CC_OP_ADD;
                         break;
@@ -3627,34 +3596,38 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             simm = GET_FIELDs(insn, 20, 31);
                             tcg_gen_shli_tl(cpu_dst, cpu_src1, simm & 0x1f);
                         } else { /* register */
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_andi_tl(cpu_tmp0, cpu_src2, 0x1f);
                             tcg_gen_shl_tl(cpu_dst, cpu_src1, cpu_tmp0);
                         }
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
                     case 0x26:  /* srl */
                         if (IS_IMM) { /* immediate */
                             simm = GET_FIELDs(insn, 20, 31);
                             tcg_gen_shri_tl(cpu_dst, cpu_src1, simm & 0x1f);
                         } else { /* register */
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_andi_tl(cpu_tmp0, cpu_src2, 0x1f);
                             tcg_gen_shr_tl(cpu_dst, cpu_src1, cpu_tmp0);
                         }
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
                     case 0x27:  /* sra */
                         if (IS_IMM) { /* immediate */
                             simm = GET_FIELDs(insn, 20, 31);
                             tcg_gen_sari_tl(cpu_dst, cpu_src1, simm & 0x1f);
                         } else { /* register */
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_andi_tl(cpu_tmp0, cpu_src2, 0x1f);
                             tcg_gen_sar_tl(cpu_dst, cpu_src1, cpu_tmp0);
                         }
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
 #endif
                     case 0x30:
                         {
+                            cpu_tmp0 = get_temp_tl(dc);
                             switch(rd) {
                             case 0: /* wry */
                                 tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
@@ -3672,19 +3645,19 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 break;
 #else
                             case 0x2: /* V9 wrccr */
-                                tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                                gen_helper_wrccr(cpu_env, cpu_dst);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_wrccr(cpu_env, cpu_tmp0);
                                 tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS);
                                 dc->cc_op = CC_OP_FLAGS;
                                 break;
                             case 0x3: /* V9 wrasi */
-                                tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_andi_tl(cpu_dst, cpu_dst, 0xff);
-                                tcg_gen_trunc_tl_i32(cpu_asi, cpu_dst);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xff);
+                                tcg_gen_trunc_tl_i32(cpu_asi, cpu_tmp0);
                                 break;
                             case 0x6: /* V9 wrfprs */
-                                tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_trunc_tl_i32(cpu_fprs, cpu_dst);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                tcg_gen_trunc_tl_i32(cpu_fprs, cpu_tmp0);
                                 save_state(dc);
                                 gen_op_next_insn();
                                 tcg_gen_exit_tb(0);
@@ -3706,20 +3679,20 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             case 0x14: /* Softint set */
                                 if (!supervisor(dc))
                                     goto illegal_insn;
-                                tcg_gen_xor_tl(cpu_tmp64, cpu_src1, cpu_src2);
-                                gen_helper_set_softint(cpu_env, cpu_tmp64);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_set_softint(cpu_env, cpu_tmp0);
                                 break;
                             case 0x15: /* Softint clear */
                                 if (!supervisor(dc))
                                     goto illegal_insn;
-                                tcg_gen_xor_tl(cpu_tmp64, cpu_src1, cpu_src2);
-                                gen_helper_clear_softint(cpu_env, cpu_tmp64);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_clear_softint(cpu_env, cpu_tmp0);
                                 break;
                             case 0x16: /* Softint write */
                                 if (!supervisor(dc))
                                     goto illegal_insn;
-                                tcg_gen_xor_tl(cpu_tmp64, cpu_src1, cpu_src2);
-                                gen_helper_write_softint(cpu_env, cpu_tmp64);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_write_softint(cpu_env, cpu_tmp0);
                                 break;
                             case 0x17: /* Tick compare */
 #if !defined(CONFIG_USER_ONLY)
@@ -3747,13 +3720,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 {
                                     TCGv_ptr r_tickptr;
 
-                                    tcg_gen_xor_tl(cpu_dst, cpu_src1,
+                                    tcg_gen_xor_tl(cpu_tmp0, cpu_src1,
                                                    cpu_src2);
                                     r_tickptr = tcg_temp_new_ptr();
                                     tcg_gen_ld_ptr(r_tickptr, cpu_env,
                                                    offsetof(CPUSPARCState, stick));
                                     gen_helper_tick_set_count(r_tickptr,
-                                                              cpu_dst);
+                                                              cpu_tmp0);
                                     tcg_temp_free_ptr(r_tickptr);
                                 }
                                 break;
@@ -3808,8 +3781,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 goto illegal_insn;
                             }
 #else
-                            tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                            gen_helper_wrpsr(cpu_env, cpu_dst);
+                            cpu_tmp0 = get_temp_tl(dc);
+                            tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                            gen_helper_wrpsr(cpu_env, cpu_tmp0);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS);
                             dc->cc_op = CC_OP_FLAGS;
                             save_state(dc);
@@ -3823,6 +3797,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         {
                             if (!supervisor(dc))
                                 goto priv_insn;
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
 #ifdef TARGET_SPARC64
                             switch (rd) {
@@ -3866,9 +3841,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 
                                     r_tsptr = tcg_temp_new_ptr();
                                     gen_load_trap_state_at_tl(r_tsptr, cpu_env);
-                                    tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                    tcg_gen_st_i32(cpu_tmp32, r_tsptr,
-                                                   offsetof(trap_state, tt));
+                                    tcg_gen_st32_tl(cpu_tmp0, r_tsptr,
+                                                    offsetof(trap_state, tt));
                                     tcg_temp_free_ptr(r_tsptr);
                                 }
                                 break;
@@ -3894,8 +3868,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 break;
                             case 7: // tl
                                 save_state(dc);
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
                                                offsetof(CPUSPARCState, tl));
                                 dc->npc = DYNAMIC_PC;
                                 break;
@@ -3906,40 +3879,34 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 gen_helper_wrcwp(cpu_env, cpu_tmp0);
                                 break;
                             case 10: // cansave
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        cansave));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         cansave));
                                 break;
                             case 11: // canrestore
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        canrestore));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         canrestore));
                                 break;
                             case 12: // cleanwin
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        cleanwin));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         cleanwin));
                                 break;
                             case 13: // otherwin
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        otherwin));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         otherwin));
                                 break;
                             case 14: // wstate
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        wstate));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         wstate));
                                 break;
                             case 16: // UA2005 gl
                                 CHECK_IU_FEATURE(dc, GL);
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState, gl));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState, gl));
                                 break;
                             case 26: // UA2005 strand status
                                 CHECK_IU_FEATURE(dc, HYPV);
@@ -3951,11 +3918,11 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 goto illegal_insn;
                             }
 #else
-                            tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                            if (dc->def->nwindows != 32)
-                                tcg_gen_andi_tl(cpu_tmp32, cpu_tmp32,
+                            tcg_gen_trunc_tl_i32(cpu_wim, cpu_tmp0);
+                            if (dc->def->nwindows != 32) {
+                                tcg_gen_andi_tl(cpu_wim, cpu_wim,
                                                 (1 << dc->def->nwindows) - 1);
-                            tcg_gen_mov_i32(cpu_wim, cpu_tmp32);
+                            }
 #endif
                         }
                         break;
@@ -3969,6 +3936,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             CHECK_IU_FEATURE(dc, HYPV);
                             if (!hypervisor(dc))
                                 goto priv_insn;
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
                             switch (rd) {
                             case 0: // hpstate
@@ -4014,6 +3982,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             int cc = GET_FIELD_SP(insn, 11, 12);
                             int cond = GET_FIELD_SP(insn, 14, 17);
                             DisasCompare cmp;
+                            TCGv dst;
 
                             if (insn & (1 << 18)) {
                                 if (cc == 0) {
@@ -4035,28 +4004,27 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 tcg_gen_movi_tl(cpu_src2, simm);
                             }
 
-                            gen_movl_reg_TN(rd, cpu_dst);
-                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                            dst = gen_load_gpr(dc, rd);
+                            tcg_gen_movcond_tl(cmp.cond, dst,
                                                cmp.c1, cmp.c2,
-                                               cpu_src2, cpu_dst);
+                                               cpu_src2, dst);
                             free_compare(&cmp);
-                            gen_movl_TN_reg(rd, cpu_dst);
+                            gen_store_gpr(dc, rd, dst);
                             break;
                         }
                     case 0x2d: /* V9 sdivx */
                         gen_helper_sdivx(cpu_dst, cpu_env, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
                     case 0x2e: /* V9 popc */
-                        {
-                            cpu_src2 = get_src2(insn, cpu_src2);
-                            gen_helper_popc(cpu_dst, cpu_src2);
-                            gen_movl_TN_reg(rd, cpu_dst);
-                        }
+                        gen_helper_popc(cpu_dst, cpu_src2);
+                        gen_store_gpr(dc, rd, cpu_dst);
+                        break;
                     case 0x2f: /* V9 movr */
                         {
                             int cond = GET_FIELD_SP(insn, 10, 12);
                             DisasCompare cmp;
+                            TCGv dst;
 
                             gen_compare_reg(&cmp, cond, cpu_src1);
 
@@ -4068,12 +4036,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                 tcg_gen_movi_tl(cpu_src2, simm);
                             }
 
-                            gen_movl_reg_TN(rd, cpu_dst);
-                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                            dst = gen_load_gpr(dc, rd);
+                            tcg_gen_movcond_tl(cmp.cond, dst,
                                                cmp.c1, cmp.c2,
-                                               cpu_src2, cpu_dst);
+                                               cpu_src2, dst);
                             free_compare(&cmp);
-                            gen_movl_TN_reg(rd, cpu_dst);
+                            gen_store_gpr(dc, rd, dst);
                             break;
                         }
 #endif
@@ -4093,188 +4061,188 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 switch (opf) {
                 case 0x000: /* VIS I edge8cc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x001: /* VIS II edge8n */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x002: /* VIS I edge8lcc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x003: /* VIS II edge8ln */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x004: /* VIS I edge16cc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x005: /* VIS II edge16n */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x006: /* VIS I edge16lcc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x007: /* VIS II edge16ln */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x008: /* VIS I edge32cc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x009: /* VIS II edge32n */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x00a: /* VIS I edge32lcc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x00b: /* VIS II edge32ln */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x010: /* VIS I array8 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x012: /* VIS I array16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x014: /* VIS I array32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 2);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x018: /* VIS I alignaddr */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x01a: /* VIS I alignaddrl */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x019: /* VIS II bmask */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    cpu_src2 = get_src1(insn, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, cpu_dst, 32, 32);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x020: /* VIS I fcmple16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmple16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x022: /* VIS I fcmpne16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpne16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x024: /* VIS I fcmple32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmple32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x026: /* VIS I fcmpne32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpne32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x028: /* VIS I fcmpgt16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpgt16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x02a: /* VIS I fcmpeq16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpeq16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x02c: /* VIS I fcmpgt32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpgt32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x02e: /* VIS I fcmpeq32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpeq32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x031: /* VIS I fmul8x16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
@@ -4311,14 +4279,14 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0x03b: /* VIS I fpack16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs2);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     gen_helper_fpack16(cpu_dst_32, cpu_gsr, cpu_src1_64);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x03d: /* VIS I fpackfix */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs2);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     gen_helper_fpackfix(cpu_dst_32, cpu_gsr, cpu_src1_64);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
@@ -4376,13 +4344,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
                 case 0x060: /* VIS I fzero */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_64 = gen_dest_fpr_D();
+                    cpu_dst_64 = gen_dest_fpr_D(dc, rd);
                     tcg_gen_movi_i64(cpu_dst_64, 0);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x061: /* VIS I fzeros */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     tcg_gen_movi_i32(cpu_dst_32, 0);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
@@ -4504,13 +4472,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
                 case 0x07e: /* VIS I fone */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_64 = gen_dest_fpr_D();
+                    cpu_dst_64 = gen_dest_fpr_D(dc, rd);
                     tcg_gen_movi_i64(cpu_dst_64, -1);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x07f: /* VIS I fones */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     tcg_gen_movi_i32(cpu_dst_32, -1);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
@@ -4535,55 +4503,59 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 TCGv_i32 r_const;
 
                 save_state(dc);
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
+                cpu_tmp0 = get_temp_tl(dc);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 19, 31);
-                    tcg_gen_addi_tl(cpu_dst, cpu_src1, simm);
+                    tcg_gen_addi_tl(cpu_tmp0, cpu_src1, simm);
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
                     if (rs2) {
-                        gen_movl_reg_TN(rs2, cpu_src2);
-                        tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                    } else
-                        tcg_gen_mov_tl(cpu_dst, cpu_src1);
+                        cpu_src2 = gen_load_gpr(dc, rs2);
+                        tcg_gen_add_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                    } else {
+                        tcg_gen_mov_tl(cpu_tmp0, cpu_src1);
+                    }
                 }
                 gen_helper_restore(cpu_env);
                 gen_mov_pc_npc(dc);
                 r_const = tcg_const_i32(3);
-                gen_helper_check_align(cpu_env, cpu_dst, r_const);
+                gen_helper_check_align(cpu_env, cpu_tmp0, r_const);
                 tcg_temp_free_i32(r_const);
-                tcg_gen_mov_tl(cpu_npc, cpu_dst);
+                tcg_gen_mov_tl(cpu_npc, cpu_tmp0);
                 dc->npc = DYNAMIC_PC;
                 goto jmp_insn;
 #endif
             } else {
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
+                cpu_tmp0 = get_temp_tl(dc);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 19, 31);
-                    tcg_gen_addi_tl(cpu_dst, cpu_src1, simm);
+                    tcg_gen_addi_tl(cpu_tmp0, cpu_src1, simm);
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
                     if (rs2) {
-                        gen_movl_reg_TN(rs2, cpu_src2);
-                        tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                    } else
-                        tcg_gen_mov_tl(cpu_dst, cpu_src1);
+                        cpu_src2 = gen_load_gpr(dc, rs2);
+                        tcg_gen_add_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                    } else {
+                        tcg_gen_mov_tl(cpu_tmp0, cpu_src1);
+                    }
                 }
                 switch (xop) {
                 case 0x38:      /* jmpl */
                     {
-                        TCGv r_pc;
+                        TCGv t;
                         TCGv_i32 r_const;
 
-                        r_pc = tcg_const_tl(dc->pc);
-                        gen_movl_TN_reg(rd, r_pc);
-                        tcg_temp_free(r_pc);
+                        t = gen_dest_gpr(dc, rd);
+                        tcg_gen_movi_tl(t, dc->pc);
+                        gen_store_gpr(dc, rd, t);
                         gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
-                        gen_helper_check_align(cpu_env, cpu_dst, r_const);
+                        gen_helper_check_align(cpu_env, cpu_tmp0, r_const);
                         tcg_temp_free_i32(r_const);
-                        gen_address_mask(dc, cpu_dst);
-                        tcg_gen_mov_tl(cpu_npc, cpu_dst);
+                        gen_address_mask(dc, cpu_tmp0);
+                        tcg_gen_mov_tl(cpu_npc, cpu_tmp0);
                         dc->npc = DYNAMIC_PC;
                     }
                     goto jmp_insn;
@@ -4596,9 +4568,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             goto priv_insn;
                         gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
-                        gen_helper_check_align(cpu_env, cpu_dst, r_const);
+                        gen_helper_check_align(cpu_env, cpu_tmp0, r_const);
                         tcg_temp_free_i32(r_const);
-                        tcg_gen_mov_tl(cpu_npc, cpu_dst);
+                        tcg_gen_mov_tl(cpu_npc, cpu_tmp0);
                         dc->npc = DYNAMIC_PC;
                         gen_helper_rett(cpu_env);
                     }
@@ -4612,12 +4584,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0x3c:      /* save */
                     save_state(dc);
                     gen_helper_save(cpu_env);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_tmp0);
                     break;
                 case 0x3d:      /* restore */
                     save_state(dc);
                     gen_helper_restore(cpu_env);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_tmp0);
                     break;
 #if !defined(CONFIG_USER_ONLY) && defined(TARGET_SPARC64)
                 case 0x3e:      /* V9 done/retry */
@@ -4653,26 +4625,29 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     case 3:                     /* load/store instructions */
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
+            /* ??? gen_address_mask prevents us from using a source
+               register directly.  Always generate a temporary.  */
+            TCGv cpu_addr = get_temp_tl(dc);
 
-            cpu_src1 = get_src1(insn, cpu_src1);
-            if (xop == 0x3c || xop == 0x3e) { // V9 casa/casxa
-                rs2 = GET_FIELD(insn, 27, 31);
-                gen_movl_reg_TN(rs2, cpu_src2);
-                tcg_gen_mov_tl(cpu_addr, cpu_src1);
+            tcg_gen_mov_tl(cpu_addr, get_src1(dc, insn));
+            if (xop == 0x3c || xop == 0x3e) {
+                /* V9 casa/casxa : no offset */
             } else if (IS_IMM) {     /* immediate */
                 simm = GET_FIELDs(insn, 19, 31);
-                tcg_gen_addi_tl(cpu_addr, cpu_src1, simm);
+                if (simm != 0) {
+                    tcg_gen_addi_tl(cpu_addr, cpu_addr, simm);
+                }
             } else {            /* register */
                 rs2 = GET_FIELD(insn, 27, 31);
                 if (rs2 != 0) {
-                    gen_movl_reg_TN(rs2, cpu_src2);
-                    tcg_gen_add_tl(cpu_addr, cpu_src1, cpu_src2);
-                } else
-                    tcg_gen_mov_tl(cpu_addr, cpu_src1);
+                    tcg_gen_add_tl(cpu_addr, cpu_addr, gen_load_gpr(dc, rs2));
+                }
             }
             if (xop < 4 || (xop > 7 && xop < 0x14 && xop != 0x0e) ||
                 (xop > 0x17 && xop <= 0x1d ) ||
                 (xop > 0x2c && xop <= 0x33) || xop == 0x1f || xop == 0x3d) {
+                TCGv cpu_val = gen_dest_gpr(dc, rd);
+
                 switch (xop) {
                 case 0x0:       /* ld, V9 lduw, load unsigned word */
                     gen_address_mask(dc, cpu_addr);
@@ -4691,6 +4666,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         goto illegal_insn;
                     else {
                         TCGv_i32 r_const;
+                        TCGv_i64 t64;
 
                         save_state(dc);
                         r_const = tcg_const_i32(7);
@@ -4698,13 +4674,15 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         gen_helper_check_align(cpu_env, cpu_addr, r_const);
                         tcg_temp_free_i32(r_const);
                         gen_address_mask(dc, cpu_addr);
-                        tcg_gen_qemu_ld64(cpu_tmp64, cpu_addr, dc->mem_idx);
-                        tcg_gen_trunc_i64_tl(cpu_tmp0, cpu_tmp64);
-                        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffffULL);
-                        gen_movl_TN_reg(rd + 1, cpu_tmp0);
-                        tcg_gen_shri_i64(cpu_tmp64, cpu_tmp64, 32);
-                        tcg_gen_trunc_i64_tl(cpu_val, cpu_tmp64);
-                        tcg_gen_andi_tl(cpu_val, cpu_val, 0xffffffffULL);
+                        t64 = tcg_temp_new_i64();
+                        tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
+                        tcg_gen_trunc_i64_tl(cpu_val, t64);
+                        tcg_gen_ext32u_tl(cpu_val, cpu_val);
+                        gen_store_gpr(dc, rd + 1, cpu_val);
+                        tcg_gen_shri_i64(t64, t64, 32);
+                        tcg_gen_trunc_i64_tl(cpu_val, t64);
+                        tcg_temp_free_i64(t64);
+                        tcg_gen_ext32u_tl(cpu_val, cpu_val);
                     }
                     break;
                 case 0x9:       /* ldsb, load signed byte */
@@ -4726,14 +4704,17 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         tcg_temp_free(r_const);
                     }
                     break;
-                case 0x0f:      /* swap, swap register with memory. Also
-                                   atomically */
-                    CHECK_IU_FEATURE(dc, SWAP);
-                    gen_movl_reg_TN(rd, cpu_val);
-                    gen_address_mask(dc, cpu_addr);
-                    tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                    tcg_gen_qemu_st32(cpu_val, cpu_addr, dc->mem_idx);
-                    tcg_gen_mov_tl(cpu_val, cpu_tmp0);
+                case 0x0f:
+                    /* swap, swap register with memory. Also atomically */
+                    {
+                        TCGv t0 = get_temp_tl(dc);
+                        CHECK_IU_FEATURE(dc, SWAP);
+                        cpu_src1 = gen_load_gpr(dc, rd);
+                        gen_address_mask(dc, cpu_addr);
+                        tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
+                        tcg_gen_qemu_st32(cpu_src1, cpu_addr, dc->mem_idx);
+                        tcg_gen_mov_tl(cpu_val, t0);
+                    }
                     break;
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
                 case 0x10:      /* lda, V9 lduwa, load word alternate */
@@ -4776,7 +4757,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (rd & 1)
                         goto illegal_insn;
                     save_state(dc);
-                    gen_ldda_asi(cpu_val, cpu_addr, insn, rd);
+                    gen_ldda_asi(dc, cpu_val, cpu_addr, insn, rd);
                     goto skip_move;
                 case 0x19:      /* ldsba, load signed byte alternate */
 #ifndef TARGET_SPARC64
@@ -4818,8 +4799,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         goto priv_insn;
 #endif
                     save_state(dc);
-                    gen_movl_reg_TN(rd, cpu_val);
-                    gen_swap_asi(cpu_val, cpu_addr, insn);
+                    cpu_src1 = gen_load_gpr(dc, rd);
+                    gen_swap_asi(cpu_val, cpu_src1, cpu_addr, insn);
                     break;
 
 #ifndef TARGET_SPARC64
@@ -4879,11 +4860,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 default:
                     goto illegal_insn;
                 }
-                gen_movl_TN_reg(rd, cpu_val);
+                gen_store_gpr(dc, rd, cpu_val);
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
             skip_move: ;
 #endif
             } else if (xop >= 0x20 && xop < 0x24) {
+                TCGv t0;
+
                 if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
                 }
@@ -4891,28 +4874,28 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                    cpu_dst_32 = gen_dest_fpr_F();
-                    tcg_gen_trunc_tl_i32(cpu_dst_32, cpu_tmp0);
+                    t0 = get_temp_tl(dc);
+                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
+                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x21:      /* ldfsr, V9 ldxfsr */
 #ifdef TARGET_SPARC64
                     gen_address_mask(dc, cpu_addr);
                     if (rd == 1) {
-                        tcg_gen_qemu_ld64(cpu_tmp64, cpu_addr, dc->mem_idx);
-                        gen_helper_ldxfsr(cpu_env, cpu_tmp64);
-                    } else {
-                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                        tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                        gen_helper_ldfsr(cpu_env, cpu_tmp32);
-                    }
-#else
-                    {
-                        tcg_gen_qemu_ld32u(cpu_tmp32, cpu_addr, dc->mem_idx);
-                        gen_helper_ldfsr(cpu_env, cpu_tmp32);
+                        TCGv_i64 t64 = tcg_temp_new_i64();
+                        tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
+                        gen_helper_ldxfsr(cpu_env, t64);
+                        tcg_temp_free_i64(t64);
+                        break;
                     }
 #endif
+                    cpu_dst_32 = get_temp_i32(dc);
+                    t0 = get_temp_tl(dc);
+                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
+                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
+                    gen_helper_ldfsr(cpu_env, cpu_dst_32);
                     break;
                 case 0x22:      /* ldqf, load quad fpreg */
                     {
@@ -4929,7 +4912,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
                 case 0x23:      /* lddf, load double fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    cpu_dst_64 = gen_dest_fpr_D();
+                    cpu_dst_64 = gen_dest_fpr_D(dc, rd);
                     tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
@@ -4938,7 +4921,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 }
             } else if (xop < 8 || (xop >= 0x14 && xop < 0x18) ||
                        xop == 0xe || xop == 0x1e) {
-                gen_movl_reg_TN(rd, cpu_val);
+                TCGv cpu_val = gen_load_gpr(dc, rd);
+
                 switch (xop) {
                 case 0x4: /* st, store word */
                     gen_address_mask(dc, cpu_addr);
@@ -4957,6 +4941,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         goto illegal_insn;
                     else {
                         TCGv_i32 r_const;
+                        TCGv_i64 t64;
+                        TCGv lo;
 
                         save_state(dc);
                         gen_address_mask(dc, cpu_addr);
@@ -4964,9 +4950,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         /* XXX remove alignment check */
                         gen_helper_check_align(cpu_env, cpu_addr, r_const);
                         tcg_temp_free_i32(r_const);
-                        gen_movl_reg_TN(rd + 1, cpu_tmp0);
-                        tcg_gen_concat_tl_i64(cpu_tmp64, cpu_tmp0, cpu_val);
-                        tcg_gen_qemu_st64(cpu_tmp64, cpu_addr, dc->mem_idx);
+                        lo = gen_load_gpr(dc, rd + 1);
+
+                        t64 = tcg_temp_new_i64();
+                        tcg_gen_concat_tl_i64(t64, lo, cpu_val);
+                        tcg_gen_qemu_st64(t64, cpu_addr, dc->mem_idx);
+                        tcg_temp_free_i64(t64);
                     }
                     break;
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
@@ -5014,7 +5003,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         goto illegal_insn;
                     else {
                         save_state(dc);
-                        gen_stda_asi(cpu_val, cpu_addr, insn, rd);
+                        gen_stda_asi(dc, cpu_val, cpu_addr, insn, rd);
                     }
                     break;
 #endif
@@ -5039,23 +5028,28 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 save_state(dc);
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
-                    gen_address_mask(dc, cpu_addr);
-                    cpu_src1_32 = gen_load_fpr_F(dc, rd);
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_src1_32);
-                    tcg_gen_qemu_st32(cpu_tmp0, cpu_addr, dc->mem_idx);
+                    {
+                        TCGv t = get_temp_tl(dc);
+                        gen_address_mask(dc, cpu_addr);
+                        cpu_src1_32 = gen_load_fpr_F(dc, rd);
+                        tcg_gen_ext_i32_tl(t, cpu_src1_32);
+                        tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx);
+                    }
                     break;
                 case 0x25: /* stfsr, V9 stxfsr */
+                    {
+                        TCGv t = get_temp_tl(dc);
+
+                        tcg_gen_ld_tl(t, cpu_env, offsetof(CPUSPARCState, fsr));
 #ifdef TARGET_SPARC64
-                    gen_address_mask(dc, cpu_addr);
-                    tcg_gen_ld_i64(cpu_tmp64, cpu_env, offsetof(CPUSPARCState, fsr));
-                    if (rd == 1)
-                        tcg_gen_qemu_st64(cpu_tmp64, cpu_addr, dc->mem_idx);
-                    else
-                        tcg_gen_qemu_st32(cpu_tmp64, cpu_addr, dc->mem_idx);
-#else
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env, offsetof(CPUSPARCState, fsr));
-                    tcg_gen_qemu_st32(cpu_tmp32, cpu_addr, dc->mem_idx);
+                        gen_address_mask(dc, cpu_addr);
+                        if (rd == 1) {
+                            tcg_gen_qemu_st64(t, cpu_addr, dc->mem_idx);
+                            break;
+                        }
 #endif
+                        tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx);
+                    }
                     break;
                 case 0x26:
 #ifdef TARGET_SPARC64
@@ -5123,12 +5117,14 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_stf_asi(cpu_addr, insn, 8, DFPREG(rd));
                     break;
                 case 0x3c: /* V9 casa */
-                    gen_cas_asi(cpu_val, cpu_addr, cpu_src2, insn, rd);
-                    gen_movl_TN_reg(rd, cpu_val);
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    gen_cas_asi(dc, cpu_addr, cpu_src2, insn, rd);
                     break;
                 case 0x3e: /* V9 casxa */
-                    gen_casx_asi(cpu_val, cpu_addr, cpu_src2, insn, rd);
-                    gen_movl_TN_reg(rd, cpu_val);
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    gen_casx_asi(dc, cpu_addr, cpu_src2, insn, rd);
                     break;
 #else
                 case 0x34: /* stc */
@@ -5140,8 +5136,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 default:
                     goto illegal_insn;
                 }
-            } else
+            } else {
                 goto illegal_insn;
+            }
         }
         break;
     }
@@ -5220,8 +5217,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     goto egress;
 #endif
  egress:
-    tcg_temp_free(cpu_tmp1);
-    tcg_temp_free(cpu_tmp2);
     if (dc->n_t32 != 0) {
         int i;
         for (i = dc->n_t32 - 1; i >= 0; --i) {
@@ -5229,6 +5224,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
         }
         dc->n_t32 = 0;
     }
+    if (dc->n_ttl != 0) {
+        int i;
+        for (i = dc->n_ttl - 1; i >= 0; --i) {
+            tcg_temp_free(dc->ttl[i]);
+        }
+        dc->n_ttl = 0;
+    }
 }
 
 static inline void gen_intermediate_code_internal(TranslationBlock * tb,
@@ -5293,23 +5295,9 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
         last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
 
-        cpu_tmp0 = tcg_temp_new();
-        cpu_tmp32 = tcg_temp_new_i32();
-        cpu_tmp64 = tcg_temp_new_i64();
-        cpu_dst = tcg_temp_new();
-        cpu_val = tcg_temp_new();
-        cpu_addr = tcg_temp_new();
-
         disas_sparc_insn(dc, insn);
         num_insns++;
 
-        tcg_temp_free(cpu_addr);
-        tcg_temp_free(cpu_val);
-        tcg_temp_free(cpu_dst);
-        tcg_temp_free_i64(cpu_tmp64);
-        tcg_temp_free_i32(cpu_tmp32);
-        tcg_temp_free(cpu_tmp0);
-
         if (dc->is_br)
             break;
         /* if the next PC is different, we abort now */
diff --git a/targphys.h b/targphys.h
index 08cade9096..50911fd12f 100644
--- a/targphys.h
+++ b/targphys.h
@@ -3,6 +3,8 @@
 #ifndef TARGPHYS_H
 #define TARGPHYS_H
 
+#ifndef CONFIG_USER_ONLY
+
 #define TARGET_PHYS_ADDR_BITS 64
 /* target_phys_addr_t is the type of a physical address (its size can
    be different from 'target_ulong').  */
@@ -18,3 +20,5 @@ typedef uint64_t target_phys_addr_t;
 #define TARGET_PRIXPHYS PRIX64
 
 #endif
+
+#endif
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 737200e5e6..e790bf04b4 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -467,6 +467,21 @@ static inline void tcg_out_movi32(TCGContext *s,
     }
 }
 
+static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
+                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
+{
+    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+     * rhs must satisfy the "rI" constraint.
+     */
+    if (rhs_is_const) {
+        int rot = encode_imm(rhs);
+        assert(rot >= 0);
+        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+    } else {
+        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+    }
+}
+
 static inline void tcg_out_mul32(TCGContext *s,
                 int cond, int rd, int rs, int rm)
 {
@@ -1557,6 +1572,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_movi_i32:
         tcg_out_movi32(s, COND_AL, args[0], args[1]);
         break;
+    case INDEX_op_movcond_i32:
+        /* Constraints mean that v2 is always in the same register as dest,
+         * so we only need to do "if condition passed, move v1 to dest".
+         */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+                       args[1], args[2], const_args[2]);
+        tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]],
+                       ARITH_MOV, args[0], 0, args[3], const_args[3]);
+        break;
     case INDEX_op_add_i32:
         c = ARITH_ADD;
         goto gen_arith;
@@ -1576,14 +1600,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         c = ARITH_EOR;
         /* Fall through.  */
     gen_arith:
-        if (const_args[2]) {
-            int rot;
-            rot = encode_imm(args[2]);
-            tcg_out_dat_imm(s, COND_AL, c,
-                            args[0], args[1], rotl(args[2], rot) | (rot << 7));
-        } else
-            tcg_out_dat_reg(s, COND_AL, c,
-                            args[0], args[1], args[2], SHIFT_IMM_LSL(0));
+        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_add2_i32:
         tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
@@ -1643,15 +1660,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_brcond_i32:
-        if (const_args[1]) {
-            int rot;
-            rot = encode_imm(args[1]);
-            tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
-                            args[0], rotl(args[1], rot) | (rot << 7));
-        } else {
-            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                            args[0], args[1], SHIFT_IMM_LSL(0));
-        }
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+                       args[0], args[1], const_args[1]);
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
         break;
     case INDEX_op_brcond2_i32:
@@ -1670,15 +1680,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
         break;
     case INDEX_op_setcond_i32:
-        if (const_args[2]) {
-            int rot;
-            rot = encode_imm(args[2]);
-            tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
-                            args[1], rotl(args[2], rot) | (rot << 7));
-        } else {
-            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                            args[1], args[2], SHIFT_IMM_LSL(0));
-        }
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+                       args[1], args[2], const_args[2]);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                         ARITH_MOV, args[0], 0, 1);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
@@ -1788,6 +1791,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
 
     { INDEX_op_brcond_i32, { "r", "rI" } },
     { INDEX_op_setcond_i32, { "r", "r", "rI" } },
+    { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } },
 
     /* TODO: "r", "r", "r", "r", "ri", "ri" */
     { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index e2299cadd3..98fa11b286 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,9 +73,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_movcond_i32      0
-
-#define TCG_TARGET_HAS_GUEST_BASE
+#define TCG_TARGET_HAS_movcond_i32      1
 
 enum {
     TCG_AREG0 = TCG_REG_R6,
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 5351353719..f43fb419ae 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -103,8 +103,6 @@ typedef enum {
 #define TCG_TARGET_HAS_ext8u_i32        0 /* and rd, rs, 0xff */
 #define TCG_TARGET_HAS_ext16u_i32       0 /* and rd, rs, 0xffff */
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #define TCG_AREG0 TCG_REG_R17
 
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index ace63ba37b..dbc6756414 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -125,8 +125,6 @@ typedef enum {
      ((ofs) == 0 && (len) == 16))
 #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #if TCG_TARGET_REG_BITS == 64
 # define TCG_AREG0 TCG_REG_R14
 #else
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 705712f775..06570bea38 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -230,6 +230,8 @@ enum {
     OPC_CMP4_LT_A6            = 0x18400000000ull,
     OPC_CMP4_LTU_A6           = 0x1a400000000ull,
     OPC_CMP4_EQ_A6            = 0x1c400000000ull,
+    OPC_DEP_I14               = 0x0ae00000000ull,
+    OPC_DEP_I15               = 0x08000000000ull,
     OPC_DEP_Z_I12             = 0x0a600000000ull,
     OPC_EXTR_I11              = 0x0a400002000ull,
     OPC_EXTR_U_I11            = 0x0a400000000ull,
@@ -501,6 +503,30 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm,
+                                   int r3, uint64_t pos, uint64_t len)
+{
+    return opc
+           | ((imm & 0x01) << 36)
+           | ((len & 0x3f) << 27)
+           | ((r3 & 0x7f) << 20)
+           | ((pos & 0x3f) << 14)
+           | ((r1 & 0x7f) << 6)
+           | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2,
+                                   int r3, uint64_t pos, uint64_t len)
+{
+    return opc
+           | ((pos & 0x3f) << 31)
+           | ((len & 0x0f) << 27)
+           | ((r3 & 0x7f) << 20)
+           | ((r2 & 0x7f) << 13)
+           | ((r1 & 0x7f) << 6)
+           | (qp & 0x3f);
+}
+
 static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
 {
     return opc
@@ -1312,6 +1338,37 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
 }
 
+static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
+                                   TCGArg a2, int const_a2, int pos, int len)
+{
+    uint64_t i1 = 0, i2 = 0;
+    int cpos = 63 - pos, lm1 = len - 1;
+
+    if (const_a2) {
+        /* Truncate the value of a constant a2 to the width of the field.  */
+        int mask = (1u << len) - 1;
+        a2 &= mask;
+
+        if (a2 == 0 || a2 == mask) {
+            /* 1-bit signed constant inserted into register.  */
+            i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1);
+        } else {
+            /* Otherwise, load any constant into a temporary.  Do this into
+               the first I slot to help out with cross-unit delays.  */
+            i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                            TCG_REG_R2, a2, TCG_REG_R0);
+            a2 = TCG_REG_R2;
+        }
+    }
+    if (i2 == 0) {
+        i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
+    }
+    tcg_out_bundle(s, (i1 ? mII : miI),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   i2);
+}
+
 static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
                                      TCGArg arg2, int cmp4)
 {
@@ -1404,21 +1461,47 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
                    tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
 }
 
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+                                   TCGArg c1, TCGArg c2,
+                                   TCGArg v1, int const_v1,
+                                   TCGArg v2, int const_v2, int cmp4)
+{
+    uint64_t opc1, opc2;
+
+    if (const_v1) {
+        opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+    } else if (ret == v1) {
+        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    } else {
+        opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+    }
+    if (const_v2) {
+        opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+    } else if (ret == v2) {
+        opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    } else {
+        opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+    }
+
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+                   opc1,
+                   opc2);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
 
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
-   R56 is loaded with the address, zero extented on 32-bit targets. */
+   R57 is loaded with the address, zero extented on 32-bit targets. */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     int s_bits, uint64_t offset_rw,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
-                               TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                               TCG_REG_R0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1428,9 +1511,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                    tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
                                offset_rw, TCG_REG_R2),
 #if TARGET_LONG_BITS == 32
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg),
+                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
 #else
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56,
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
                               0, addr_reg),
 #endif
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
@@ -1438,12 +1521,13 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
     tcg_out_bundle(s, mII,
                    tcg_opc_m3 (TCG_REG_P0,
                                (TARGET_LONG_BITS == 32
-                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57,
+                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
                                TCG_REG_R2, offset_addend - offset_rw),
-                   tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
+                               TCG_REG_R57, 63 - s_bits,
+                               TARGET_PAGE_BITS - s_bits - 1),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
-                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
+                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1480,8 +1564,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57,
-                               mem_index, TCG_REG_R0),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1489,7 +1573,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     if (bswap && s_bits == 1) {
@@ -1513,23 +1597,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0));
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R8, TCG_REG_R8, 0xb),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1581,8 +1659,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57,
-                              0, data_reg),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[opc]));
@@ -1590,31 +1668,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
 
     if (!bswap || opc == 0) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else if (opc == 1) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15),
+                                   TCG_REG_R2, data_reg, 15, 15));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
     } else if (opc == 2) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31),
+                                   TCG_REG_R2, data_reg, 31, 31));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1622,25 +1711,18 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
     }
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R58, 0, TCG_REG_R57),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56));
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
+                               mem_index, TCG_REG_R0),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
@@ -2092,6 +2174,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_bswap64(s, args[0], args[1]);
         break;
 
+    case INDEX_op_deposit_i32:
+    case INDEX_op_deposit_i64:
+        tcg_out_deposit(s, args[0], args[1], args[2], const_args[2],
+                        args[3], args[4]);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], const_args[0],
                        args[1], const_args[1], args[3], 1);
@@ -2106,6 +2194,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_setcond_i64:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        args[3], const_args[3], args[4], const_args[4], 1);
+        break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        args[3], const_args[3], args[4], const_args[4], 0);
+        break;
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -2196,6 +2292,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
 
     { INDEX_op_brcond_i32, { "rI", "rI" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
@@ -2245,6 +2342,10 @@ static const TCGTargetOpDef ia64_op_defs[] = {
 
     { INDEX_op_brcond_i64, { "rI", "rI" } },
     { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+    { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
+
+    { INDEX_op_deposit_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_deposit_i64, { "r", "rZ", "ri" } },
 
     { INDEX_op_qemu_ld8u, { "r", "r" } },
     { INDEX_op_qemu_ld8s, { "r", "r" } },
@@ -2269,9 +2370,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     int frame_size;
 
     /* reserve some stack space */
-    frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = TCG_STATIC_CALL_ARGS_SIZE +
+                 CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* First emit adhoc function descriptor */
     *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */
@@ -2378,6 +2482,4 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
 
     tcg_add_target_add_op_defs(ia64_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 368aee4196..91fe7a3b06 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -131,10 +131,13 @@ typedef enum {
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_rot_i64          1
-#define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i32      0
-#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_deposit_i64      1
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
+#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub r1, r0, r3 */
@@ -144,9 +147,6 @@ typedef enum {
 
 #define TCG_AREG0 TCG_REG_R7
 
-/* Guest base is supported */
-#define TCG_TARGET_HAS_GUEST_BASE
-
 static inline void flush_icache_range(tcg_target_ulong start,
                                       tcg_target_ulong stop)
 {
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 7020d65845..65b5c59e89 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -116,9 +116,6 @@ typedef enum {
 
 #define TCG_AREG0 TCG_REG_S0
 
-/* guest base is supported */
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #ifdef __OpenBSD__
 #include <machine/sysarch.h>
 #else
diff --git a/tcg/optimize.c b/tcg/optimize.c
index edb2b0ea90..a06c8eb43e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int32_t)x < (int32_t)y;
+    case TCG_COND_GE:
+        return (int32_t)x >= (int32_t)y;
+    case TCG_COND_LE:
+        return (int32_t)x <= (int32_t)y;
+    case TCG_COND_GT:
+        return (int32_t)x > (int32_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int64_t)x < (int64_t)y;
+    case TCG_COND_GE:
+        return (int64_t)x >= (int64_t)y;
+    case TCG_COND_LE:
+        return (int64_t)x <= (int64_t)y;
+    case TCG_COND_GT:
+        return (int64_t)x > (int64_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GT:
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+    case TCG_COND_GTU:
+    case TCG_COND_NE:
+        return 0;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_EQ:
+        return 1;
+    default:
+        tcg_abort();
+    }
+}
+
 /* Return 2 if the condition can't be simplified, and the result
    of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
@@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
         switch (op_bits(op)) {
         case 32:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int32_t)temps[x].val < (int32_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int32_t)temps[x].val > (int32_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
+            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
         case 64:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int64_t)temps[x].val < (int64_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int64_t)temps[x].val > (int64_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
-        }
-    } else if (temps_are_copies(x, y)) {
-        switch (c) {
-        case TCG_COND_GT:
-        case TCG_COND_LTU:
-        case TCG_COND_LT:
-        case TCG_COND_GTU:
-        case TCG_COND_NE:
-            return 0;
-        case TCG_COND_GE:
-        case TCG_COND_GEU:
-        case TCG_COND_LE:
-        case TCG_COND_LEU:
-        case TCG_COND_EQ:
-            return 1;
+            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
         default:
-            break;
+            tcg_abort();
         }
+    } else if (temps_are_copies(x, y)) {
+        return do_constant_folding_cond_eq(c);
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
         case TCG_COND_LTU:
@@ -381,11 +396,73 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     } else {
         return 2;
     }
+}
 
-    fprintf(stderr,
-            "Unrecognized bitness %d or condition %d in "
-            "do_constant_folding_cond.\n", op_bits(op), c);
-    tcg_abort();
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+    TCGArg al = p1[0], ah = p1[1];
+    TCGArg bl = p2[0], bh = p2[1];
+
+    if (temps[bl].state == TCG_TEMP_CONST
+        && temps[bh].state == TCG_TEMP_CONST) {
+        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+        if (temps[al].state == TCG_TEMP_CONST
+            && temps[ah].state == TCG_TEMP_CONST) {
+            uint64_t a;
+            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+            return do_constant_folding_cond_64(a, b, c);
+        }
+        if (b == 0) {
+            switch (c) {
+            case TCG_COND_LTU:
+                return 0;
+            case TCG_COND_GEU:
+                return 1;
+            default:
+                break;
+            }
+        }
+    }
+    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+        return do_constant_folding_cond_eq(c);
+    }
+    return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+    TCGArg a1 = *p1, a2 = *p2;
+    int sum = 0;
+    sum += temps[a1].state == TCG_TEMP_CONST;
+    sum -= temps[a2].state == TCG_TEMP_CONST;
+
+    /* Prefer the constant in second argument, and then the form
+       op a, a, b, which is better handled on non-RISC hosts. */
+    if (sum > 0 || (sum == 0 && dest == a2)) {
+        *p1 = a2;
+        *p2 = a1;
+        return true;
+    }
+    return false;
+}
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+    int sum = 0;
+    sum += temps[p1[0]].state == TCG_TEMP_CONST;
+    sum += temps[p1[1]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+    if (sum > 0) {
+        TCGArg t;
+        t = p1[0], p1[0] = p2[0], p2[0] = t;
+        t = p1[1], p1[1] = p2[1], p2[1] = t;
+        return true;
+    }
+    return false;
 }
 
 /* Propagate constants and copies, fold constant expressions. */
@@ -397,7 +474,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
-    TCGCond cond;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -440,52 +516,46 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            /* Prefer the constant in second argument, and then the form
-               op a, a, b, which is better handled on non-RISC hosts. */
-            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
-                && temps[args[2]].state != TCG_TEMP_CONST)) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-            }
+            swap_commutative(args[0], &args[1], &args[2]);
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state != TCG_TEMP_CONST) {
-                tmp = args[0];
-                args[0] = args[1];
-                args[1] = tmp;
+            if (swap_commutative(-1, &args[0], &args[1])) {
                 args[2] = tcg_swap_cond(args[2]);
             }
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
+            if (swap_commutative(args[0], &args[1], &args[2])) {
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
         CASE_OP_32_64(movcond):
-            cond = args[5];
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-                cond = tcg_swap_cond(cond);
+            if (swap_commutative(-1, &args[1], &args[2])) {
+                args[5] = tcg_swap_cond(args[5]);
             }
             /* For movcond, we canonicalize the "false" input reg to match
                the destination reg so that the tcg backend can implement
                a "move if true" operation.  */
-            if (args[0] == args[3]) {
-                tmp = args[3];
-                args[3] = args[4];
-                args[4] = tmp;
-                cond = tcg_invert_cond(cond);
+            if (swap_commutative(args[0], &args[4], &args[3])) {
+                args[5] = tcg_invert_cond(args[5]);
             }
-            args[5] = cond;
+            break;
+        case INDEX_op_add2_i32:
+            swap_commutative(args[0], &args[2], &args[4]);
+            swap_commutative(args[1], &args[3], &args[5]);
+            break;
+        case INDEX_op_mulu2_i32:
+            swap_commutative(args[0], &args[2], &args[3]);
+            break;
+        case INDEX_op_brcond2_i32:
+            if (swap_commutative2(&args[0], &args[2])) {
+                args[4] = tcg_swap_cond(args[4]);
+            }
+            break;
+        case INDEX_op_setcond2_i32:
+            if (swap_commutative2(&args[1], &args[3])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            break;
         default:
             break;
         }
@@ -622,6 +692,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             gen_args += 2;
             args += 2;
             break;
+
         CASE_OP_32_64(not):
         CASE_OP_32_64(neg):
         CASE_OP_32_64(ext8s):
@@ -634,14 +705,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
             }
-            gen_args += 2;
-            args += 2;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -665,15 +734,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args += 3;
+                args += 3;
+                break;
             }
-            args += 3;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
@@ -683,33 +748,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                       | ((temps[args[2]].val & tmp) << args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args += 5;
+                args += 5;
+                break;
             }
-            args += 5;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(brcond):
             tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
             if (tmp != 2) {
@@ -721,17 +775,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 } else {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 }
-            } else {
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(movcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
             if (tmp != 2) {
@@ -746,18 +794,125 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_mulu2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST) {
+                uint32_t a = temps[args[2]].val;
+                uint32_t b = temps[args[3]].val;
+                uint64_t r = (uint64_t)a * b;
+                TCGArg rl, rh;
+
+                /* We emit the extra nop when we emit the mulu2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+                gen_args += 4;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_brcond2_i32:
+            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+            if (tmp != 2) {
+                if (tmp) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[5];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                       && temps[args[2]].state == TCG_TEMP_CONST
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[2]].val == 0
+                       && temps[args[3]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                gen_args[0] = args[1];
+                gen_args[1] = args[3];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
             } else {
-                reset_temp(args[0]);
+                goto do_default;
+            }
+            args += 6;
+            break;
+
+        case INDEX_op_setcond2_i32:
+            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+            if (tmp != 2) {
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[4]].state == TCG_TEMP_CONST
+                       && temps[args[3]].val == 0
+                       && temps[args[4]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                 gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args[5] = args[5];
-                gen_args += 6;
+                gen_args[1] = args[2];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+            } else {
+                goto do_default;
             }
             args += 6;
             break;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -776,11 +931,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 i--;
             }
             break;
+
         default:
-            /* Default case: we do know nothing about operation so no
-               propagation is done.  We trash everything if the operation
-               is the end of a basic block, otherwise we only trash the
-               output args.  */
+        do_default:
+            /* Default case: we know nothing about operation (or were unable
+               to compute the operation result) so no propagation is done.
+               We trash everything if the operation is the end of a basic
+               block, otherwise we only trash the output args.  */
             if (def->flags & TCG_OPF_BB_END) {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 3259d898ab..ad433ae5bb 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -96,8 +96,6 @@ typedef enum {
 
 #define TCG_AREG0 TCG_REG_R27
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #define tcg_qemu_tb_exec(env, tb_ptr) \
     ((long __attribute__ ((longcall)) \
       (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 57569e8938..97fc5c9885 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -108,5 +108,4 @@ typedef enum {
 
 #define TCG_AREG0 TCG_REG_R27
 
-#define TCG_TARGET_HAS_GUEST_BASE
 #define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index ed55c331c6..a0181aef74 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -88,8 +88,6 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 /* used for function call generation */
 #define TCG_REG_CALL_STACK		TCG_REG_R15
 #define TCG_TARGET_STACK_ALIGN		8
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index c2fbb23abb..0e7d398a41 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -125,8 +125,6 @@ typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      1
 #endif
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #define TCG_AREG0 TCG_REG_I0
 
 static inline void flush_icache_range(tcg_target_ulong start,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 551845801d..8100a5a2e0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1018,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 
     tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
                     TCGV_LOW(arg1), TCGV_LOW(arg2));
+    /* Allow the optimizer room to replace mulu2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 32cd0c6b65..5faaca5ebf 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -62,10 +62,6 @@
 
 #include "elf.h"
 
-#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
-#error GUEST_BASE not supported on this host.
-#endif
-
 /* Forward declarations for functions declared in tcg-target.c and used here. */
 static void tcg_target_init(TCGContext *s);
 static void tcg_target_qemu_prologue(TCGContext *s);
@@ -1307,8 +1303,58 @@ static void tcg_liveness_analysis(TCGContext *s)
             break;
         case INDEX_op_end:
             break;
-            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            args -= 6;
+            nb_iargs = 4;
+            nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part.  The result can be optimized to a simple
+               add or sub.  This happens often for x86_64 guest when the
+               cpu mode is set to 32 bit.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                /* Create the single operation plus nop.  */
+                if (op == INDEX_op_add2_i32) {
+                    op = INDEX_op_add_i32;
+                } else {
+                    op = INDEX_op_sub_i32;
+                }
+                gen_opc_buf[op_index] = op;
+                args[1] = args[2];
+                args[2] = args[4];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+                /* Fall through and mark the single-word operation live.  */
+                nb_iargs = 2;
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
+        case INDEX_op_mulu2_i32:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            /* Likewise, test for the high part of the operation dead.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                args[1] = args[2];
+                args[2] = args[3];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                /* Fall through and mark the single-word operation live.  */
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
+            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
@@ -1322,6 +1368,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     if (!dead_temps[arg])
                         goto do_not_remove;
                 }
+            do_remove:
                 tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
                 s->del_op_count++;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7bafe0eeb6..45e94f536c 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -616,7 +616,7 @@ TCGv_i64 tcg_const_i64(int64_t val);
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-extern uint8_t code_gen_prologue[];
+extern uint8_t *code_gen_prologue;
 
 /* TCG targets may use a different definition of tcg_qemu_tb_exec. */
 #if !defined(tcg_qemu_tb_exec)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 6d894953aa..37f28c0522 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -102,9 +102,6 @@
 #define TCG_TARGET_HAS_movcond_i64      0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-/* Offset to user memory in user mode. */
-#define TCG_TARGET_HAS_GUEST_BASE
-
 /* Number of registers available.
    For 32 bit hosts, we need more than 8 registers (call arguments). */
 /* #define TCG_TARGET_NB_REGS 8 */
diff --git a/tests/tcg/Makefile b/tests/tcg/Makefile
index 15e36a208c..80b1a4b529 100644
--- a/tests/tcg/Makefile
+++ b/tests/tcg/Makefile
@@ -1,13 +1,13 @@
--include ../config-host.mak
+-include ../../config-host.mak
 -include $(SRC_PATH)/rules.mak
 
-$(call set-vpath, $(SRC_PATH)/tests)
+$(call set-vpath, $(SRC_PATH)/tests/tcg)
 
-QEMU=../i386-linux-user/qemu-i386
-QEMU_X86_64=../x86_64-linux-user/qemu-x86_64
+QEMU=../../i386-linux-user/qemu-i386
+QEMU_X86_64=../../x86_64-linux-user/qemu-x86_64
 CC_X86_64=$(CC_I386) -m64
 
-QEMU_INCLUDES += -I..
+QEMU_INCLUDES += -I../..
 CFLAGS=-Wall -O2 -g -fno-strict-aliasing
 #CFLAGS+=-msse2
 LDFLAGS=
@@ -36,6 +36,7 @@ TESTS += $(I386_TESTS)
 endif
 
 all: $(patsubst %,run-%,$(TESTS))
+test: all
 
 # rules to run tests
 
@@ -74,7 +75,10 @@ run-test_path: test_path
 # rules to compile tests
 
 test_path: test_path.o
+	$(CC_I386) $(LDFLAGS) -o $@ $^ $(LIBS)
+
 test_path.o: test_path.c
+	$(CC_I386) $(QEMU_INCLUDES) $(GLIB_CFLAGS) $(CFLAGS) -c -o $@ $^
 
 hello-i386: hello-i386.c
 	$(CC_I386) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $<
@@ -86,12 +90,12 @@ testthread: testthread.c
 # i386/x86_64 emulation test (test various opcodes) */
 test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \
            test-i386.h test-i386-shift.h test-i386-muldiv.h
-	$(CC_I386) $(CFLAGS) $(LDFLAGS) -o $@ \
+	$(CC_I386) $(QEMU_INCLUDES) $(CFLAGS) $(LDFLAGS) -o $@ \
               $(<D)/test-i386.c $(<D)/test-i386-code16.S $(<D)/test-i386-vm86.S -lm
 
 test-x86_64: test-i386.c \
            test-i386.h test-i386-shift.h test-i386-muldiv.h
-	$(CC_X86_64) $(CFLAGS) $(LDFLAGS) -o $@ $(<D)/test-i386.c -lm
+	$(CC_X86_64) $(QEMU_INCLUDES) $(CFLAGS) $(LDFLAGS) -o $@ $(<D)/test-i386.c -lm
 
 # generic Linux and CPU test
 linux-test: linux-test.c
diff --git a/tests/tcg/linux-test.c b/tests/tcg/linux-test.c
index 2e4a746ac3..83cb32ddb9 100644
--- a/tests/tcg/linux-test.c
+++ b/tests/tcg/linux-test.c
@@ -16,6 +16,7 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#define _GNU_SOURCE
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -38,6 +39,7 @@
 #include <dirent.h>
 #include <setjmp.h>
 #include <sys/shm.h>
+#include <sched.h>
 
 #define TESTPATH "/tmp/linux-test.tmp"
 #define TESTPORT 7654
diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c
index 8e64bbaf38..64d929e482 100644
--- a/tests/tcg/test-i386.c
+++ b/tests/tcg/test-i386.c
@@ -17,6 +17,7 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #define _GNU_SOURCE
+#include "compiler.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -1827,7 +1828,7 @@ void test_exceptions(void)
     printf("lock nop exception:\n");
     if (setjmp(jmp_env) == 0) {
         /* now execute an invalid instruction */
-        asm volatile("lock nop");
+        asm volatile(".byte 0xf0, 0x90"); /* lock nop */
     }
 
     printf("INT exception:\n");
diff --git a/tests/tcg/test_path.c b/tests/tcg/test_path.c
index 7265a9445d..a064eea8fb 100644
--- a/tests/tcg/test_path.c
+++ b/tests/tcg/test_path.c
@@ -1,11 +1,12 @@
 /* Test path override code */
-#include "../config-host.h"
-#include "../qemu-malloc.c"
-#include "../cutils.c"
-#include "../path.c"
-#include "../trace.c"
+#define _GNU_SOURCE
+#include "config-host.h"
+#include "iov.c"
+#include "cutils.c"
+#include "path.c"
+#include "trace.c"
 #ifdef CONFIG_TRACE_SIMPLE
-#include "../simpletrace.c"
+#include "../trace/simple.c"
 #endif
 
 #include <stdarg.h>
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 087b84d319..3c592b3f3d 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -33,21 +33,21 @@
 /*
  * Locking:
  *
- * There is three levels of locking:
+ * There are three levels of locking:
  * - jobs queue lock: for each operation on the queue (push, pop, isEmpty?)
  * - VncDisplay global lock: mainly used for framebuffer updates to avoid
  *                      screen corruption if the framebuffer is updated
- *			while the worker is doing something.
+ *                      while the worker is doing something.
  * - VncState::output lock: used to make sure the output buffer is not corrupted
- * 		   	 if two threads try to write on it at the same time
+ *                          if two threads try to write on it at the same time
  *
- * While the VNC worker thread is working, the VncDisplay global lock is hold
- * to avoid screen corruptions (this does not block vnc_refresh() because it
- * uses trylock()) but the output lock is not hold because the thread work on
+ * While the VNC worker thread is working, the VncDisplay global lock is held
+ * to avoid screen corruption (this does not block vnc_refresh() because it
+ * uses trylock()) but the output lock is not held because the thread works on
  * its own output buffer.
  * When the encoding job is done, the worker thread will hold the output lock
  * and copy its output buffer in vs->output.
-*/
+ */
 
 struct VncJobQueue {
     QemuCond cond;
@@ -62,7 +62,7 @@ typedef struct VncJobQueue VncJobQueue;
 
 /*
  * We use a single global queue, but most of the functions are
- * already reetrant, so we can easilly add more than one encoding thread
+ * already reentrant, so we can easily add more than one encoding thread
  */
 static VncJobQueue *queue;
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 33e6386a6e..66ae93010f 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1806,10 +1806,12 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
             vs->features |= VNC_FEATURE_TIGHT_MASK;
             vs->vnc_encoding = enc;
             break;
+#ifdef CONFIG_VNC_PNG
         case VNC_ENCODING_TIGHT_PNG:
             vs->features |= VNC_FEATURE_TIGHT_PNG_MASK;
             vs->vnc_encoding = enc;
             break;
+#endif
         case VNC_ENCODING_ZLIB:
             vs->features |= VNC_FEATURE_ZLIB_MASK;
             vs->vnc_encoding = enc;
diff --git a/vl.c b/vl.c
index 5b357a3b06..ee3c43ae2f 100644
--- a/vl.c
+++ b/vl.c
@@ -3638,8 +3638,13 @@ int main(int argc, char **argv, char **envp)
 
     qdev_machine_init();
 
-    machine->init(ram_size, boot_devices,
-                  kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
+    QEMUMachineInitArgs args = { .ram_size = ram_size,
+                                 .boot_device = boot_devices,
+                                 .kernel_filename = kernel_filename,
+                                 .kernel_cmdline = kernel_cmdline,
+                                 .initrd_filename = initrd_filename,
+                                 .cpu_model = cpu_model };
+    machine->init(&args);
 
     cpu_synchronize_all_post_init();