summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile47
-rw-r--r--Makefile.hw2
-rw-r--r--Makefile.objs2
-rw-r--r--Makefile.target11
-rw-r--r--audio/audio_template.h2
-rw-r--r--block/curl.c14
-rw-r--r--block/qcow2.c8
-rw-r--r--bsd-user/main.c9
-rw-r--r--console.h2
-rw-r--r--darwin-user/commpage.c2
-rw-r--r--darwin-user/syscall.c2
-rw-r--r--default-configs/i386-softmmu.mak2
-rw-r--r--default-configs/mips-softmmu.mak2
-rw-r--r--default-configs/mips64-softmmu.mak2
-rw-r--r--default-configs/mips64el-softmmu.mak2
-rw-r--r--default-configs/mipsel-softmmu.mak2
-rw-r--r--default-configs/ppc-softmmu.mak2
-rw-r--r--default-configs/ppc64-softmmu.mak2
-rw-r--r--default-configs/ppcemb-softmmu.mak2
-rw-r--r--default-configs/sparc64-softmmu.mak1
-rw-r--r--default-configs/x86_64-softmmu.mak2
-rw-r--r--exec.c5
-rw-r--r--hw/arm_timer.c4
-rw-r--r--hw/axis_dev88.c31
-rw-r--r--hw/cirrus_vga.c1
-rw-r--r--hw/dma.c17
-rw-r--r--hw/elf_ops.h5
-rw-r--r--hw/isa.h2
-rw-r--r--hw/lsi53c895a.c4
-rw-r--r--hw/mips_jazz.c13
-rw-r--r--hw/mips_malta.c13
-rw-r--r--hw/pc.c50
-rw-r--r--hw/pc.h5
-rw-r--r--hw/pckbd.c112
-rw-r--r--hw/pflash_cfi01.c20
-rw-r--r--hw/ppc_prep.c15
-rw-r--r--hw/sparc32_dma.c12
-rw-r--r--hw/sun4m.c6
-rw-r--r--hw/sun4u.c6
-rw-r--r--hw/vga.c2
-rw-r--r--hw/vhost_net.c2
-rw-r--r--hw/vmmouse.c2
-rw-r--r--hxtool16
-rw-r--r--ia64-dis.c9
-rw-r--r--linux-user/main.c24
-rw-r--r--linux-user/syscall.c99
-rw-r--r--linux-user/syscall_defs.h10
-rw-r--r--nbd.c3
-rw-r--r--qemu-img-cmds.hx2
-rw-r--r--qemu-img.c10
-rw-r--r--qemu-io.c58
-rw-r--r--qemu-options.hx15
-rw-r--r--rules.mak2
-rw-r--r--target-cris/mmu.c2
-rw-r--r--target-cris/translate.c4
-rw-r--r--target-ppc/translate.c7
-rw-r--r--target-sparc/cpu.h73
-rw-r--r--target-sparc/helper.c130
-rw-r--r--target-sparc/op_helper.c250
-rw-r--r--target-sparc/translate.c286
-rw-r--r--tcg/hppa/tcg-target.c12
-rw-r--r--tcg/i386/tcg-target.c709
-rw-r--r--tcg/ia64/tcg-target.c199
-rw-r--r--tcg/tcg.c3
-rw-r--r--tcg/tcg.h1
66 files changed, 1520 insertions, 852 deletions
diff --git a/.gitignore b/.gitignore
index dfc8e5b133..fdfe2f01e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,6 @@ config-devices.*
 config-all-devices.*
 config-host.*
 config-target.*
-i386
 *-softmmu
 *-darwin-user
 *-linux-user
diff --git a/Makefile b/Makefile
index 306a1a4038..7986bf6922 100644
--- a/Makefile
+++ b/Makefile
@@ -299,43 +299,22 @@ tar:
 	cd /tmp && tar zcvf ~/$(FILE).tar.gz $(FILE) --exclude CVS --exclude .git --exclude .svn
 	rm -rf /tmp/$(FILE)
 
+SYSTEM_TARGETS=$(filter %-softmmu,$(TARGET_DIRS))
+SYSTEM_PROGS=$(patsubst qemu-system-i386,qemu, \
+             $(patsubst %-softmmu,qemu-system-%, \
+             $(SYSTEM_TARGETS)))
+
+USER_TARGETS=$(filter %-user,$(TARGET_DIRS))
+USER_PROGS=$(patsubst %-bsd-user,qemu-%, \
+           $(patsubst %-darwin-user,qemu-%, \
+           $(patsubst %-linux-user,qemu-%, \
+           $(USER_TARGETS))))
+
 # generate a binary distribution
 tarbin:
 	cd / && tar zcvf ~/qemu-$(VERSION)-$(ARCH).tar.gz \
-	$(bindir)/qemu \
-	$(bindir)/qemu-system-x86_64 \
-	$(bindir)/qemu-system-arm \
-	$(bindir)/qemu-system-cris \
-	$(bindir)/qemu-system-m68k \
-	$(bindir)/qemu-system-microblaze \
-	$(bindir)/qemu-system-mips \
-	$(bindir)/qemu-system-mipsel \
-	$(bindir)/qemu-system-mips64 \
-	$(bindir)/qemu-system-mips64el \
-	$(bindir)/qemu-system-ppc \
-	$(bindir)/qemu-system-ppcemb \
-	$(bindir)/qemu-system-ppc64 \
-	$(bindir)/qemu-system-sh4 \
-	$(bindir)/qemu-system-sh4eb \
-	$(bindir)/qemu-system-sparc \
-	$(bindir)/qemu-i386 \
-	$(bindir)/qemu-x86_64 \
-	$(bindir)/qemu-alpha \
-	$(bindir)/qemu-arm \
-	$(bindir)/qemu-armeb \
-	$(bindir)/qemu-cris \
-	$(bindir)/qemu-m68k \
-	$(bindir)/qemu-microblaze \
-	$(bindir)/qemu-mips \
-	$(bindir)/qemu-mipsel \
-	$(bindir)/qemu-ppc \
-	$(bindir)/qemu-ppc64 \
-	$(bindir)/qemu-ppc64abi32 \
-	$(bindir)/qemu-sh4 \
-	$(bindir)/qemu-sh4eb \
-	$(bindir)/qemu-sparc \
-	$(bindir)/qemu-sparc64 \
-	$(bindir)/qemu-sparc32plus \
+	$(patsubst %,$(bindir)/%, $(SYSTEM_PROGS)) \
+	$(patsubst %,$(bindir)/%, $(USER_PROGS)) \
 	$(bindir)/qemu-img \
 	$(bindir)/qemu-nbd \
 	$(datadir)/bios.bin \
diff --git a/Makefile.hw b/Makefile.hw
index be35359e8c..b9181ab122 100644
--- a/Makefile.hw
+++ b/Makefile.hw
@@ -18,7 +18,7 @@ all: $(hw-obj-y)
 	@true
 
 clean:
-	rm -f *.o *.d *.a *~
+	rm -f *.o */*.o *.d */*.d *.a */*.a *~ */*~
 
 # Include automatically generated dependency files
 -include $(wildcard *.d */*.d)
diff --git a/Makefile.objs b/Makefile.objs
index acbaf22745..15851010ca 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -151,10 +151,12 @@ hw-obj-$(CONFIG_SERIAL) += serial.o
 hw-obj-$(CONFIG_PARALLEL) += parallel.o
 hw-obj-$(CONFIG_I8254) += i8254.o
 hw-obj-$(CONFIG_PCSPK) += pcspk.o
+hw-obj-$(CONFIG_PCKBD) += pckbd.o
 hw-obj-$(CONFIG_USB_UHCI) += usb-uhci.o
 hw-obj-$(CONFIG_FDC) += fdc.o
 hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
 hw-obj-$(CONFIG_APM) += pm_smbus.o apm.o
+hw-obj-$(CONFIG_DMA) += dma.o
 
 # PPC devices
 hw-obj-$(CONFIG_OPENPIC) += openpic.o
diff --git a/Makefile.target b/Makefile.target
index a22484e1e9..fda5bf3c73 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -188,7 +188,6 @@ obj-y += rtl8139.o
 obj-y += e1000.o
 
 # Hardware support
-obj-i386-y = pckbd.o dma.o
 obj-i386-y += vga.o
 obj-i386-y += mc146818rtc.o i8259.o pc.o
 obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
@@ -199,9 +198,9 @@ obj-i386-y += pc_piix.o
 
 # shared objects
 obj-ppc-y = ppc.o
-obj-ppc-y += vga.o dma.o
+obj-ppc-y += vga.o
 # PREP target
-obj-ppc-y += pckbd.o i8259.o mc146818rtc.o
+obj-ppc-y += i8259.o mc146818rtc.o
 obj-ppc-y += ppc_prep.o
 # OldWorld PowerMac
 obj-ppc-y += ppc_oldworld.o
@@ -217,9 +216,9 @@ obj-ppc-$(CONFIG_FDT) += device_tree.o
 
 obj-mips-y = mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o
 obj-mips-y += mips_addr.o mips_timer.o mips_int.o
-obj-mips-y += dma.o vga.o i8259.o
+obj-mips-y += vga.o i8259.o
 obj-mips-y += g364fb.o jazz_led.o
-obj-mips-y += gt64xxx.o pckbd.o mc146818rtc.o
+obj-mips-y += gt64xxx.o mc146818rtc.o
 obj-mips-y += piix4.o cirrus_vga.o
 
 obj-microblaze-y = petalogix_s3adsp1800_mmu.o
@@ -243,7 +242,7 @@ obj-cris-y += etraxfs_timer.o
 obj-cris-y += etraxfs_ser.o
 
 ifeq ($(TARGET_ARCH), sparc64)
-obj-sparc-y = sun4u.o pckbd.o apb_pci.o
+obj-sparc-y = sun4u.o apb_pci.o
 obj-sparc-y += vga.o
 obj-sparc-y += mc146818rtc.o
 obj-sparc-y += cirrus_vga.o
diff --git a/audio/audio_template.h b/audio/audio_template.h
index 6b19848af4..2f5224ba29 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -541,7 +541,7 @@ uint64_t glue (AUD_get_elapsed_usec_, TYPE) (SW *sw, QEMUAudioTimeStamp *ts)
 
     cur_ts = sw->hw->ts_helper;
     old_ts = ts->old_ts;
-    /* dolog ("cur %lld old %lld\n", cur_ts, old_ts); */
+    /* dolog ("cur %" PRId64 " old %" PRId64 "\n", cur_ts, old_ts); */
 
     if (cur_ts >= old_ts) {
         delta = cur_ts - old_ts;
diff --git a/block/curl.c b/block/curl.c
index b944740fb9..407f0955a3 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -104,10 +104,11 @@ static size_t curl_size_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
     CURLState *s = ((CURLState*)opaque);
     size_t realsize = size * nmemb;
-    long long fsize;
+    size_t fsize;
 
-    if(sscanf(ptr, "Content-Length: %lld", &fsize) == 1)
+    if(sscanf(ptr, "Content-Length: %zd", &fsize) == 1) {
         s->s->len = fsize;
+    }
 
     return realsize;
 }
@@ -118,7 +119,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
     size_t realsize = size * nmemb;
     int i;
 
-    DPRINTF("CURL: Just reading %lld bytes\n", (unsigned long long)realsize);
+    DPRINTF("CURL: Just reading %zd bytes\n", realsize);
 
     if (!s || !s->orig_buf)
         goto read_end;
@@ -368,7 +369,7 @@ static int curl_open(BlockDriverState *bs, const char *filename, int flags)
         s->len = (size_t)d;
     else if(!s->len)
         goto out;
-    DPRINTF("CURL: Size = %lld\n", (long long)s->len);
+    DPRINTF("CURL: Size = %zd\n", s->len);
 
     curl_clean_state(state);
     curl_easy_cleanup(state->curl);
@@ -450,8 +451,9 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
     state->orig_buf = qemu_malloc(state->buf_len);
     state->acb[0] = acb;
 
-    snprintf(state->range, 127, "%lld-%lld", (long long)start, (long long)end);
-    DPRINTF("CURL (AIO): Reading %d at %lld (%s)\n", (nb_sectors * SECTOR_SIZE), start, state->range);
+    snprintf(state->range, 127, "%zd-%zd", start, end);
+    DPRINTF("CURL (AIO): Reading %d at %zd (%s)\n",
+            (nb_sectors * SECTOR_SIZE), start, state->range);
     curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
 
     curl_multi_add_handle(s->multi, state->curl);
diff --git a/block/qcow2.c b/block/qcow2.c
index 0ce71507e9..5b72758915 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -93,8 +93,9 @@ static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
 #endif
 
         if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
-            fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
-                    (unsigned long long)offset);
+            fprintf(stderr, "qcow_handle_extension: ERROR: "
+                    "pread fail from offset %" PRIu64 "\n",
+                    offset);
             return 1;
         }
         be32_to_cpus(&ext.magic);
@@ -1245,7 +1246,8 @@ static void dump_refcounts(BlockDriverState *bs)
         k++;
         while (k < nb_clusters && get_refcount(bs, k) == refcount)
             k++;
-        printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1);
+        printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
+               k - k1);
     }
 }
 #endif
diff --git a/bsd-user/main.c b/bsd-user/main.c
index b1c438d895..05cc3d92b7 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -30,7 +30,7 @@
 #include "qemu-common.h"
 /* For tb_lock */
 #include "exec-all.h"
-
+#include "tcg.h"
 #include "qemu-timer.h"
 #include "envlist.h"
 
@@ -970,6 +970,13 @@ int main(int argc, char **argv)
     syscall_init();
     signal_init();
 
+#if defined(CONFIG_USE_GUEST_BASE)
+    /* Now that we've loaded the binary, GUEST_BASE is fixed.  Delay
+       generating the prologue until now so that the prologue can take
+       the real value of GUEST_BASE into account.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
+
     /* build Task State */
     memset(ts, 0, sizeof(TaskState));
     init_task_state(ts);
diff --git a/console.h b/console.h
index 6def1152e6..3a75bcc4d0 100644
--- a/console.h
+++ b/console.h
@@ -306,6 +306,8 @@ static inline int ds_get_bytes_per_pixel(DisplayState *ds)
 typedef unsigned long console_ch_t;
 static inline void console_write_ch(console_ch_t *dest, uint32_t ch)
 {
+    if (!(ch & 0xff))
+        ch |= ' ';
     cpu_to_le32wu((uint32_t *) dest, ch);
 }
 
diff --git a/darwin-user/commpage.c b/darwin-user/commpage.c
index 2b41bc5e3a..f6aa71e058 100644
--- a/darwin-user/commpage.c
+++ b/darwin-user/commpage.c
@@ -237,7 +237,7 @@ void do_compare_and_swap64(void *cpu_env, int num)
     uint64_t *value = (uint64_t*)((CPUX86State*)cpu_env)->regs[R_ESI];
     old = (uint64_t)((uint64_t)((CPUX86State*)cpu_env)->regs[R_EDX]) << 32 | (uint64_t)((CPUX86State*)cpu_env)->regs[R_EAX];
 
-    DPRINTF("commpage: compare_and_swap64(%llx,new,%p)\n", old, value);
+    DPRINTF("commpage: compare_and_swap64(%" PRIx64 ",new,%p)\n", old, value);
     swapped_val = tswap64(*value);
 
     if(old == swapped_val)
diff --git a/darwin-user/syscall.c b/darwin-user/syscall.c
index d774ad353e..060acc889d 100644
--- a/darwin-user/syscall.c
+++ b/darwin-user/syscall.c
@@ -858,7 +858,7 @@ long no_syscall(void *cpu_env, int num);
 
 long do_pread(uint32_t arg1, void * arg2, size_t arg3, off_t arg4)
 {
-    DPRINTF("0x%x, %p, 0x%lx, 0x%llx\n", arg1, arg2, arg3, arg4);
+    DPRINTF("0x%x, %p, 0x%lx, 0x%" PRIx64 "\n", arg1, arg2, arg3, arg4);
     long ret = pread(arg1, arg2, arg3, arg4);
     return ret;
 }
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 4fbd8aae2c..ed00471da2 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -8,10 +8,12 @@ CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
 CONFIG_PCSPK=y
+CONFIG_PCKBD=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_APM=y
+CONFIG_DMA=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mips-softmmu.mak b/default-configs/mips-softmmu.mak
index fd6fbc1808..29be52eb1d 100644
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -10,10 +10,12 @@ CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
 CONFIG_PCSPK=y
+CONFIG_PCKBD=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_APM=y
+CONFIG_DMA=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mips64-softmmu.mak b/default-configs/mips64-softmmu.mak
index dfd30dbbea..9bae8a7bc2 100644
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -10,10 +10,12 @@ CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
 CONFIG_PCSPK=y
+CONFIG_PCKBD=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_APM=y
+CONFIG_DMA=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mips64el-softmmu.mak b/default-configs/mips64el-softmmu.mak
index 6fa54a3e15..b372c1dfa1 100644
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -10,10 +10,12 @@ CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
 CONFIG_PCSPK=y
+CONFIG_PCKBD=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_APM=y
+CONFIG_DMA=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/mipsel-softmmu.mak b/default-configs/mipsel-softmmu.mak
index 8203997b69..10ef483799 100644
--- a/default-configs/mipsel-softmmu.mak
+++ b/default-configs/mipsel-softmmu.mak
@@ -10,10 +10,12 @@ CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
 CONFIG_PCSPK=y
+CONFIG_PCKBD=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_APM=y
+CONFIG_DMA=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 958bf755a7..c026bbb00e 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -8,7 +8,9 @@ CONFIG_M48T59=y
 CONFIG_VGA_PCI=y
 CONFIG_SERIAL=y
 CONFIG_I8254=y
+CONFIG_PCKBD=y
 CONFIG_FDC=y
+CONFIG_DMA=y
 CONFIG_OPENPIC=y
 CONFIG_PREP_PCI=y
 CONFIG_MACIO=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 9896662a1e..0101a283e3 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -8,7 +8,9 @@ CONFIG_M48T59=y
 CONFIG_VGA_PCI=y
 CONFIG_SERIAL=y
 CONFIG_I8254=y
+CONFIG_PCKBD=y
 CONFIG_FDC=y
+CONFIG_DMA=y
 CONFIG_OPENPIC=y
 CONFIG_PREP_PCI=y
 CONFIG_MACIO=y
diff --git a/default-configs/ppcemb-softmmu.mak b/default-configs/ppcemb-softmmu.mak
index 2e41a942bc..8ba9ac179f 100644
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -8,7 +8,9 @@ CONFIG_M48T59=y
 CONFIG_VGA_PCI=y
 CONFIG_SERIAL=y
 CONFIG_I8254=y
+CONFIG_PCKBD=y
 CONFIG_FDC=y
+CONFIG_DMA=y
 CONFIG_OPENPIC=y
 CONFIG_PREP_PCI=y
 CONFIG_MACIO=y
diff --git a/default-configs/sparc64-softmmu.mak b/default-configs/sparc64-softmmu.mak
index 514d3347e5..1cc3f13079 100644
--- a/default-configs/sparc64-softmmu.mak
+++ b/default-configs/sparc64-softmmu.mak
@@ -6,6 +6,7 @@ CONFIG_PTIMER=y
 CONFIG_VGA_PCI=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
+CONFIG_PCKBD=y
 CONFIG_FDC=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index ffedfdbd68..518320377f 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -8,10 +8,12 @@ CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
 CONFIG_PCSPK=y
+CONFIG_PCKBD=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_APM=y
+CONFIG_DMA=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
diff --git a/exec.c b/exec.c
index 56b5561884..bb3dcadcde 100644
--- a/exec.c
+++ b/exec.c
@@ -574,6 +574,11 @@ void cpu_exec_init_all(unsigned long tb_size)
 #if !defined(CONFIG_USER_ONLY)
     io_mem_init();
 #endif
+#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
+    /* There's no guest base to take into account, so go ahead and
+       initialize the prologue now.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
 }
 
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
diff --git a/hw/arm_timer.c b/hw/arm_timer.c
index 9fef191cbc..9073ffc007 100644
--- a/hw/arm_timer.c
+++ b/hw/arm_timer.c
@@ -71,7 +71,7 @@ static void arm_timer_recalibrate(arm_timer_state *s, int reload)
 {
     uint32_t limit;
 
-    if ((s->control & TIMER_CTRL_PERIODIC) == 0) {
+    if ((s->control & (TIMER_CTRL_PERIODIC | TIMER_CTRL_ONESHOT)) == 0) {
         /* Free running.  */
         if (s->control & TIMER_CTRL_32BIT)
             limit = 0xffffffff;
@@ -113,7 +113,7 @@ static void arm_timer_write(void *opaque, target_phys_addr_t offset,
         case 1: freq >>= 4; break;
         case 2: freq >>= 8; break;
         }
-        arm_timer_recalibrate(s, 0);
+        arm_timer_recalibrate(s, s->control & TIMER_CTRL_ENABLE);
         ptimer_set_freq(s->timer, freq);
         if (s->control & TIMER_CTRL_ENABLE) {
             /* Restart the timer if still enabled.  */
diff --git a/hw/axis_dev88.c b/hw/axis_dev88.c
index 5516e42528..7d59c96ca1 100644
--- a/hw/axis_dev88.c
+++ b/hw/axis_dev88.c
@@ -240,13 +240,22 @@ static CPUWriteMemoryFunc * const gpio_write[] = {
 
 #define INTMEM_SIZE (128 * 1024)
 
-static uint32_t bootstrap_pc;
+static struct {
+    uint32_t bootstrap_pc;
+    uint32_t regs[16];
+} loadargs;
+
 static void main_cpu_reset(void *opaque)
 {
+    int i;
+
     CPUState *env = opaque;
     cpu_reset(env);
 
-    env->pc = bootstrap_pc;
+    env->pc = loadargs.bootstrap_pc;
+    for (i = 0; i < 16; i++) {
+        env->regs[i] = loadargs.regs[i];
+    }
 }
 
 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
@@ -352,15 +361,15 @@ void axisdev88_init (ram_addr_t ram_size,
            devboard SDK.  */
         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                                &entry, NULL, &high, 0, ELF_MACHINE, 0);
-        bootstrap_pc = entry;
+        loadargs.bootstrap_pc = entry;
         if (kernel_size < 0) {
             /* Takes a kimage from the axis devboard SDK.  */
             kernel_size = load_image_targphys(kernel_filename, 0x40004000,
                                               ram_size);
-            bootstrap_pc = 0x40004000;
-            env->regs[9] = 0x40004000 + kernel_size;
+            loadargs.bootstrap_pc = 0x40004000;
+            loadargs.regs[9] = 0x40004000 + kernel_size;
         }
-        env->regs[8] = 0x56902387; /* RAM init magic.  */
+        loadargs.regs[8] = 0x56902387; /* RAM init magic.  */
 
         if (kernel_cmdline && (kcmdline_len = strlen(kernel_cmdline))) {
             if (kcmdline_len > 256) {
@@ -368,15 +377,11 @@ void axisdev88_init (ram_addr_t ram_size,
                 exit(1);
             }
             /* Let the kernel know we are modifying the cmdline.  */
-            env->regs[10] = 0x87109563;
-            env->regs[11] = 0x40000000;
-            pstrcpy_targphys("cmdline", env->regs[11], 256, kernel_cmdline);
+            loadargs.regs[10] = 0x87109563;
+            loadargs.regs[11] = 0x40000000;
+            pstrcpy_targphys("cmdline", loadargs.regs[11], 256, kernel_cmdline);
         }
     }
-    env->pc = bootstrap_pc;
-
-    printf ("pc =%x\n", env->pc);
-    printf ("ram size =%ld\n", ram_size);
 }
 
 static QEMUMachine axisdev88_machine = {
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 9f61a01d45..ba4828996a 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -2985,7 +2985,6 @@ static const VMStateDescription vmstate_pci_cirrus_vga = {
     .version_id = 2,
     .minimum_version_id = 2,
     .minimum_version_id_old = 2,
-    .post_load = cirrus_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_PCI_DEVICE(dev, PCICirrusVGAState),
         VMSTATE_STRUCT(cirrus_vga, PCICirrusVGAState, 0,
diff --git a/hw/dma.c b/hw/dma.c
index e5f7af7a88..5b215211e1 100644
--- a/hw/dma.c
+++ b/hw/dma.c
@@ -57,6 +57,7 @@ static struct dma_cont {
     uint8_t flip_flop;
     int dshift;
     struct dma_regs regs[4];
+    qemu_irq *cpu_request_exit;
 } dma_controllers[2];
 
 enum {
@@ -444,9 +445,9 @@ int DMA_write_memory (int nchan, void *buf, int pos, int len)
 /* request the emulator to transfer a new DMA memory block ASAP */
 void DMA_schedule(int nchan)
 {
-    CPUState *env = cpu_single_env;
-    if (env)
-        cpu_exit(env);
+    struct dma_cont *d = &dma_controllers[nchan > 3];
+
+    qemu_irq_pulse(*d->cpu_request_exit);
 }
 
 static void dma_reset(void *opaque)
@@ -464,12 +465,14 @@ static int dma_phony_handler (void *opaque, int nchan, int dma_pos, int dma_len)
 
 /* dshift = 0: 8 bit DMA, 1 = 16 bit DMA */
 static void dma_init2(struct dma_cont *d, int base, int dshift,
-                      int page_base, int pageh_base)
+                      int page_base, int pageh_base,
+                      qemu_irq *cpu_request_exit)
 {
     static const int page_port_list[] = { 0x1, 0x2, 0x3, 0x7 };
     int i;
 
     d->dshift = dshift;
+    d->cpu_request_exit = cpu_request_exit;
     for (i = 0; i < 8; i++) {
         register_ioport_write (base + (i << dshift), 1, 1, write_chan, d);
         register_ioport_read (base + (i << dshift), 1, 1, read_chan, d);
@@ -539,12 +542,12 @@ static const VMStateDescription vmstate_dma = {
     }
 };
 
-void DMA_init (int high_page_enable)
+void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
 {
     dma_init2(&dma_controllers[0], 0x00, 0, 0x80,
-              high_page_enable ? 0x480 : -1);
+              high_page_enable ? 0x480 : -1, cpu_request_exit);
     dma_init2(&dma_controllers[1], 0xc0, 1, 0x88,
-              high_page_enable ? 0x488 : -1);
+              high_page_enable ? 0x488 : -1, cpu_request_exit);
     vmstate_register (0, &vmstate_dma, &dma_controllers[0]);
     vmstate_register (1, &vmstate_dma, &dma_controllers[1]);
 
diff --git a/hw/elf_ops.h b/hw/elf_ops.h
index 69c07571b6..27d1ab9bc2 100644
--- a/hw/elf_ops.h
+++ b/hw/elf_ops.h
@@ -216,6 +216,11 @@ static int glue(load_elf, SZ)(const char *name, int fd,
                 if (EM_386 != ehdr.e_machine)
                     goto fail;
             break;
+        case EM_MICROBLAZE:
+            if (EM_MICROBLAZE != ehdr.e_machine)
+                if (EM_MICROBLAZE_OLD != ehdr.e_machine)
+                    goto fail;
+            break;
         default:
             if (elf_machine != ehdr.e_machine)
                 goto fail;
diff --git a/hw/isa.h b/hw/isa.h
index 97f69a25cb..aaf0272c25 100644
--- a/hw/isa.h
+++ b/hw/isa.h
@@ -41,7 +41,7 @@ int DMA_write_memory (int nchan, void *buf, int pos, int size);
 void DMA_hold_DREQ (int nchan);
 void DMA_release_DREQ (int nchan);
 void DMA_schedule(int nchan);
-void DMA_init (int high_page_enable);
+void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit);
 void DMA_register_channel (int nchan,
                            DMA_transfer_handler transfer_handler,
                            void *opaque);
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index 9d3c44d1c7..f5a91ba10a 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -543,7 +543,7 @@ static void lsi_do_dma(LSIState *s, int out)
         return;
     }
 
-    id = s->current->tag >> 8;
+    id = (s->current->tag >> 8) & 0xf;
     dev = s->bus.devs[id];
     if (!dev) {
         lsi_bad_selection(s, id);
@@ -745,7 +745,7 @@ static void lsi_do_command(LSIState *s)
     s->sfbr = buf[0];
     s->command_complete = 0;
 
-    id = s->select_tag >> 8;
+    id = (s->select_tag >> 8) & 0xf;
     dev = s->bus.devs[id];
     if (!dev) {
         lsi_bad_selection(s, id);
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index 6e0ec8fd36..ead3a00c3d 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -114,6 +114,15 @@ static void audio_init(qemu_irq *pic)
 #define MAGNUM_BIOS_SIZE_MAX 0x7e000
 #define MAGNUM_BIOS_SIZE (BIOS_SIZE < MAGNUM_BIOS_SIZE_MAX ? BIOS_SIZE : MAGNUM_BIOS_SIZE_MAX)
 
+static void cpu_request_exit(void *opaque, int irq, int level)
+{
+    CPUState *env = cpu_single_env;
+
+    if (env && level) {
+        cpu_exit(env);
+    }
+}
+
 static
 void mips_jazz_init (ram_addr_t ram_size,
                      const char *cpu_model,
@@ -130,6 +139,7 @@ void mips_jazz_init (ram_addr_t ram_size,
     PITState *pit;
     DriveInfo *fds[MAX_FD];
     qemu_irq esp_reset;
+    qemu_irq *cpu_exit_irq;
     ram_addr_t ram_offset;
     ram_addr_t bios_offset;
 
@@ -189,7 +199,8 @@ void mips_jazz_init (ram_addr_t ram_size,
     i8259 = i8259_init(env->irq[4]);
     isa_bus_new(NULL);
     isa_bus_irqs(i8259);
-    DMA_init(0);
+    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
+    DMA_init(0, cpu_exit_irq);
     pit = pit_init(0x40, i8259[0]);
     pcspk_init(pit);
 
diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index 792709bf5a..a8f9d152dd 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -763,6 +763,15 @@ static void main_cpu_reset(void *opaque)
     }
 }
 
+static void cpu_request_exit(void *opaque, int irq, int level)
+{
+    CPUState *env = cpu_single_env;
+
+    if (env && level) {
+        cpu_exit(env);
+    }
+}
+
 static
 void mips_malta_init (ram_addr_t ram_size,
                       const char *boot_device,
@@ -781,6 +790,7 @@ void mips_malta_init (ram_addr_t ram_size,
     FDCtrl *floppy_controller;
     MaltaFPGAState *malta_fpga;
     qemu_irq *i8259;
+    qemu_irq *cpu_exit_irq;
     int piix4_devfn;
     uint8_t *eeprom_buf;
     i2c_bus *smbus;
@@ -943,7 +953,8 @@ void mips_malta_init (ram_addr_t ram_size,
         qdev_init_nofail(eeprom);
     }
     pit = pit_init(0x40, isa_reserve_irq(0));
-    DMA_init(0);
+    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
+    DMA_init(0, cpu_exit_irq);
 
     /* Super I/O */
     isa_dev = isa_create_simple("i8042");
diff --git a/hw/pc.c b/hw/pc.c
index 20dc7fdb51..e7f31d3848 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -365,26 +365,12 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
     rtc_set_memory(s, 0x39, val);
 }
 
-void ioport_set_a20(int enable)
+static void handle_a20_line_change(void *opaque, int irq, int level)
 {
-    /* XXX: send to all CPUs ? */
-    cpu_x86_set_a20(first_cpu, enable);
-}
-
-int ioport_get_a20(void)
-{
-    return ((first_cpu->a20_mask >> 20) & 1);
-}
+    CPUState *cpu = opaque;
 
-static void ioport92_write(void *opaque, uint32_t addr, uint32_t val)
-{
-    ioport_set_a20((val >> 1) & 1);
-    /* XXX: bit 0 is fast reset */
-}
-
-static uint32_t ioport92_read(void *opaque, uint32_t addr)
-{
-    return ioport_get_a20() << 1;
+    /* XXX: send to all CPUs ? */
+    cpu_x86_set_a20(cpu, level);
 }
 
 /***********************************************************/
@@ -818,7 +804,7 @@ void pc_memory_init(ram_addr_t ram_size,
     ram_addr_t ram_addr, bios_offset, option_rom_offset;
     ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
     int bios_size, isa_bios_size;
-    void **fw_cfg;
+    void *fw_cfg;
 
     if (ram_size >= 0xe0000000 ) {
         above_4g_mem_size = ram_size - 0xe0000000;
@@ -893,7 +879,7 @@ void pc_memory_init(ram_addr_t ram_size,
     rom_set_fw(fw_cfg);
 
     if (linux_boot) {
-        load_linux(*fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size);
+        load_linux(fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size);
     }
 
     for (i = 0; i < nb_option_roms; i++) {
@@ -928,6 +914,15 @@ void pc_vga_init(PCIBus *pci_bus)
     }
 }
 
+static void cpu_request_exit(void *opaque, int irq, int level)
+{
+    CPUState *env = cpu_single_env;
+
+    if (env && level) {
+        cpu_exit(env);
+    }
+}
+
 void pc_basic_device_init(qemu_irq *isa_irq,
                           FDCtrl **floppy_controller,
                           ISADevice **rtc_state)
@@ -935,6 +930,9 @@ void pc_basic_device_init(qemu_irq *isa_irq,
     int i;
     DriveInfo *fd[MAX_FD];
     PITState *pit;
+    qemu_irq *a20_line;
+    ISADevice *i8042;
+    qemu_irq *cpu_exit_irq;
 
     register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
 
@@ -944,9 +942,6 @@ void pc_basic_device_init(qemu_irq *isa_irq,
 
     qemu_register_boot_set(pc_boot_set, *rtc_state);
 
-    register_ioport_read(0x92, 1, 1, ioport92_read, NULL);
-    register_ioport_write(0x92, 1, 1, ioport92_write, NULL);
-
     pit = pit_init(0x40, isa_reserve_irq(0));
     pcspk_init(pit);
     if (!no_hpet) {
@@ -965,8 +960,13 @@ void pc_basic_device_init(qemu_irq *isa_irq,
         }
     }
 
-    isa_create_simple("i8042");
-    DMA_init(0);
+    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 1);
+    i8042 = isa_create_simple("i8042");
+    i8042_setup_a20_line(i8042, a20_line);
+    vmmouse_init(i8042);
+
+    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
+    DMA_init(0, cpu_exit_irq);
 
     for(i = 0; i < MAX_FD; i++) {
         fd[i] = drive_get(IF_FLOPPY, 0, i);
diff --git a/hw/pc.h b/hw/pc.h
index 654b7b33d6..73cccefff7 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -75,6 +75,8 @@ void i8042_init(qemu_irq kbd_irq, qemu_irq mouse_irq, uint32_t io_base);
 void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
                    target_phys_addr_t base, ram_addr_t size,
                    target_phys_addr_t mask);
+void i8042_isa_mouse_fake_event(void *opaque);
+void i8042_setup_a20_line(ISADevice *dev, qemu_irq *a20_out);
 
 /* pc.c */
 extern int fd_bootchk;
@@ -104,9 +106,6 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
                   FDCtrl *floppy_controller, ISADevice *s);
 void pc_pci_device_init(PCIBus *pci_bus);
 
-void ioport_set_a20(int enable);
-int ioport_get_a20(void);
-
 typedef void (*cpu_set_smm_t)(int smm, void *arg);
 void cpu_smm_register(cpu_set_smm_t callback, void *arg);
 
diff --git a/hw/pckbd.c b/hw/pckbd.c
index e83b8a6bcb..381228479e 100644
--- a/hw/pckbd.c
+++ b/hw/pckbd.c
@@ -29,6 +29,12 @@
 
 /* debug PC keyboard */
 //#define DEBUG_KBD
+#ifdef DEBUG_KBD
+#define DPRINTF(fmt, ...)                                       \
+    do { printf("KBD: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...)
+#endif
 
 /*	Keyboard Controller Commands */
 #define KBD_CCMD_READ_MODE	0x20	/* Read mode bits */
@@ -87,6 +93,12 @@
 #define KBD_MODE_KCC 		0x40	/* Scan code conversion to PC format */
 #define KBD_MODE_RFU		0x80
 
+/* Output Port Bits */
+#define KBD_OUT_RESET           0x01    /* 1=normal mode, 0=reset */
+#define KBD_OUT_A20             0x02    /* x86 only */
+#define KBD_OUT_OBF             0x10    /* Keyboard output buffer full */
+#define KBD_OUT_MOUSE_OBF       0x20    /* Mouse output buffer full */
+
 /* Mouse Commands */
 #define AUX_SET_SCALE11		0xE6	/* Set 1:1 scaling */
 #define AUX_SET_SCALE21		0xE7	/* Set 2:1 scaling */
@@ -116,6 +128,7 @@ typedef struct KBDState {
     uint8_t write_cmd; /* if non zero, write data to port 60 is expected */
     uint8_t status;
     uint8_t mode;
+    uint8_t outport;
     /* Bitmask of devices with data available.  */
     uint8_t pending;
     void *kbd;
@@ -123,6 +136,7 @@ typedef struct KBDState {
 
     qemu_irq irq_kbd;
     qemu_irq irq_mouse;
+    qemu_irq *a20_out;
     target_phys_addr_t mask;
 } KBDState;
 
@@ -136,11 +150,14 @@ static void kbd_update_irq(KBDState *s)
     irq_kbd_level = 0;
     irq_mouse_level = 0;
     s->status &= ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF);
+    s->outport &= ~(KBD_OUT_OBF | KBD_OUT_MOUSE_OBF);
     if (s->pending) {
         s->status |= KBD_STAT_OBF;
+        s->outport |= KBD_OUT_OBF;
         /* kbd data takes priority over aux data.  */
         if (s->pending == KBD_PENDING_AUX) {
             s->status |= KBD_STAT_MOUSE_OBF;
+            s->outport |= KBD_OUT_MOUSE_OBF;
             if (s->mode & KBD_MODE_MOUSE_INT)
                 irq_mouse_level = 1;
         } else {
@@ -180,9 +197,7 @@ static uint32_t kbd_read_status(void *opaque, uint32_t addr)
     KBDState *s = opaque;
     int val;
     val = s->status;
-#if defined(DEBUG_KBD)
-    printf("kbd: read status=0x%02x\n", val);
-#endif
+    DPRINTF("kbd: read status=0x%02x\n", val);
     return val;
 }
 
@@ -194,13 +209,35 @@ static void kbd_queue(KBDState *s, int b, int aux)
         ps2_queue(s->kbd, b);
 }
 
+static void ioport92_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    KBDState *s = opaque;
+
+    DPRINTF("kbd: write outport=0x%02x\n", val);
+    s->outport = val;
+    if (s->a20_out) {
+        qemu_set_irq(*s->a20_out, (val >> 1) & 1);
+    }
+    if (!(val & 1)) {
+        qemu_system_reset_request();
+    }
+}
+
+static uint32_t ioport92_read(void *opaque, uint32_t addr)
+{
+    KBDState *s = opaque;
+    uint32_t ret;
+
+    ret = s->outport;
+    DPRINTF("kbd: read outport=0x%02x\n", ret);
+    return ret;
+}
+
 static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val)
 {
     KBDState *s = opaque;
 
-#ifdef DEBUG_KBD
-    printf("kbd: write cmd=0x%02x\n", val);
-#endif
+    DPRINTF("kbd: write cmd=0x%02x\n", val);
     switch(val) {
     case KBD_CCMD_READ_MODE:
         kbd_queue(s, s->mode, 0);
@@ -240,26 +277,20 @@ static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val)
         kbd_queue(s, 0x00, 0);
         break;
     case KBD_CCMD_READ_OUTPORT:
-        /* XXX: check that */
-#ifdef TARGET_I386
-        val = 0x01 | (ioport_get_a20() << 1);
-#else
-        val = 0x01;
-#endif
-        if (s->status & KBD_STAT_OBF)
-            val |= 0x10;
-        if (s->status & KBD_STAT_MOUSE_OBF)
-            val |= 0x20;
-        kbd_queue(s, val, 0);
+        kbd_queue(s, s->outport, 0);
         break;
-#ifdef TARGET_I386
     case KBD_CCMD_ENABLE_A20:
-        ioport_set_a20(1);
+        if (s->a20_out) {
+            qemu_irq_raise(*s->a20_out);
+        }
+        s->outport |= KBD_OUT_A20;
         break;
     case KBD_CCMD_DISABLE_A20:
-        ioport_set_a20(0);
+        if (s->a20_out) {
+            qemu_irq_lower(*s->a20_out);
+        }
+        s->outport &= ~KBD_OUT_A20;
         break;
-#endif
     case KBD_CCMD_RESET:
         qemu_system_reset_request();
         break;
@@ -282,9 +313,7 @@ static uint32_t kbd_read_data(void *opaque, uint32_t addr)
     else
         val = ps2_read_data(s->kbd);
 
-#if defined(DEBUG_KBD)
-    printf("kbd: read data=0x%02x\n", val);
-#endif
+    DPRINTF("kbd: read data=0x%02x\n", val);
     return val;
 }
 
@@ -292,9 +321,7 @@ static void kbd_write_data(void *opaque, uint32_t addr, uint32_t val)
 {
     KBDState *s = opaque;
 
-#ifdef DEBUG_KBD
-    printf("kbd: write data=0x%02x\n", val);
-#endif
+    DPRINTF("kbd: write data=0x%02x\n", val);
 
     switch(s->write_cmd) {
     case 0:
@@ -313,12 +340,7 @@ static void kbd_write_data(void *opaque, uint32_t addr, uint32_t val)
         kbd_queue(s, val, 1);
         break;
     case KBD_CCMD_WRITE_OUTPORT:
-#ifdef TARGET_I386
-        ioport_set_a20((val >> 1) & 1);
-#endif
-        if (!(val & 1)) {
-            qemu_system_reset_request();
-        }
+        ioport92_write(s, 0, val);
         break;
     case KBD_CCMD_WRITE_MOUSE:
         ps2_write_mouse(s->mouse, val);
@@ -335,6 +357,7 @@ static void kbd_reset(void *opaque)
 
     s->mode = KBD_MODE_KBD_INT | KBD_MODE_MOUSE_INT;
     s->status = KBD_STAT_CMD | KBD_STAT_UNLOCKED;
+    s->outport = KBD_OUT_RESET | KBD_OUT_A20;
 }
 
 static const VMStateDescription vmstate_kbd = {
@@ -401,9 +424,6 @@ void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq,
 
     s->kbd = ps2_kbd_init(kbd_update_kbd_irq, s);
     s->mouse = ps2_mouse_init(kbd_update_aux_irq, s);
-#ifdef TARGET_I386
-    vmmouse_init(s->mouse);
-#endif
     qemu_register_reset(kbd_reset, s);
 }
 
@@ -412,6 +432,21 @@ typedef struct ISAKBDState {
     KBDState  kbd;
 } ISAKBDState;
 
+void i8042_isa_mouse_fake_event(void *opaque)
+{
+    ISADevice *dev = opaque;
+    KBDState *s = &(DO_UPCAST(ISAKBDState, dev, dev)->kbd);
+
+    ps2_mouse_fake_event(s->mouse);
+}
+
+void i8042_setup_a20_line(ISADevice *dev, qemu_irq *a20_out)
+{
+    KBDState *s = &(DO_UPCAST(ISAKBDState, dev, dev)->kbd);
+
+    s->a20_out = a20_out;
+}
+
 static const VMStateDescription vmstate_kbd_isa = {
     .name = "pckbd",
     .version_id = 3,
@@ -434,12 +469,11 @@ static int i8042_initfn(ISADevice *dev)
     register_ioport_write(0x60, 1, 1, kbd_write_data, s);
     register_ioport_read(0x64, 1, 1, kbd_read_status, s);
     register_ioport_write(0x64, 1, 1, kbd_write_command, s);
+    register_ioport_read(0x92, 1, 1, ioport92_read, s);
+    register_ioport_write(0x92, 1, 1, ioport92_write, s);
 
     s->kbd = ps2_kbd_init(kbd_update_kbd_irq, s);
     s->mouse = ps2_mouse_init(kbd_update_aux_irq, s);
-#ifdef TARGET_I386
-    vmmouse_init(s->mouse);
-#endif
     qemu_register_reset(kbd_reset, s);
     return 0;
 }
diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c
index 20fe93d63c..19e13d632d 100644
--- a/hw/pflash_cfi01.c
+++ b/hw/pflash_cfi01.c
@@ -166,6 +166,22 @@ static uint32_t pflash_read (pflash_t *pfl, target_phys_addr_t offset,
         ret = pfl->status;
         DPRINTF("%s: status %x\n", __func__, ret);
         break;
+    case 0x90:
+        switch (boff) {
+        case 0:
+            ret = pfl->ident[0] << 8 | pfl->ident[1];
+            DPRINTF("%s: Manufacturer Code %04x\n", __func__, ret);
+            break;
+        case 1:
+            ret = pfl->ident[2] << 8 | pfl->ident[3];
+            DPRINTF("%s: Device ID Code %04x\n", __func__, ret);
+            break;
+        default:
+            DPRINTF("%s: Read Device Information boff=%x\n", __func__, boff);
+            ret = 0;
+            break;
+        }
+        break;
     case 0x98: /* Query mode */
         if (boff > pfl->cfi_len)
             ret = 0;
@@ -283,6 +299,10 @@ static void pflash_write(pflash_t *pfl, target_phys_addr_t offset,
             DPRINTF("%s: Read status register\n", __func__);
             pfl->cmd = cmd;
             return;
+        case 0x90: /* Read Device ID */
+            DPRINTF("%s: Read Device information\n", __func__);
+            pfl->cmd = cmd;
+            return;
         case 0x98: /* CFI query */
             DPRINTF("%s: CFI query\n", __func__);
             break;
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 09a98819d1..16c9950740 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -547,6 +547,15 @@ static CPUReadMemoryFunc * const PPC_prep_io_read[] = {
 
 #define NVRAM_SIZE        0x2000
 
+static void cpu_request_exit(void *opaque, int irq, int level)
+{
+    CPUState *env = cpu_single_env;
+
+    if (env && level) {
+        cpu_exit(env);
+    }
+}
+
 /* PowerPC PREP hardware initialisation */
 static void ppc_prep_init (ram_addr_t ram_size,
                            const char *boot_device,
@@ -565,6 +574,7 @@ static void ppc_prep_init (ram_addr_t ram_size,
     uint32_t kernel_base, kernel_size, initrd_base, initrd_size;
     PCIBus *pci_bus;
     qemu_irq *i8259;
+    qemu_irq *cpu_exit_irq;
     int ppc_boot_device;
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     DriveInfo *fd[MAX_FD];
@@ -719,7 +729,10 @@ static void ppc_prep_init (ram_addr_t ram_size,
 		     hd[2 * i + 1]);
     }
     isa_create_simple("i8042");
-    DMA_init(1);
+
+    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
+    DMA_init(1, cpu_exit_irq);
+
     //    SB16_init();
 
     for(i = 0; i < MAX_FD; i++) {
diff --git a/hw/sparc32_dma.c b/hw/sparc32_dma.c
index 3ceb851e91..b52170787b 100644
--- a/hw/sparc32_dma.c
+++ b/hw/sparc32_dma.c
@@ -62,6 +62,9 @@
 #define DMA_DRAIN_FIFO 0x40
 #define DMA_RESET 0x80
 
+/* XXX SCSI and ethernet should have different read-only bit masks */
+#define DMA_CSR_RO_MASK 0xfe000007
+
 typedef struct DMAState DMAState;
 
 struct DMAState {
@@ -187,7 +190,7 @@ static void dma_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     switch (saddr) {
     case 0:
         if (val & DMA_INTREN) {
-            if (val & DMA_INTR) {
+            if (s->dmaregs[0] & DMA_INTR) {
                 DPRINTF("Raise IRQ\n");
                 qemu_irq_raise(s->irq);
             }
@@ -204,16 +207,17 @@ static void dma_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
             val &= ~DMA_DRAIN_FIFO;
         } else if (val == 0)
             val = DMA_DRAIN_FIFO;
-        val &= 0x0fffffff;
+        val &= ~DMA_CSR_RO_MASK;
         val |= DMA_VER;
+        s->dmaregs[0] = (s->dmaregs[0] & DMA_CSR_RO_MASK) | val;
         break;
     case 1:
         s->dmaregs[0] |= DMA_LOADED;
-        break;
+        /* fall through */
     default:
+        s->dmaregs[saddr] = val;
         break;
     }
-    s->dmaregs[saddr] = val;
 }
 
 static CPUReadMemoryFunc * const dma_mem_read[3] = {
diff --git a/hw/sun4m.c b/hw/sun4m.c
index 9a79120b1d..7ba0f763bc 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -152,7 +152,11 @@ int DMA_write_memory (int nchan, void *buf, int pos, int size)
 void DMA_hold_DREQ (int nchan) {}
 void DMA_release_DREQ (int nchan) {}
 void DMA_schedule(int nchan) {}
-void DMA_init (int high_page_enable) {}
+
+void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
+{
+}
+
 void DMA_register_channel (int nchan,
                            DMA_transfer_handler transfer_handler,
                            void *opaque)
diff --git a/hw/sun4u.c b/hw/sun4u.c
index 24ea367d1a..e9a1e231e9 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -105,7 +105,11 @@ int DMA_write_memory (int nchan, void *buf, int pos, int size)
 void DMA_hold_DREQ (int nchan) {}
 void DMA_release_DREQ (int nchan) {}
 void DMA_schedule(int nchan) {}
-void DMA_init (int high_page_enable) {}
+
+void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
+{
+}
+
 void DMA_register_channel (int nchan,
                            DMA_transfer_handler transfer_handler,
                            void *opaque)
diff --git a/hw/vga.c b/hw/vga.c
index 87a1bb06d1..01de1e1b04 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -232,7 +232,7 @@ static void vga_precise_update_retrace_info(VGACommonState *s)
         "clocking_mode = %d\n"
         "clock_sel = %d %d\n"
         "dots = %d\n"
-        "ticks/char = %lld\n"
+        "ticks/char = %" PRId64 "\n"
         "\n",
         (double) get_ticks_per_sec() / (r->ticks_per_char * r->total_chars),
         htotal_chars,
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index 2e292eea26..26dae797bf 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -99,7 +99,7 @@ struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd)
         goto fail;
     }
     if (~net->dev.features & net->dev.backend_features) {
-        fprintf(stderr, "vhost lacks feature mask %llu for backend\n",
+        fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
                 ~net->dev.features & net->dev.backend_features);
         vhost_dev_cleanup(&net->dev);
         goto fail;
diff --git a/hw/vmmouse.c b/hw/vmmouse.c
index bb6e6056d6..afebad9a0a 100644
--- a/hw/vmmouse.c
+++ b/hw/vmmouse.c
@@ -97,7 +97,7 @@ static void vmmouse_mouse_event(void *opaque, int x, int y, int dz, int buttons_
 
     /* need to still generate PS2 events to notify driver to
        read from queue */
-    ps2_mouse_fake_event(s->ps2_mouse);
+    i8042_isa_mouse_fake_event(s->ps2_mouse);
 }
 
 static void vmmouse_update_handler(VMMouseState *s)
diff --git a/hxtool b/hxtool
index 0fdbc641c6..8f655320be 100644
--- a/hxtool
+++ b/hxtool
@@ -19,11 +19,24 @@ hxtoh()
 hxtotexi()
 {
     flag=0
+    line=1
     while read -r str; do
         case "$str" in
             HXCOMM*)
             ;;
-            STEXI*|ETEXI*) flag=$(($flag^1))
+            STEXI*)
+            if test $flag -eq 1 ; then
+                echo "line $line: syntax error: expected ETEXI, found $str" >&2
+                exit 1
+            fi
+            flag=1
+            ;;
+            ETEXI*)
+            if test $flag -ne 1 ; then
+                echo "line $line: syntax error: expected STEXI, found $str" >&2
+                exit 1
+            fi
+            flag=0
             ;;
             DEFHEADING*)
             echo "$(expr "$str" : "DEFHEADING(\(.*\))")"
@@ -32,6 +45,7 @@ hxtotexi()
             test $flag -eq 1 && echo "$str"
             ;;
         esac
+        line=$((line+1))
     done
 }
 
diff --git a/ia64-dis.c b/ia64-dis.c
index da73a98ba5..2886df3614 100644
--- a/ia64-dis.c
+++ b/ia64-dis.c
@@ -10560,11 +10560,14 @@ print_insn_ia64 (bfd_vma memaddr, struct disassemble_info *info)
 	    if (str)
 	      (*info->fprintf_func) (info->stream, "%s", str);
 	    else if (odesc->flags & IA64_OPND_FLAG_DECIMAL_SIGNED)
-	      (*info->fprintf_func) (info->stream, "%lld", (long long) value);
+              (*info->fprintf_func) (info->stream, "%" PRId64,
+                                     (int64_t) value);
 	    else if (odesc->flags & IA64_OPND_FLAG_DECIMAL_UNSIGNED)
-	      (*info->fprintf_func) (info->stream, "%llu", (long long) value);
+              (*info->fprintf_func) (info->stream, "%" PRIu64,
+                                     (uint64_t) value);
 	    else
-	      (*info->fprintf_func) (info->stream, "0x%llx", (long long) value);
+              (*info->fprintf_func) (info->stream, "0x%" PRIx64,
+                                     (uint64_t) value);
 	    break;
 
 	  case IA64_OPND_CLASS_REL:
diff --git a/linux-user/main.c b/linux-user/main.c
index 99a7cde559..b240f290f7 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -31,7 +31,7 @@
 #include "cache-utils.h"
 /* For tb_lock */
 #include "exec-all.h"
-
+#include "tcg.h"
 #include "qemu-timer.h"
 #include "envlist.h"
 
@@ -2434,7 +2434,7 @@ void cpu_loop (CPUState *env)
             info.si_signo = TARGET_SIGSEGV;
             info.si_errno = 0;
             info.si_code = 0;  /* ??? SEGV_MAPERR vs SEGV_ACCERR.  */
-            info._sifields._sigfault._addr = env->pc;
+            info._sifields._sigfault._addr = env->ipr[IPR_EXC_ADDR];
             queue_signal(env, info.si_signo, &info);
             break;
         case EXCP_DTB_MISS_PAL:
@@ -2458,7 +2458,7 @@ void cpu_loop (CPUState *env)
             info.si_signo = TARGET_SIGBUS;
             info.si_errno = 0;
             info.si_code = TARGET_BUS_ADRALN;
-            info._sifields._sigfault._addr = env->pc;
+            info._sifields._sigfault._addr = env->ipr[IPR_EXC_ADDR];
             queue_signal(env, info.si_signo, &info);
             break;
         case EXCP_OPCDEC:
@@ -2499,8 +2499,15 @@ void cpu_loop (CPUState *env)
                                     env->ir[IR_A0], env->ir[IR_A1],
                                     env->ir[IR_A2], env->ir[IR_A3],
                                     env->ir[IR_A4], env->ir[IR_A5]);
-                if (trapnr != TARGET_NR_sigreturn
-                    && trapnr != TARGET_NR_rt_sigreturn) {
+                if (trapnr == TARGET_NR_sigreturn
+                    || trapnr == TARGET_NR_rt_sigreturn) {
+                    break;
+                }
+                /* Syscall writes 0 to V0 to bypass error check, similar
+                   to how this is handled internal to Linux kernel.  */
+                if (env->ir[IR_V0] == 0) {
+                    env->ir[IR_V0] = sysret;
+                } else {
                     env->ir[IR_V0] = (sysret < 0 ? -sysret : sysret);
                     env->ir[IR_A3] = (sysret < 0);
                 }
@@ -2977,6 +2984,13 @@ int main(int argc, char **argv, char **envp)
     syscall_init();
     signal_init();
 
+#if defined(CONFIG_USE_GUEST_BASE)
+    /* Now that we've loaded the binary, GUEST_BASE is fixed.  Delay
+       generating the prologue until now so that the prologue can take
+       the real value of GUEST_BASE into account.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
+
 #if defined(TARGET_I386)
     cpu_x86_set_cpl(env, 3);
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ce728faa4d..8222cb92f1 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -718,9 +718,17 @@ abi_long do_brk(abi_ulong new_brk)
                                         PROT_READ|PROT_WRITE,
                                         MAP_ANON|MAP_FIXED|MAP_PRIVATE, 0, 0));
 
-    if (!is_error(mapped_addr))
+#if defined(TARGET_ALPHA)
+    /* We (partially) emulate OSF/1 on Alpha, which requires we
+       return a proper errno, not an unchanged brk value.  */
+    if (is_error(mapped_addr)) {
+        return -TARGET_ENOMEM;
+    }
+#endif
+
+    if (!is_error(mapped_addr)) {
 	target_brk = new_brk;
-    
+    }
     return target_brk;
 }
 
@@ -987,7 +995,8 @@ static abi_long do_pipe2(int host_pipe[], int flags)
 #endif
 }
 
-static abi_long do_pipe(void *cpu_env, abi_ulong pipedes, int flags)
+static abi_long do_pipe(void *cpu_env, abi_ulong pipedes,
+                        int flags, int is_pipe2)
 {
     int host_pipe[2];
     abi_long ret;
@@ -995,20 +1004,25 @@ static abi_long do_pipe(void *cpu_env, abi_ulong pipedes, int flags)
 
     if (is_error(ret))
         return get_errno(ret);
-#if defined(TARGET_MIPS)
-    ((CPUMIPSState*)cpu_env)->active_tc.gpr[3] = host_pipe[1];
-    ret = host_pipe[0];
-#else
-#if defined(TARGET_SH4)
-    if (!flags) {
+
+    /* Several targets have special calling conventions for the original
+       pipe syscall, but didn't replicate this into the pipe2 syscall.  */
+    if (!is_pipe2) {
+#if defined(TARGET_ALPHA)
+        ((CPUAlphaState *)cpu_env)->ir[IR_A4] = host_pipe[1];
+        return host_pipe[0];
+#elif defined(TARGET_MIPS)
+        ((CPUMIPSState*)cpu_env)->active_tc.gpr[3] = host_pipe[1];
+        return host_pipe[0];
+#elif defined(TARGET_SH4)
         ((CPUSH4State*)cpu_env)->gregs[1] = host_pipe[1];
-        ret = host_pipe[0];
-    } else
+        return host_pipe[0];
 #endif
+    }
+
     if (put_user_s32(host_pipe[0], pipedes)
         || put_user_s32(host_pipe[1], pipedes + sizeof(host_pipe[0])))
         return -TARGET_EFAULT;
-#endif
     return get_errno(ret);
 }
 
@@ -4483,13 +4497,18 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_lseek:
         ret = get_errno(lseek(arg1, arg2, arg3));
         break;
-#ifdef TARGET_NR_getxpid
+#if defined(TARGET_NR_getxpid) && defined(TARGET_ALPHA)
+    /* Alpha specific */
     case TARGET_NR_getxpid:
-#else
-    case TARGET_NR_getpid:
+        ((CPUAlphaState *)cpu_env)->ir[IR_A4] = getppid();
+        ret = get_errno(getpid());
+        break;
 #endif
+#ifdef TARGET_NR_getpid
+    case TARGET_NR_getpid:
         ret = get_errno(getpid());
         break;
+#endif
     case TARGET_NR_mount:
 		{
 			/* need to look at the data field */
@@ -4698,11 +4717,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = get_errno(dup(arg1));
         break;
     case TARGET_NR_pipe:
-        ret = do_pipe(cpu_env, arg1, 0);
+        ret = do_pipe(cpu_env, arg1, 0, 0);
         break;
 #ifdef TARGET_NR_pipe2
     case TARGET_NR_pipe2:
-        ret = do_pipe(cpu_env, arg1, arg2);
+        ret = do_pipe(cpu_env, arg1, arg2, 1);
         break;
 #endif
     case TARGET_NR_times:
@@ -4964,11 +4983,41 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #ifdef TARGET_NR_sigprocmask
     case TARGET_NR_sigprocmask:
         {
-            int how = arg1;
+#if defined(TARGET_ALPHA)
+            sigset_t set, oldset;
+            abi_ulong mask;
+            int how;
+
+            switch (arg1) {
+            case TARGET_SIG_BLOCK:
+                how = SIG_BLOCK;
+                break;
+            case TARGET_SIG_UNBLOCK:
+                how = SIG_UNBLOCK;
+                break;
+            case TARGET_SIG_SETMASK:
+                how = SIG_SETMASK;
+                break;
+            default:
+                ret = -TARGET_EINVAL;
+                goto fail;
+            }
+            mask = arg2;
+            target_to_host_old_sigset(&set, &mask);
+
+            ret = get_errno(sigprocmask(how, &set, &oldset));
+
+            if (!is_error(ret)) {
+                host_to_target_old_sigset(&mask, &oldset);
+                ret = mask;
+                ((CPUAlphaState *)cpu_env)->[IR_V0] = 0; /* force no error */
+            }
+#else
             sigset_t set, oldset, *set_ptr;
+            int how;
 
             if (arg2) {
-                switch(how) {
+                switch (arg1) {
                 case TARGET_SIG_BLOCK:
                     how = SIG_BLOCK;
                     break;
@@ -4991,13 +5040,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 how = 0;
                 set_ptr = NULL;
             }
-            ret = get_errno(sigprocmask(arg1, set_ptr, &oldset));
+            ret = get_errno(sigprocmask(how, set_ptr, &oldset));
             if (!is_error(ret) && arg3) {
                 if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0)))
                     goto efault;
                 host_to_target_old_sigset(p, &oldset);
                 unlock_user(p, arg3, sizeof(target_sigset_t));
             }
+#endif
         }
         break;
 #endif
@@ -5069,10 +5119,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_sigsuspend:
         {
             sigset_t set;
+#if defined(TARGET_ALPHA)
+            abi_ulong mask = arg1;
+            target_to_host_old_sigset(&set, &mask);
+#else
             if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1)))
                 goto efault;
             target_to_host_old_sigset(&set, p);
             unlock_user(p, arg1, 0);
+#endif
             ret = get_errno(sigsuspend(&set));
         }
         break;
@@ -5213,6 +5268,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         }
         break;
 #endif
+#ifdef TARGET_NR_pselect6
+    case TARGET_NR_pselect6:
+	    goto unimplemented_nowarn;
+#endif
     case TARGET_NR_symlink:
         {
             void *p2;
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 255e89cbd8..681021ca0e 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -336,6 +336,14 @@ int do_sigaction(int sig, const struct target_sigaction *act,
 #if !defined(TARGET_ABI_MIPSN32) && !defined(TARGET_ABI_MIPSN64)
 #define TARGET_SA_RESTORER	0x04000000	/* Only for O32 */
 #endif
+#elif defined(TARGET_ALPHA)
+#define TARGET_SA_ONSTACK	0x00000001
+#define TARGET_SA_RESTART	0x00000002
+#define TARGET_SA_NOCLDSTOP	0x00000004
+#define TARGET_SA_NODEFER	0x00000008
+#define TARGET_SA_RESETHAND	0x00000010
+#define TARGET_SA_NOCLDWAIT	0x00000020 /* not supported yet */
+#define TARGET_SA_SIGINFO	0x00000040
 #else
 #define TARGET_SA_NOCLDSTOP	0x00000001
 #define TARGET_SA_NOCLDWAIT	0x00000002 /* not supported yet */
@@ -670,7 +678,7 @@ struct target_rlimit {
 };
 
 #if defined(TARGET_ALPHA)
-#define TARGET_RLIM_INFINITY	0x7ffffffffffffffful
+#define TARGET_RLIM_INFINITY	0x7fffffffffffffffull
 #elif defined(TARGET_MIPS) || defined(TARGET_SPARC)
 #define TARGET_RLIM_INFINITY	0x7fffffffUL
 #else
diff --git a/nbd.c b/nbd.c
index 337eeba873..a9f295f2bb 100644
--- a/nbd.c
+++ b/nbd.c
@@ -353,8 +353,7 @@ int nbd_init(int fd, int csock, off_t size, size_t blocksize)
 		return -1;
 	}
 
-	TRACE("Setting size to %llu block(s)",
-	      (unsigned long long)(size / blocksize));
+        TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
 
 	if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) == -1) {
 		int serrno = errno;
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index c079019048..c4cf3e7542 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -7,7 +7,7 @@ HXCOMM HXCOMM can be used for comments, discarded from both texi and C
 
 STEXI
 @table @option
-STEXI
+ETEXI
 
 DEF("check", img_check,
     "check [-f fmt] filename")
diff --git a/qemu-img.c b/qemu-img.c
index d3c30a74f3..cb007b757d 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -701,9 +701,9 @@ static int img_convert(int argc, char **argv)
                     bs_offset += bs_sectors;
                     bdrv_get_geometry(bs[bs_i], &bs_sectors);
                     bs_num = 0;
-                    /* printf("changing part: sector_num=%lld, "
-                       "bs_i=%d, bs_offset=%lld, bs_sectors=%lld\n",
-                       sector_num, bs_i, bs_offset, bs_sectors); */
+                    /* printf("changing part: sector_num=%" PRId64 ", "
+                       "bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
+                       "\n", sector_num, bs_i, bs_offset, bs_sectors); */
                 }
                 assert (bs_num < bs_sectors);
 
@@ -749,8 +749,8 @@ static int img_convert(int argc, char **argv)
                 assert (bs_i < bs_n);
                 bs_offset += bs_sectors;
                 bdrv_get_geometry(bs[bs_i], &bs_sectors);
-                /* printf("changing part: sector_num=%lld, bs_i=%d, "
-                  "bs_offset=%lld, bs_sectors=%lld\n",
+                /* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
+                  "bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
                    sector_num, bs_i, bs_offset, bs_sectors); */
             }
 
diff --git a/qemu-io.c b/qemu-io.c
index 8517b909c7..f39109e231 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -84,7 +84,7 @@ dump_buffer(const void *buffer, int64_t offset, int len)
 	for (i = 0, p = buffer; i < len; i += 16) {
 		const uint8_t *s = p;
 
-		printf("%08llx:  ", (unsigned long long)offset + i);
+                printf("%08" PRIx64 ":  ", offset + i);
 		for (j = 0; j < 16 && i + j < len; j++, p++)
 			printf("%02x ", *p);
 		printf(" ");
@@ -108,8 +108,8 @@ print_report(const char *op, struct timeval *t, int64_t offset,
 	if (!Cflag) {
 		cvtstr((double)total, s1, sizeof(s1));
 		cvtstr(tdiv((double)total, *t), s2, sizeof(s2));
-		printf("%s %d/%d bytes at offset %lld\n",
-			op, total, count, (long long)offset);
+                printf("%s %d/%d bytes at offset %" PRId64 "\n",
+                       op, total, count, offset);
 		printf("%s, %d ops; %s (%s/sec and %.4f ops/sec)\n",
 			s1, cnt, ts, s2, tdiv((double)cnt, *t));
 	} else {/* bytes,ops,time,bytes/sec,ops/sec */
@@ -135,7 +135,7 @@ create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern)
 
 	for (i = 0; i < nr_iov; i++) {
 		char *arg = argv[i];
-		long long len;
+                uint64_t len;
 
 		len = cvtnum(arg);
 		if (len < 0) {
@@ -150,8 +150,8 @@ create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern)
 		}
 
 		if (len & 0x1ff) {
-			printf("length argument %lld is not sector aligned\n",
-				len);
+                        printf("length argument %" PRId64
+                               " is not sector aligned\n", len);
 			goto fail;
 		}
 
@@ -398,8 +398,8 @@ read_f(int argc, char **argv)
 
 	if (!pflag)
 		if (offset & 0x1ff) {
-			printf("offset %lld is not sector aligned\n",
-				(long long)offset);
+                        printf("offset %" PRId64 " is not sector aligned\n",
+                               offset);
 			return 0;
 
 		if (count & 0x1ff) {
@@ -429,9 +429,9 @@ read_f(int argc, char **argv)
 		void* cmp_buf = malloc(pattern_count);
 		memset(cmp_buf, pattern, pattern_count);
 		if (memcmp(buf + pattern_offset, cmp_buf, pattern_count)) {
-			printf("Pattern verification failed at offset %lld, "
-				"%d bytes\n",
-				(long long) offset + pattern_offset, pattern_count);
+			printf("Pattern verification failed at offset %"
+                               PRId64 ", %d bytes\n",
+                               offset + pattern_offset, pattern_count);
 		}
 		free(cmp_buf);
 	}
@@ -533,8 +533,8 @@ readv_f(int argc, char **argv)
 	optind++;
 
 	if (offset & 0x1ff) {
-		printf("offset %lld is not sector aligned\n",
-			(long long)offset);
+                printf("offset %" PRId64 " is not sector aligned\n",
+                       offset);
 		return 0;
 	}
 
@@ -554,9 +554,9 @@ readv_f(int argc, char **argv)
 		void* cmp_buf = malloc(qiov.size);
 		memset(cmp_buf, pattern, qiov.size);
 		if (memcmp(buf, cmp_buf, qiov.size)) {
-			printf("Pattern verification failed at offset %lld, "
-				"%zd bytes\n",
-				(long long) offset, qiov.size);
+			printf("Pattern verification failed at offset %"
+                               PRId64 ", %zd bytes\n",
+                               offset, qiov.size);
 		}
 		free(cmp_buf);
 	}
@@ -669,8 +669,8 @@ write_f(int argc, char **argv)
 
 	if (!pflag) {
 		if (offset & 0x1ff) {
-			printf("offset %lld is not sector aligned\n",
-				(long long)offset);
+                        printf("offset %" PRId64 " is not sector aligned\n",
+                               offset);
 			return 0;
 		}
 
@@ -783,8 +783,8 @@ writev_f(int argc, char **argv)
 	optind++;
 
 	if (offset & 0x1ff) {
-		printf("offset %lld is not sector aligned\n",
-			(long long)offset);
+                printf("offset %" PRId64 " is not sector aligned\n",
+                       offset);
 		return 0;
 	}
 
@@ -868,9 +868,9 @@ aio_read_done(void *opaque, int ret)
 
 		memset(cmp_buf, ctx->pattern, ctx->qiov.size);
 		if (memcmp(ctx->buf, cmp_buf, ctx->qiov.size)) {
-			printf("Pattern verification failed at offset %lld, "
-				"%zd bytes\n",
-				(long long) ctx->offset, ctx->qiov.size);
+			printf("Pattern verification failed at offset %"
+                               PRId64 ", %zd bytes\n",
+                               ctx->offset, ctx->qiov.size);
 		}
 		free(cmp_buf);
 	}
@@ -969,8 +969,8 @@ aio_read_f(int argc, char **argv)
 	optind++;
 
 	if (ctx->offset & 0x1ff) {
-		printf("offset %lld is not sector aligned\n",
-			(long long)ctx->offset);
+		printf("offset %" PRId64 " is not sector aligned\n",
+                       ctx->offset);
 		free(ctx);
 		return 0;
 	}
@@ -1064,8 +1064,8 @@ aio_write_f(int argc, char **argv)
 	optind++;
 
 	if (ctx->offset & 0x1ff) {
-		printf("offset %lld is not sector aligned\n",
-			(long long)ctx->offset);
+		printf("offset %" PRId64 " is not sector aligned\n",
+                       ctx->offset);
 		free(ctx);
 		return 0;
 	}
@@ -1214,8 +1214,8 @@ alloc_f(int argc, char **argv)
 
 	offset = cvtnum(argv[1]);
 	if (offset & 0x1ff) {
-		printf("offset %lld is not sector aligned\n",
-			(long long)offset);
+                printf("offset %" PRId64 " is not sector aligned\n",
+                       offset);
 		return 0;
 	}
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 12f6b5179d..03e95fd2bf 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -464,18 +464,15 @@ DEF("device", HAS_ARG, QEMU_OPTION_device,
     "                add device (based on driver)\n"
     "                prop=value,... sets driver properties\n"
     "                use -device ? to print all possible drivers\n"
-    "                use -device driver,? to print all possible options\n"
-    "                use -device driver,option=? to print a help for value\n",
+    "                use -device driver,? to print all possible properties\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -device @var{driver}[,@var{option}[=@var{value}][,...]]
+@item -device @var{driver}[,@var{prop}[=@var{value}][,...]]
 @findex -device
-Add device @var{driver}. Depending on the device type,
-@var{option} (with default or given @var{value}) may be useful.
-To get a help on possible @var{driver}s, @var{option}s or @var{value}s, use
-@code{-device ?},
-@code{-device @var{driver},?} or
-@code{-device @var{driver},@var{option}=?}. 
+Add device @var{driver}.  @var{prop}=@var{value} sets driver
+properties.  Valid properties depend on the driver.  To get help on
+possible drivers and properties, use @code{-device ?} and
+@code{-device @var{driver},?}.
 ETEXI
 
 #ifdef CONFIG_LINUX
diff --git a/rules.mak b/rules.mak
index 7e10432596..c843a131e8 100644
--- a/rules.mak
+++ b/rules.mak
@@ -12,7 +12,7 @@ MAKEFLAGS += -rR
 %.mak:
 
 # Flags for dependency generation
-QEMU_DGFLAGS += -MMD -MP -MT $@
+QEMU_DGFLAGS += -MMD -MP -MT $@ -MF $(*D)/$(*F).d
 
 %.o: %.c
 	$(call quiet-command,$(CC) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<,"  CC    $(TARGET_DIR)$@")
diff --git a/target-cris/mmu.c b/target-cris/mmu.c
index 2a5ded8953..d09e921230 100644
--- a/target-cris/mmu.c
+++ b/target-cris/mmu.c
@@ -31,7 +31,7 @@
 
 #ifdef DEBUG
 #define D(x) x
-#define D_LOG(...) qemu_log(__VA__ARGS__)
+#define D_LOG(...) qemu_log(__VA_ARGS__)
 #else
 #define D(x)
 #define D_LOG(...) do { } while (0)
diff --git a/target-cris/translate.c b/target-cris/translate.c
index a7014face6..6c1d9e026e 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3226,14 +3226,14 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
 
 	if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
 		qemu_log(
-			"srch=%d pc=%x %x flg=%llx bt=%x ds=%u ccs=%x\n"
+                        "srch=%d pc=%x %x flg=%" PRIx64 " bt=%x ds=%u ccs=%x\n"
 			"pid=%x usp=%x\n"
 			"%x.%x.%x.%x\n"
 			"%x.%x.%x.%x\n"
 			"%x.%x.%x.%x\n"
 			"%x.%x.%x.%x\n",
 			search_pc, dc->pc, dc->ppc,
-			(unsigned long long)tb->flags,
+                        (uint64_t)tb->flags,
 			env->btarget, (unsigned)tb->flags & 7,
 			env->pregs[PR_CCS], 
 			env->pregs[PR_PID], env->pregs[PR_USP],
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 86cca5182d..66e1c0d3bf 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -8918,7 +8918,7 @@ void cpu_dump_statistics (CPUState *env, FILE*f,
                         if (handler->count == 0)
                             continue;
                         cpu_fprintf(f, "%02x %02x %02x (%02x %04d) %16s: "
-                                    "%016llx %lld\n",
+                                    "%016" PRIx64 " %" PRId64 "\n",
                                     op1, op2, op3, op1, (op3 << 5) | op2,
                                     handler->oname,
                                     handler->count, handler->count);
@@ -8927,7 +8927,7 @@ void cpu_dump_statistics (CPUState *env, FILE*f,
                     if (handler->count == 0)
                         continue;
                     cpu_fprintf(f, "%02x %02x    (%02x %04d) %16s: "
-                                "%016llx %lld\n",
+                                "%016" PRIx64 " %" PRId64 "\n",
                                 op1, op2, op1, op2, handler->oname,
                                 handler->count, handler->count);
                 }
@@ -8935,7 +8935,8 @@ void cpu_dump_statistics (CPUState *env, FILE*f,
         } else {
             if (handler->count == 0)
                 continue;
-            cpu_fprintf(f, "%02x       (%02x     ) %16s: %016llx %lld\n",
+            cpu_fprintf(f, "%02x       (%02x     ) %16s: %016" PRIx64
+                        " %" PRId64 "\n",
                         op1, op1, handler->oname,
                         handler->count, handler->count);
         }
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 27b020b541..8f0484b24a 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -92,12 +92,14 @@
 #define PSR_CARRY_SHIFT 20
 #define PSR_CARRY (1 << PSR_CARRY_SHIFT)
 #define PSR_ICC   (PSR_NEG|PSR_ZERO|PSR_OVF|PSR_CARRY)
+#if !defined(TARGET_SPARC64)
 #define PSR_EF    (1<<12)
 #define PSR_PIL   0xf00
 #define PSR_S     (1<<7)
 #define PSR_PS    (1<<6)
 #define PSR_ET    (1<<5)
 #define PSR_CWP   0x1f
+#endif
 
 #define CC_SRC (env->cc_src)
 #define CC_SRC2 (env->cc_src2)
@@ -341,14 +343,16 @@ typedef struct CPUSPARCState {
     uint32_t wim;      /* window invalid mask */
 #endif
     target_ulong tbr;  /* trap base register */
+#if !defined(TARGET_SPARC64)
     int      psrs;     /* supervisor mode (extracted from PSR) */
     int      psrps;    /* previous supervisor mode */
-#if !defined(TARGET_SPARC64)
     int      psret;    /* enable traps */
 #endif
     uint32_t psrpil;   /* interrupt blocking level */
     uint32_t pil_in;   /* incoming interrupt level bitmap */
+#if !defined(TARGET_SPARC64)
     int      psref;    /* enable fpu */
+#endif
     target_ulong version;
     int interrupt_index;
     uint32_t nwindows;
@@ -508,21 +512,41 @@ int cpu_sparc_signal_handler(int host_signum, void *pinfo, void *puc);
 #define CPU_SAVE_VERSION 6
 
 /* MMU modes definitions */
+#if defined (TARGET_SPARC64)
+#define MMU_USER_IDX   0
 #define MMU_MODE0_SUFFIX _user
-#define MMU_MODE1_SUFFIX _kernel
-#ifdef TARGET_SPARC64
-#define MMU_MODE2_SUFFIX _hypv
-#define MMU_MODE3_SUFFIX _nucleus
-#define MMU_MODE4_SUFFIX _user_secondary
-#define MMU_MODE5_SUFFIX _kernel_secondary
-#endif
+#define MMU_USER_SECONDARY_IDX   1
+#define MMU_MODE1_SUFFIX _user_secondary
+#define MMU_KERNEL_IDX 2
+#define MMU_MODE2_SUFFIX _kernel
+#define MMU_KERNEL_SECONDARY_IDX 3
+#define MMU_MODE3_SUFFIX _kernel_secondary
+#define MMU_NUCLEUS_IDX 4
+#define MMU_MODE4_SUFFIX _nucleus
+#define MMU_HYPV_IDX   5
+#define MMU_MODE5_SUFFIX _hypv
+#else
 #define MMU_USER_IDX   0
+#define MMU_MODE0_SUFFIX _user
 #define MMU_KERNEL_IDX 1
-#define MMU_HYPV_IDX   2
-#ifdef TARGET_SPARC64
-#define MMU_NUCLEUS_IDX 3
-#define MMU_USER_SECONDARY_IDX   4
-#define MMU_KERNEL_SECONDARY_IDX 5
+#define MMU_MODE1_SUFFIX _kernel
+#endif
+
+#if defined (TARGET_SPARC64)
+static inline int cpu_has_hypervisor(CPUState *env1)
+{
+    return env1->def->features & CPU_FEATURE_HYPV;
+}
+
+static inline int cpu_hypervisor_mode(CPUState *env1)
+{
+    return cpu_has_hypervisor(env1) && (env1->hpstate & HS_PRIV);
+}
+
+static inline int cpu_supervisor_mode(CPUState *env1)
+{
+    return env1->pstate & PS_PRIV;
+}
 #endif
 
 static inline int cpu_mmu_index(CPUState *env1)
@@ -532,12 +556,15 @@ static inline int cpu_mmu_index(CPUState *env1)
 #elif !defined(TARGET_SPARC64)
     return env1->psrs;
 #else
-    if (!env1->psrs)
-        return MMU_USER_IDX;
-    else if ((env1->hpstate & HS_PRIV) == 0)
-        return MMU_KERNEL_IDX;
-    else
+    if (env1->tl > 0) {
+        return MMU_NUCLEUS_IDX;
+    } else if (cpu_hypervisor_mode(env1)) {
         return MMU_HYPV_IDX;
+    } else if (cpu_supervisor_mode(env1)) {
+        return MMU_KERNEL_IDX;
+    } else {
+        return MMU_USER_IDX;
+    }
 #endif
 }
 
@@ -611,9 +638,13 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
     *cs_base = env->npc;
 #ifdef TARGET_SPARC64
     // AM . Combined FPU enable bits . PRIV . DMMU enabled . IMMU enabled
-    *flags = ((env->pstate & PS_AM) << 2)
-        | (((env->pstate & PS_PEF) >> 1) | ((env->fprs & FPRS_FEF) << 2))
-        | (env->pstate & PS_PRIV) | ((env->lsu & (DMMU_E | IMMU_E)) >> 2);
+    *flags = ((env->pstate & PS_AM) << 2)          /* 5 */
+        | (((env->pstate & PS_PEF) >> 1)           /* 3 */
+        | ((env->fprs & FPRS_FEF) << 2))           /* 4 */
+        | (env->pstate & PS_PRIV)                  /* 2 */
+        | ((env->lsu & (DMMU_E | IMMU_E)) >> 2)    /* 1, 0 */
+        | ((env->tl & 0xff) << 8)
+        | (env->dmmu.mmu_primary_context << 16);   /* 16... */
 #else
     // FPU enable . Supervisor
     *flags = (env->psref << 4) | env->psrs;
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 582de1082c..96a22f3475 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -30,6 +30,13 @@
 //#define DEBUG_MMU
 //#define DEBUG_FEATURES
 
+#ifdef DEBUG_MMU
+#define DPRINTF_MMU(fmt, ...) \
+    do { printf("MMU: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF_MMU(fmt, ...) do {} while (0)
+#endif
+
 static int cpu_sparc_find_by_name(sparc_def_t *cpu_def, const char *cpu_model);
 
 /* Sparc MMU emulation */
@@ -451,42 +458,50 @@ static int get_physical_address_data(CPUState *env,
 
     for (i = 0; i < 64; i++) {
         // ctx match, vaddr match, valid?
-        if (ultrasparc_tag_match(&env->dtlb[i],
-                                 address, context, physical)) {
+        if (ultrasparc_tag_match(&env->dtlb[i], address, context, physical)) {
+
+            uint8_t fault_type = 0;
+
             // access ok?
-            if (((env->dtlb[i].tte & 0x4) && is_user) ||
-                (!(env->dtlb[i].tte & 0x2) && (rw == 1))) {
-                uint8_t fault_type = 0;
+            if ((env->dtlb[i].tte & 0x4) && is_user) {
+                fault_type |= 1; /* privilege violation */
+                env->exception_index = TT_DFAULT;
 
-                if ((env->dtlb[i].tte & 0x4) && is_user) {
-                    fault_type |= 1; /* privilege violation */
-                }
+                DPRINTF_MMU("DFAULT at %" PRIx64 " context %" PRIx64
+                            " mmu_idx=%d tl=%d\n",
+                            address, context, mmu_idx, env->tl);
+            } else if (!(env->dtlb[i].tte & 0x2) && (rw == 1)) {
+                env->exception_index = TT_DPROT;
+
+                DPRINTF_MMU("DPROT at %" PRIx64 " context %" PRIx64
+                            " mmu_idx=%d tl=%d\n",
+                            address, context, mmu_idx, env->tl);
+            } else {
+                *prot = PAGE_READ;
+                if (env->dtlb[i].tte & 0x2)
+                    *prot |= PAGE_WRITE;
 
-                if (env->dmmu.sfsr & 1) /* Fault status register */
-                    env->dmmu.sfsr = 2; /* overflow (not read before
+                TTE_SET_USED(env->dtlb[i].tte);
+
+                return 0;
+            }
+
+            if (env->dmmu.sfsr & 1) /* Fault status register */
+                env->dmmu.sfsr = 2; /* overflow (not read before
                                              another fault) */
 
-                env->dmmu.sfsr |= (is_user << 3) | ((rw == 1) << 2) | 1;
+            env->dmmu.sfsr |= (is_user << 3) | ((rw == 1) << 2) | 1;
 
-                env->dmmu.sfsr |= (fault_type << 7);
+            env->dmmu.sfsr |= (fault_type << 7);
 
-                env->dmmu.sfar = address; /* Fault address register */
-                env->exception_index = TT_DFAULT;
-#ifdef DEBUG_MMU
-                printf("DFAULT at 0x%" PRIx64 "\n", address);
-#endif
-                return 1;
-            }
-            *prot = PAGE_READ;
-            if (env->dtlb[i].tte & 0x2)
-                *prot |= PAGE_WRITE;
-            TTE_SET_USED(env->dtlb[i].tte);
-            return 0;
+            env->dmmu.sfar = address; /* Fault address register */
+            return 1;
         }
     }
-#ifdef DEBUG_MMU
-    printf("DMISS at 0x%" PRIx64 "\n", address);
-#endif
+
+    DPRINTF_MMU("DMISS at %" PRIx64 " context %" PRIx64 "\n",
+                address, context);
+
     env->dmmu.tag_access = (address & ~0x1fffULL) | context;
     env->exception_index = TT_DMISS;
     return 1;
@@ -528,9 +543,10 @@ static int get_physical_address_code(CPUState *env,
                                              another fault) */
                 env->immu.sfsr |= (is_user << 3) | 1;
                 env->exception_index = TT_TFAULT;
-#ifdef DEBUG_MMU
-                printf("TFAULT at 0x%" PRIx64 "\n", address);
-#endif
+
+                DPRINTF_MMU("TFAULT at %" PRIx64 " context %" PRIx64 "\n",
+                            address, context);
+
                 return 1;
             }
             *prot = PAGE_EXEC;
@@ -538,9 +554,10 @@ static int get_physical_address_code(CPUState *env,
             return 0;
         }
     }
-#ifdef DEBUG_MMU
-    printf("TMISS at 0x%" PRIx64 "\n", address);
-#endif
+
+    DPRINTF_MMU("TMISS at %" PRIx64 " context %" PRIx64 "\n",
+                address, context);
+
     /* Context is stored in DMMU (dmmuregs[1]) also for IMMU */
     env->immu.tag_access = (address & ~0x1fffULL) | context;
     env->exception_index = TT_TMISS;
@@ -555,6 +572,23 @@ static int get_physical_address(CPUState *env, target_phys_addr_t *physical,
     /* ??? We treat everything as a small page, then explicitly flush
        everything when an entry is evicted.  */
     *page_size = TARGET_PAGE_SIZE;
+
+#if defined (DEBUG_MMU)
+    /* safety net to catch wrong softmmu index use from dynamic code */
+    if (env->tl > 0 && mmu_idx != MMU_NUCLEUS_IDX) {
+        DPRINTF_MMU("get_physical_address %s tl=%d mmu_idx=%d"
+                    " primary context=%" PRIx64
+                    " secondary context=%" PRIx64
+                " address=%" PRIx64
+                "\n",
+                (rw == 2 ? "CODE" : "DATA"),
+                env->tl, mmu_idx,
+                env->dmmu.mmu_primary_context,
+                env->dmmu.mmu_secondary_context,
+                address);
+    }
+#endif
+
     if (rw == 2)
         return get_physical_address_code(env, physical, prot, address,
                                          mmu_idx);
@@ -578,10 +612,18 @@ int cpu_sparc_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
         virt_addr = address & TARGET_PAGE_MASK;
         vaddr = virt_addr + ((address & TARGET_PAGE_MASK) &
                              (TARGET_PAGE_SIZE - 1));
-#ifdef DEBUG_MMU
-        printf("Translate at 0x%" PRIx64 " -> 0x%" PRIx64 ", vaddr 0x%" PRIx64
-               "\n", address, paddr, vaddr);
-#endif
+
+        DPRINTF_MMU("Translate at %" PRIx64 " -> %" PRIx64 ","
+                    " vaddr %" PRIx64
+                    " mmu_idx=%d"
+                    " tl=%d"
+                    " primary context=%" PRIx64
+                    " secondary context=%" PRIx64
+                    "\n",
+                    address, paddr, vaddr, mmu_idx, env->tl,
+                    env->dmmu.mmu_primary_context,
+                    env->dmmu.mmu_secondary_context);
+
         tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size);
         return 0;
     }
@@ -628,7 +670,7 @@ void dump_mmu(CPUState *env)
                        env->dtlb[i].tte & 0x2? "RW": "RO",
                        env->dtlb[i].tte & 0x40? "locked": "unlocked",
                        env->dtlb[i].tag & (uint64_t)0x1fffULL,
-                       TTE_IS_GLOBAL(env->dtlb[i].tag)? "global" : "local");
+                       TTE_IS_GLOBAL(env->dtlb[i].tte)? "global" : "local");
             }
         }
     }
@@ -662,7 +704,7 @@ void dump_mmu(CPUState *env)
                        env->itlb[i].tte & 0x4? "priv": "user",
                        env->itlb[i].tte & 0x40? "locked": "unlocked",
                        env->itlb[i].tag & (uint64_t)0x1fffULL,
-                       TTE_IS_GLOBAL(env->itlb[i].tag)? "global" : "local");
+                       TTE_IS_GLOBAL(env->itlb[i].tte)? "global" : "local");
             }
         }
     }
@@ -693,7 +735,7 @@ target_phys_addr_t cpu_get_phys_page_nofault(CPUState *env, target_ulong addr,
 
 target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
 {
-    return cpu_get_phys_page_nofault(env, addr, MMU_KERNEL_IDX);
+    return cpu_get_phys_page_nofault(env, addr, cpu_mmu_index(env));
 }
 #endif
 
@@ -721,12 +763,12 @@ void cpu_reset(CPUSPARCState *env)
 #else
 #if !defined(TARGET_SPARC64)
     env->psret = 0;
-#endif
     env->psrs = 1;
     env->psrps = 1;
+#endif
 #ifdef TARGET_SPARC64
     env->pstate = PS_PRIV|PS_RED|PS_PEF|PS_AG;
-    env->hpstate = HS_PRIV;
+    env->hpstate = cpu_has_hypervisor(env) ? HS_PRIV : 0;
     env->tl = env->maxtl;
     cpu_tsptr(env)->tt = TT_POWER_ON_RESET;
     env->lsu = 0;
@@ -1310,7 +1352,7 @@ static int cpu_sparc_find_by_name(sparc_def_t *cpu_def, const char *cpu_model)
     char *featurestr, *name = strtok(s, ",");
     uint32_t plus_features = 0;
     uint32_t minus_features = 0;
-    long long iu_version;
+    uint64_t iu_version;
     uint32_t fpu_version, mmu_version, nwindows;
 
     for (i = 0; i < ARRAY_SIZE(sparc_defs); i++) {
@@ -1342,7 +1384,7 @@ static int cpu_sparc_find_by_name(sparc_def_t *cpu_def, const char *cpu_model)
                 }
                 cpu_def->iu_version = iu_version;
 #ifdef DEBUG_FEATURES
-                fprintf(stderr, "iu_version %llx\n", iu_version);
+                fprintf(stderr, "iu_version %" PRIx64 "\n", iu_version);
 #endif
             } else if (!strcmp(featurestr, "fpu_version")) {
                 char *err;
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index b1451d346f..edeeb4469a 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -901,14 +901,15 @@ static uint32_t compute_C_flags(void)
     return env->psr & PSR_CARRY;
 }
 
-static inline uint32_t get_NZ_icc(target_ulong dst)
+static inline uint32_t get_NZ_icc(int32_t dst)
 {
     uint32_t ret = 0;
 
-    if (!(dst & 0xffffffffULL))
-        ret |= PSR_ZERO;
-    if ((int32_t) (dst & 0xffffffffULL) < 0)
-        ret |= PSR_NEG;
+    if (dst == 0) {
+        ret = PSR_ZERO;
+    } else if (dst < 0) {
+        ret = PSR_NEG;
+    }
     return ret;
 }
 
@@ -923,14 +924,15 @@ static uint32_t compute_C_flags_xcc(void)
     return env->xcc & PSR_CARRY;
 }
 
-static inline uint32_t get_NZ_xcc(target_ulong dst)
+static inline uint32_t get_NZ_xcc(target_long dst)
 {
     uint32_t ret = 0;
 
-    if (!dst)
-        ret |= PSR_ZERO;
-    if ((int64_t)dst < 0)
-        ret |= PSR_NEG;
+    if (!dst) {
+        ret = PSR_ZERO;
+    } else if (dst < 0) {
+        ret = PSR_NEG;
+    }
     return ret;
 }
 #endif
@@ -939,8 +941,9 @@ static inline uint32_t get_V_div_icc(target_ulong src2)
 {
     uint32_t ret = 0;
 
-    if (src2 != 0)
-        ret |= PSR_OVF;
+    if (src2 != 0) {
+        ret = PSR_OVF;
+    }
     return ret;
 }
 
@@ -958,26 +961,35 @@ static uint32_t compute_C_div(void)
     return 0;
 }
 
-/* carry = (src1[31] & src2[31]) | ( ~dst[31] & (src1[31] | src2[31])) */
-static inline uint32_t get_C_add_icc(target_ulong dst, target_ulong src1,
-                                     target_ulong src2)
+static inline uint32_t get_C_add_icc(uint32_t dst, uint32_t src1)
 {
     uint32_t ret = 0;
 
-    if (((src1 & (1ULL << 31)) & (src2 & (1ULL << 31)))
-        | ((~(dst & (1ULL << 31)))
-           & ((src1 & (1ULL << 31)) | (src2 & (1ULL << 31)))))
-        ret |= PSR_CARRY;
+    if (dst < src1) {
+        ret = PSR_CARRY;
+    }
     return ret;
 }
 
-static inline uint32_t get_V_add_icc(target_ulong dst, target_ulong src1,
-                                         target_ulong src2)
+static inline uint32_t get_C_addx_icc(uint32_t dst, uint32_t src1,
+                                      uint32_t src2)
+{
+    uint32_t ret = 0;
+
+    if (((src1 & src2) | (~dst & (src1 | src2))) & (1U << 31)) {
+        ret = PSR_CARRY;
+    }
+    return ret;
+}
+
+static inline uint32_t get_V_add_icc(uint32_t dst, uint32_t src1,
+                                     uint32_t src2)
 {
     uint32_t ret = 0;
 
-    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 31))
-        ret |= PSR_OVF;
+    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1U << 31)) {
+        ret = PSR_OVF;
+    }
     return ret;
 }
 
@@ -986,8 +998,20 @@ static inline uint32_t get_C_add_xcc(target_ulong dst, target_ulong src1)
 {
     uint32_t ret = 0;
 
-    if (dst < src1)
-        ret |= PSR_CARRY;
+    if (dst < src1) {
+        ret = PSR_CARRY;
+    }
+    return ret;
+}
+
+static inline uint32_t get_C_addx_xcc(target_ulong dst, target_ulong src1,
+                                      target_ulong src2)
+{
+    uint32_t ret = 0;
+
+    if (((src1 & src2) | (~dst & (src1 | src2))) & (1ULL << 63)) {
+        ret = PSR_CARRY;
+    }
     return ret;
 }
 
@@ -996,8 +1020,9 @@ static inline uint32_t get_V_add_xcc(target_ulong dst, target_ulong src1,
 {
     uint32_t ret = 0;
 
-    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 63))
-        ret |= PSR_OVF;
+    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 63)) {
+        ret = PSR_OVF;
+    }
     return ret;
 }
 
@@ -1022,14 +1047,14 @@ static uint32_t compute_all_add(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
     ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
 
 static uint32_t compute_C_add(void)
 {
-    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_add_icc(CC_DST, CC_SRC);
 }
 
 #ifdef TARGET_SPARC64
@@ -1038,8 +1063,7 @@ static uint32_t compute_all_addx_xcc(void)
     uint32_t ret;
 
     ret = get_NZ_xcc(CC_DST);
-    ret |= get_C_add_xcc(CC_DST - CC_SRC2, CC_SRC);
-    ret |= get_C_add_xcc(CC_DST, CC_SRC);
+    ret |= get_C_addx_xcc(CC_DST, CC_SRC, CC_SRC2);
     ret |= get_V_add_xcc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
@@ -1048,18 +1072,36 @@ static uint32_t compute_C_addx_xcc(void)
 {
     uint32_t ret;
 
-    ret = get_C_add_xcc(CC_DST - CC_SRC2, CC_SRC);
-    ret |= get_C_add_xcc(CC_DST, CC_SRC);
+    ret = get_C_addx_xcc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
 #endif
 
+static uint32_t compute_all_addx(void)
+{
+    uint32_t ret;
+
+    ret = get_NZ_icc(CC_DST);
+    ret |= get_C_addx_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    return ret;
+}
+
+static uint32_t compute_C_addx(void)
+{
+    uint32_t ret;
+
+    ret = get_C_addx_icc(CC_DST, CC_SRC, CC_SRC2);
+    return ret;
+}
+
 static inline uint32_t get_V_tag_icc(target_ulong src1, target_ulong src2)
 {
     uint32_t ret = 0;
 
-    if ((src1 | src2) & 0x3)
-        ret |= PSR_OVF;
+    if ((src1 | src2) & 0x3) {
+        ret = PSR_OVF;
+    }
     return ret;
 }
 
@@ -1068,51 +1110,50 @@ static uint32_t compute_all_tadd(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
     ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
     ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
     return ret;
 }
 
-static uint32_t compute_C_tadd(void)
-{
-    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
-}
-
 static uint32_t compute_all_taddtv(void)
 {
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
     return ret;
 }
 
-static uint32_t compute_C_taddtv(void)
+static inline uint32_t get_C_sub_icc(uint32_t src1, uint32_t src2)
 {
-    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    uint32_t ret = 0;
+
+    if (src1 < src2) {
+        ret = PSR_CARRY;
+    }
+    return ret;
 }
 
-/* carry = (~src1[31] & src2[31]) | ( dst[31]  & (~src1[31] | src2[31])) */
-static inline uint32_t get_C_sub_icc(target_ulong dst, target_ulong src1,
-                                     target_ulong src2)
+static inline uint32_t get_C_subx_icc(uint32_t dst, uint32_t src1,
+                                      uint32_t src2)
 {
     uint32_t ret = 0;
 
-    if (((~(src1 & (1ULL << 31))) & (src2 & (1ULL << 31)))
-        | ((dst & (1ULL << 31)) & (( ~(src1 & (1ULL << 31)))
-                                   | (src2 & (1ULL << 31)))))
-        ret |= PSR_CARRY;
+    if (((~src1 & src2) | (dst & (~src1 | src2))) & (1U << 31)) {
+        ret = PSR_CARRY;
+    }
     return ret;
 }
 
-static inline uint32_t get_V_sub_icc(target_ulong dst, target_ulong src1,
-                                     target_ulong src2)
+static inline uint32_t get_V_sub_icc(uint32_t dst, uint32_t src1,
+                                     uint32_t src2)
 {
     uint32_t ret = 0;
 
-    if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 31))
-        ret |= PSR_OVF;
+    if (((src1 ^ src2) & (src1 ^ dst)) & (1U << 31)) {
+        ret = PSR_OVF;
+    }
     return ret;
 }
 
@@ -1122,8 +1163,20 @@ static inline uint32_t get_C_sub_xcc(target_ulong src1, target_ulong src2)
 {
     uint32_t ret = 0;
 
-    if (src1 < src2)
-        ret |= PSR_CARRY;
+    if (src1 < src2) {
+        ret = PSR_CARRY;
+    }
+    return ret;
+}
+
+static inline uint32_t get_C_subx_xcc(target_ulong dst, target_ulong src1,
+                                      target_ulong src2)
+{
+    uint32_t ret = 0;
+
+    if (((~src1 & src2) | (dst & (~src1 | src2))) & (1ULL << 63)) {
+        ret = PSR_CARRY;
+    }
     return ret;
 }
 
@@ -1132,8 +1185,9 @@ static inline uint32_t get_V_sub_xcc(target_ulong dst, target_ulong src1,
 {
     uint32_t ret = 0;
 
-    if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 63))
-        ret |= PSR_OVF;
+    if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 63)) {
+        ret = PSR_OVF;
+    }
     return ret;
 }
 
@@ -1158,14 +1212,14 @@ static uint32_t compute_all_sub(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
     ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
 
 static uint32_t compute_C_sub(void)
 {
-    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_sub_icc(CC_SRC, CC_SRC2);
 }
 
 #ifdef TARGET_SPARC64
@@ -1174,8 +1228,7 @@ static uint32_t compute_all_subx_xcc(void)
     uint32_t ret;
 
     ret = get_NZ_xcc(CC_DST);
-    ret |= get_C_sub_xcc(CC_DST - CC_SRC2, CC_SRC);
-    ret |= get_C_sub_xcc(CC_DST, CC_SRC2);
+    ret |= get_C_subx_xcc(CC_DST, CC_SRC, CC_SRC2);
     ret |= get_V_sub_xcc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
@@ -1184,40 +1237,47 @@ static uint32_t compute_C_subx_xcc(void)
 {
     uint32_t ret;
 
-    ret = get_C_sub_xcc(CC_DST - CC_SRC2, CC_SRC);
-    ret |= get_C_sub_xcc(CC_DST, CC_SRC2);
+    ret = get_C_subx_xcc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
 #endif
 
-static uint32_t compute_all_tsub(void)
+static uint32_t compute_all_subx(void)
 {
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_subx_icc(CC_DST, CC_SRC, CC_SRC2);
     ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
-    ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
     return ret;
 }
 
-static uint32_t compute_C_tsub(void)
+static uint32_t compute_C_subx(void)
 {
-    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    uint32_t ret;
+
+    ret = get_C_subx_icc(CC_DST, CC_SRC, CC_SRC2);
+    return ret;
 }
 
-static uint32_t compute_all_tsubtv(void)
+static uint32_t compute_all_tsub(void)
 {
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
+    ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
     return ret;
 }
 
-static uint32_t compute_C_tsubtv(void)
+static uint32_t compute_all_tsubtv(void)
 {
-    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    uint32_t ret;
+
+    ret = get_NZ_icc(CC_DST);
+    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
+    return ret;
 }
 
 static uint32_t compute_all_logic(void)
@@ -1247,13 +1307,13 @@ static const CCTable icc_table[CC_OP_NB] = {
     [CC_OP_FLAGS] = { compute_all_flags, compute_C_flags },
     [CC_OP_DIV] = { compute_all_div, compute_C_div },
     [CC_OP_ADD] = { compute_all_add, compute_C_add },
-    [CC_OP_ADDX] = { compute_all_add, compute_C_add },
-    [CC_OP_TADD] = { compute_all_tadd, compute_C_tadd },
-    [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_taddtv },
+    [CC_OP_ADDX] = { compute_all_addx, compute_C_addx },
+    [CC_OP_TADD] = { compute_all_tadd, compute_C_add },
+    [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_add },
     [CC_OP_SUB] = { compute_all_sub, compute_C_sub },
-    [CC_OP_SUBX] = { compute_all_sub, compute_C_sub },
-    [CC_OP_TSUB] = { compute_all_tsub, compute_C_tsub },
-    [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_tsubtv },
+    [CC_OP_SUBX] = { compute_all_subx, compute_C_subx },
+    [CC_OP_TSUB] = { compute_all_tsub, compute_C_sub },
+    [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_sub },
     [CC_OP_LOGIC] = { compute_all_logic, compute_C_logic },
 };
 
@@ -1344,11 +1404,7 @@ static target_ulong get_psr(void)
         (env->psrps? PSR_PS : 0) |
         (env->psret? PSR_ET : 0) | env->cwp;
 #else
-    return env->version | (env->psr & PSR_ICC) |
-        (env->psref? PSR_EF : 0) |
-        (env->psrpil << 8) |
-        (env->psrs? PSR_S : 0) |
-        (env->psrps? PSR_PS : 0) | env->cwp;
+    return env->psr & PSR_ICC;
 #endif
 }
 
@@ -1367,17 +1423,19 @@ target_ulong cpu_get_psr(CPUState *env1)
 static void put_psr(target_ulong val)
 {
     env->psr = val & PSR_ICC;
+#if !defined (TARGET_SPARC64)
     env->psref = (val & PSR_EF)? 1 : 0;
     env->psrpil = (val & PSR_PIL) >> 8;
+#endif
 #if ((!defined (TARGET_SPARC64)) && !defined(CONFIG_USER_ONLY))
     cpu_check_irqs(env);
 #endif
+#if !defined (TARGET_SPARC64)
     env->psrs = (val & PSR_S)? 1 : 0;
     env->psrps = (val & PSR_PS)? 1 : 0;
-#if !defined (TARGET_SPARC64)
     env->psret = (val & PSR_ET)? 1 : 0;
-#endif
     set_cwp(val & PSR_CWP);
+#endif
     env->cc_op = CC_OP_FLAGS;
 }
 
@@ -2266,7 +2324,7 @@ uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign)
     asi &= 0xff;
 
     if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-        || ((env->def->features & CPU_FEATURE_HYPV)
+        || (cpu_has_hypervisor(env)
             && asi >= 0x30 && asi < 0x80
             && !(env->hpstate & HS_PRIV)))
         raise_exception(TT_PRIV_ACT);
@@ -2301,8 +2359,7 @@ uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign)
     case 0xe2: // UA2007 Primary block init
     case 0xe3: // UA2007 Secondary block init
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
-            if ((env->def->features & CPU_FEATURE_HYPV)
-                && env->hpstate & HS_PRIV) {
+            if (cpu_hypervisor_mode(env)) {
                 switch(size) {
                 case 1:
                     ret = ldub_hypv(addr);
@@ -2618,7 +2675,7 @@ void helper_st_asi(target_ulong addr, target_ulong val, int asi, int size)
     asi &= 0xff;
 
     if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-        || ((env->def->features & CPU_FEATURE_HYPV)
+        || (cpu_has_hypervisor(env)
             && asi >= 0x30 && asi < 0x80
             && !(env->hpstate & HS_PRIV)))
         raise_exception(TT_PRIV_ACT);
@@ -2662,8 +2719,7 @@ void helper_st_asi(target_ulong addr, target_ulong val, int asi, int size)
     case 0xe2: // UA2007 Primary block init
     case 0xe3: // UA2007 Secondary block init
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
-            if ((env->def->features & CPU_FEATURE_HYPV)
-                && env->hpstate & HS_PRIV) {
+            if (cpu_hypervisor_mode(env)) {
                 switch(size) {
                 case 1:
                     stb_hypv(addr, val);
@@ -2903,9 +2959,15 @@ void helper_st_asi(target_ulong addr, target_ulong val, int asi, int size)
                 break;
             case 1: // Primary context
                 env->dmmu.mmu_primary_context = val;
+                /* can be optimized to only flush MMU_USER_IDX
+                   and MMU_KERNEL_IDX entries */
+                tlb_flush(env, 1);
                 break;
             case 2: // Secondary context
                 env->dmmu.mmu_secondary_context = val;
+                /* can be optimized to only flush MMU_USER_SECONDARY_IDX
+                   and MMU_KERNEL_SECONDARY_IDX entries */
+                tlb_flush(env, 1);
                 break;
             case 5: // TSB access
                 DPRINTF_MMU("dmmu TSB write: 0x%016" PRIx64 " -> 0x%016"
@@ -2988,7 +3050,7 @@ void helper_st_asi(target_ulong addr, target_ulong val, int asi, int size)
 void helper_ldda_asi(target_ulong addr, int asi, int rd)
 {
     if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-        || ((env->def->features & CPU_FEATURE_HYPV)
+        || (cpu_has_hypervisor(env)
             && asi >= 0x30 && asi < 0x80
             && !(env->hpstate & HS_PRIV)))
         raise_exception(TT_PRIV_ACT);
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index ea7c71b85a..72ca0b4dce 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -183,9 +183,9 @@ static void gen_op_store_QT0_fpr(unsigned int dst)
 #define hypervisor(dc) 0
 #endif
 #else
-#define supervisor(dc) (dc->mem_idx >= 1)
+#define supervisor(dc) (dc->mem_idx >= MMU_KERNEL_IDX)
 #ifdef TARGET_SPARC64
-#define hypervisor(dc) (dc->mem_idx == 2)
+#define hypervisor(dc) (dc->mem_idx == MMU_HYPV_IDX)
 #else
 #endif
 #endif
@@ -332,24 +332,132 @@ static inline void gen_op_add_cc(TCGv dst, TCGv src1, TCGv src2)
     tcg_gen_mov_tl(dst, cpu_cc_dst);
 }
 
-static inline void gen_op_addxi_cc(TCGv dst, TCGv src1, target_long src2)
+static TCGv_i32 gen_add32_carry32(void)
 {
-    gen_helper_compute_C_icc(cpu_tmp0);
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_movi_tl(cpu_cc_src2, src2);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
-    tcg_gen_addi_tl(cpu_cc_dst, cpu_cc_dst, src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
+    TCGv_i32 carry_32, cc_src1_32, cc_src2_32;
+
+    /* Carry is computed from a previous add: (dst < src)  */
+#if TARGET_LONG_BITS == 64
+    cc_src1_32 = tcg_temp_new_i32();
+    cc_src2_32 = tcg_temp_new_i32();
+    tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_dst);
+    tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src);
+#else
+    cc_src1_32 = cpu_cc_dst;
+    cc_src2_32 = cpu_cc_src;
+#endif
+
+    carry_32 = tcg_temp_new_i32();
+    tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32);
+
+#if TARGET_LONG_BITS == 64
+    tcg_temp_free_i32(cc_src1_32);
+    tcg_temp_free_i32(cc_src2_32);
+#endif
+
+    return carry_32;
 }
 
-static inline void gen_op_addx_cc(TCGv dst, TCGv src1, TCGv src2)
+static TCGv_i32 gen_sub32_carry32(void)
 {
-    gen_helper_compute_C_icc(cpu_tmp0);
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_dst, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
+    TCGv_i32 carry_32, cc_src1_32, cc_src2_32;
+
+    /* Carry is computed from a previous borrow: (src1 < src2)  */
+#if TARGET_LONG_BITS == 64
+    cc_src1_32 = tcg_temp_new_i32();
+    cc_src2_32 = tcg_temp_new_i32();
+    tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_src);
+    tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src2);
+#else
+    cc_src1_32 = cpu_cc_src;
+    cc_src2_32 = cpu_cc_src2;
+#endif
+
+    carry_32 = tcg_temp_new_i32();
+    tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32);
+
+#if TARGET_LONG_BITS == 64
+    tcg_temp_free_i32(cc_src1_32);
+    tcg_temp_free_i32(cc_src2_32);
+#endif
+
+    return carry_32;
+}
+
+static void gen_op_addx_int(DisasContext *dc, TCGv dst, TCGv src1,
+                            TCGv src2, int update_cc)
+{
+    TCGv_i32 carry_32;
+    TCGv carry;
+
+    switch (dc->cc_op) {
+    case CC_OP_DIV:
+    case CC_OP_LOGIC:
+        /* Carry is known to be zero.  Fall back to plain ADD.  */
+        if (update_cc) {
+            gen_op_add_cc(dst, src1, src2);
+        } else {
+            tcg_gen_add_tl(dst, src1, src2);
+        }
+        return;
+
+    case CC_OP_ADD:
+    case CC_OP_TADD:
+    case CC_OP_TADDTV:
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+        {
+            /* For 32-bit hosts, we can re-use the host's hardware carry
+               generation by using an ADD2 opcode.  We discard the low
+               part of the output.  Ideally we'd combine this operation
+               with the add that generated the carry in the first place.  */
+            TCGv dst_low = tcg_temp_new();
+            tcg_gen_op6_i32(INDEX_op_add2_i32, dst_low, dst,
+                            cpu_cc_src, src1, cpu_cc_src2, src2);
+            tcg_temp_free(dst_low);
+            goto add_done;
+        }
+#endif
+        carry_32 = gen_add32_carry32();
+        break;
+
+    case CC_OP_SUB:
+    case CC_OP_TSUB:
+    case CC_OP_TSUBTV:
+        carry_32 = gen_sub32_carry32();
+        break;
+
+    default:
+        /* We need external help to produce the carry.  */
+        carry_32 = tcg_temp_new_i32();
+        gen_helper_compute_C_icc(carry_32);
+        break;
+    }
+
+#if TARGET_LONG_BITS == 64
+    carry = tcg_temp_new();
+    tcg_gen_extu_i32_i64(carry, carry_32);
+#else
+    carry = carry_32;
+#endif
+
+    tcg_gen_add_tl(dst, src1, src2);
+    tcg_gen_add_tl(dst, dst, carry);
+
+    tcg_temp_free_i32(carry_32);
+#if TARGET_LONG_BITS == 64
+    tcg_temp_free(carry);
+#endif
+
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+ add_done:
+#endif
+    if (update_cc) {
+        tcg_gen_mov_tl(cpu_cc_src, src1);
+        tcg_gen_mov_tl(cpu_cc_src2, src2);
+        tcg_gen_mov_tl(cpu_cc_dst, dst);
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX);
+        dc->cc_op = CC_OP_ADDX;
+    }
 }
 
 static inline void gen_op_tadd_cc(TCGv dst, TCGv src1, TCGv src2)
@@ -415,24 +523,80 @@ static inline void gen_op_sub_cc(TCGv dst, TCGv src1, TCGv src2)
     tcg_gen_mov_tl(dst, cpu_cc_dst);
 }
 
-static inline void gen_op_subxi_cc(TCGv dst, TCGv src1, target_long src2)
+static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
+                            TCGv src2, int update_cc)
 {
-    gen_helper_compute_C_icc(cpu_tmp0);
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_movi_tl(cpu_cc_src2, src2);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
-    tcg_gen_subi_tl(cpu_cc_dst, cpu_cc_dst, src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
+    TCGv_i32 carry_32;
+    TCGv carry;
 
-static inline void gen_op_subx_cc(TCGv dst, TCGv src1, TCGv src2)
-{
-    gen_helper_compute_C_icc(cpu_tmp0);
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_dst, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
+    switch (dc->cc_op) {
+    case CC_OP_DIV:
+    case CC_OP_LOGIC:
+        /* Carry is known to be zero.  Fall back to plain SUB.  */
+        if (update_cc) {
+            gen_op_sub_cc(dst, src1, src2);
+        } else {
+            tcg_gen_sub_tl(dst, src1, src2);
+        }
+        return;
+
+    case CC_OP_ADD:
+    case CC_OP_TADD:
+    case CC_OP_TADDTV:
+        carry_32 = gen_add32_carry32();
+        break;
+
+    case CC_OP_SUB:
+    case CC_OP_TSUB:
+    case CC_OP_TSUBTV:
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+        {
+            /* For 32-bit hosts, we can re-use the host's hardware carry
+               generation by using a SUB2 opcode.  We discard the low
+               part of the output.  Ideally we'd combine this operation
+               with the add that generated the carry in the first place.  */
+            TCGv dst_low = tcg_temp_new();
+            tcg_gen_op6_i32(INDEX_op_sub2_i32, dst_low, dst,
+                            cpu_cc_src, src1, cpu_cc_src2, src2);
+            tcg_temp_free(dst_low);
+            goto sub_done;
+        }
+#endif
+        carry_32 = gen_sub32_carry32();
+        break;
+
+    default:
+        /* We need external help to produce the carry.  */
+        carry_32 = tcg_temp_new_i32();
+        gen_helper_compute_C_icc(carry_32);
+        break;
+    }
+
+#if TARGET_LONG_BITS == 64
+    carry = tcg_temp_new();
+    tcg_gen_extu_i32_i64(carry, carry_32);
+#else
+    carry = carry_32;
+#endif
+
+    tcg_gen_sub_tl(dst, src1, src2);
+    tcg_gen_sub_tl(dst, dst, carry);
+
+    tcg_temp_free_i32(carry_32);
+#if TARGET_LONG_BITS == 64
+    tcg_temp_free(carry);
+#endif
+
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+ sub_done:
+#endif
+    if (update_cc) {
+        tcg_gen_mov_tl(cpu_cc_src, src1);
+        tcg_gen_mov_tl(cpu_cc_src2, src2);
+        tcg_gen_mov_tl(cpu_cc_dst, dst);
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX);
+        dc->cc_op = CC_OP_SUBX;
+    }
 }
 
 static inline void gen_op_tsub_cc(TCGv dst, TCGv src1, TCGv src2)
@@ -2950,28 +3114,8 @@ static void disas_sparc_insn(DisasContext * dc)
                         }
                         break;
                     case 0x8: /* addx, V9 addc */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            if (xop & 0x10) {
-                                gen_op_addxi_cc(cpu_dst, cpu_src1, simm);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX);
-                                dc->cc_op = CC_OP_ADDX;
-                            } else {
-                                gen_helper_compute_C_icc(cpu_tmp0);
-                                tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, simm);
-                                tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_tmp0);
-                            }
-                        } else {
-                            if (xop & 0x10) {
-                                gen_op_addx_cc(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX);
-                                dc->cc_op = CC_OP_ADDX;
-                            } else {
-                                gen_helper_compute_C_icc(cpu_tmp0);
-                                tcg_gen_add_tl(cpu_tmp0, cpu_src2, cpu_tmp0);
-                                tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_tmp0);
-                            }
-                        }
+                        gen_op_addx_int(dc, cpu_dst, cpu_src1, cpu_src2,
+                                        (xop & 0x10));
                         break;
 #ifdef TARGET_SPARC64
                     case 0x9: /* V9 mulx */
@@ -3002,28 +3146,8 @@ static void disas_sparc_insn(DisasContext * dc)
                         }
                         break;
                     case 0xc: /* subx, V9 subc */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            if (xop & 0x10) {
-                                gen_op_subxi_cc(cpu_dst, cpu_src1, simm);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX);
-                                dc->cc_op = CC_OP_SUBX;
-                            } else {
-                                gen_helper_compute_C_icc(cpu_tmp0);
-                                tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, simm);
-                                tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_tmp0);
-                            }
-                        } else {
-                            if (xop & 0x10) {
-                                gen_op_subx_cc(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX);
-                                dc->cc_op = CC_OP_SUBX;
-                            } else {
-                                gen_helper_compute_C_icc(cpu_tmp0);
-                                tcg_gen_add_tl(cpu_tmp0, cpu_src2, cpu_tmp0);
-                                tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_tmp0);
-                            }
-                        }
+                        gen_op_subx_int(dc, cpu_dst, cpu_src1, cpu_src2,
+                                        (xop & 0x10));
                         break;
 #ifdef TARGET_SPARC64
                     case 0xd: /* V9 udivx */
@@ -3360,14 +3484,14 @@ static void disas_sparc_insn(DisasContext * dc)
                             case 6: // pstate
                                 save_state(dc, cpu_cond);
                                 gen_helper_wrpstate(cpu_tmp0);
-                                gen_op_next_insn();
-                                tcg_gen_exit_tb(0);
-                                dc->is_br = 1;
+                                dc->npc = DYNAMIC_PC;
                                 break;
                             case 7: // tl
+                                save_state(dc, cpu_cond);
                                 tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
                                 tcg_gen_st_i32(cpu_tmp32, cpu_env,
                                                offsetof(CPUSPARCState, tl));
+                                dc->npc = DYNAMIC_PC;
                                 break;
                             case 8: // pil
                                 gen_helper_wrpil(cpu_tmp0);
@@ -4426,6 +4550,7 @@ static void disas_sparc_insn(DisasContext * dc)
 #endif
                     save_state(dc, cpu_cond);
                     gen_st_asi(cpu_val, cpu_addr, insn, 4);
+                    dc->npc = DYNAMIC_PC;
                     break;
                 case 0x15: /* stba, store byte alternate */
 #ifndef TARGET_SPARC64
@@ -4436,6 +4561,7 @@ static void disas_sparc_insn(DisasContext * dc)
 #endif
                     save_state(dc, cpu_cond);
                     gen_st_asi(cpu_val, cpu_addr, insn, 1);
+                    dc->npc = DYNAMIC_PC;
                     break;
                 case 0x16: /* stha, store halfword alternate */
 #ifndef TARGET_SPARC64
@@ -4446,6 +4572,7 @@ static void disas_sparc_insn(DisasContext * dc)
 #endif
                     save_state(dc, cpu_cond);
                     gen_st_asi(cpu_val, cpu_addr, insn, 2);
+                    dc->npc = DYNAMIC_PC;
                     break;
                 case 0x17: /* stda, store double word alternate */
 #ifndef TARGET_SPARC64
@@ -4470,6 +4597,7 @@ static void disas_sparc_insn(DisasContext * dc)
                 case 0x1e: /* V9 stxa */
                     save_state(dc, cpu_cond);
                     gen_st_asi(cpu_val, cpu_addr, insn, 8);
+                    dc->npc = DYNAMIC_PC;
                     break;
 #endif
                 default:
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index cb605f1061..558c21f67f 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -1629,11 +1629,10 @@ void tcg_target_qemu_prologue(TCGContext *s)
     }
 
 #ifdef CONFIG_USE_GUEST_BASE
-    /* Note that GUEST_BASE can change after the prologue is generated.
-       To combat that, load the value from the variable instead of
-       embedding a constant here.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG,
-               TCG_REG_R0, (tcg_target_long)&guest_base);
+    if (GUEST_BASE != 0) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
 #endif
 
     /* Jump to TB, and adjust R18 to be the return address.  */
@@ -1679,9 +1678,6 @@ void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_DP);  /* data pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);  /* stack pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R31); /* ble link reg */
-#ifdef CONFIG_USE_GUEST_BASE
-    tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
-#endif
 
     tcg_add_target_add_op_defs(hppa_op_defs);
 }
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4e33b25ef8..396a2f1d53 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -50,7 +50,7 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_EAX, TCG_REG_EDX };
 
 static uint8_t *tb_ret_addr;
 
-static void patch_reloc(uint8_t *code_ptr, int type, 
+static void patch_reloc(uint8_t *code_ptr, int type,
                         tcg_target_long value, tcg_target_long addend)
 {
     value += addend;
@@ -156,6 +156,50 @@ static inline int tcg_target_const_match(tcg_target_long val,
         return 0;
 }
 
+#define P_EXT   0x100 /* 0x0f opcode prefix */
+
+#define OPC_ARITH_EvIz	(0x81)
+#define OPC_ARITH_EvIb	(0x83)
+#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
+#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSWAP	(0xc8 | P_EXT)
+#define OPC_CALL_Jz	(0xe8)
+#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
+#define OPC_DEC_r32	(0x48)
+#define OPC_IMUL_GvEv	(0xaf | P_EXT)
+#define OPC_IMUL_GvEvIb	(0x6b)
+#define OPC_IMUL_GvEvIz	(0x69)
+#define OPC_INC_r32	(0x40)
+#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
+#define OPC_JCC_short	(0x70)		/* ... plus condition code */
+#define OPC_JMP_long	(0xe9)
+#define OPC_JMP_short	(0xeb)
+#define OPC_LEA         (0x8d)
+#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
+#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
+#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVL_Iv     (0xb8)
+#define OPC_MOVSBL	(0xbe | P_EXT)
+#define OPC_MOVSWL	(0xbf | P_EXT)
+#define OPC_MOVZBL	(0xb6 | P_EXT)
+#define OPC_MOVZWL	(0xb7 | P_EXT)
+#define OPC_POP_r32	(0x58)
+#define OPC_PUSH_r32	(0x50)
+#define OPC_PUSH_Iv	(0x68)
+#define OPC_PUSH_Ib	(0x6a)
+#define OPC_RET		(0xc3)
+#define OPC_SETCC	(0x90 | P_EXT)	/* ... plus condition code */
+#define OPC_SHIFT_1	(0xd1)
+#define OPC_SHIFT_Ib	(0xc1)
+#define OPC_SHIFT_cl	(0xd3)
+#define OPC_TESTL	(0x85)
+#define OPC_XCHG_ax_r32	(0x90)
+
+#define OPC_GRP3_Ev	(0xf7)
+#define OPC_GRP5	(0xff)
+
+/* Group 1 opcode extensions for 0x80-0x83.
+   These are also used as modifiers for OPC_ARITH.  */
 #define ARITH_ADD 0
 #define ARITH_OR  1
 #define ARITH_ADC 2
@@ -165,12 +209,26 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define ARITH_XOR 6
 #define ARITH_CMP 7
 
+/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
 #define SHIFT_ROL 0
 #define SHIFT_ROR 1
 #define SHIFT_SHL 4
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
+#define EXT3_NOT   2
+#define EXT3_NEG   3
+#define EXT3_MUL   4
+#define EXT3_IMUL  5
+#define EXT3_DIV   6
+#define EXT3_IDIV  7
+
+/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
+#define EXT5_CALLN_Ev	2
+#define EXT5_JMPN_Ev	4
+
+/* Condition codes to be added to OPC_JCC_{long,short}.  */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -189,8 +247,6 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-#define P_EXT   0x100 /* 0x0f opcode prefix */
-
 static const uint8_t tcg_cond_to_jcc[10] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
@@ -217,91 +273,192 @@ static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
     tcg_out8(s, 0xc0 | (r << 3) | rm);
 }
 
-/* rm == -1 means no register index */
-static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, 
-                                        int32_t offset)
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
+   We handle either RM and INDEX missing with a -1 value.  */
+
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+                                     int index, int shift, int32_t offset)
 {
+    int mod, len;
+
+    if (index == -1 && rm == -1) {
+        /* Absolute address.  */
+        tcg_out_opc(s, opc);
+        tcg_out8(s, (r << 3) | 5);
+        tcg_out32(s, offset);
+        return;
+    }
+
     tcg_out_opc(s, opc);
+
+    /* Find the length of the immediate addend.  Note that the encoding
+       that would be used for (%ebp) indicates absolute addressing.  */
     if (rm == -1) {
-        tcg_out8(s, 0x05 | (r << 3));
-        tcg_out32(s, offset);
+        mod = 0, len = 4, rm = 5;
     } else if (offset == 0 && rm != TCG_REG_EBP) {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x04 | (r << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x00 | (r << 3) | rm);
-        }
-    } else if ((int8_t)offset == offset) {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x44 | (r << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x40 | (r << 3) | rm);
-        }
-        tcg_out8(s, offset);
+        mod = 0, len = 0;
+    } else if (offset == (int8_t)offset) {
+        mod = 0x40, len = 1;
     } else {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x84 | (r << 3));
-            tcg_out8(s, 0x24);
+        mod = 0x80, len = 4;
+    }
+
+    /* Use a single byte MODRM format if possible.  Note that the encoding
+       that would be used for %esp is the escape to the two byte form.  */
+    if (index == -1 && rm != TCG_REG_ESP) {
+        /* Single byte MODRM format.  */
+        tcg_out8(s, mod | (r << 3) | rm);
+    } else {
+        /* Two byte MODRM+SIB format.  */
+
+        /* Note that the encoding that would place %esp into the index
+           field indicates no index register.  */
+        if (index == -1) {
+            index = 4;
         } else {
-            tcg_out8(s, 0x80 | (r << 3) | rm);
+            assert(index != TCG_REG_ESP);
         }
+
+        tcg_out8(s, mod | (r << 3) | 4);
+        tcg_out8(s, (shift << 6) | (index << 3) | rm);
+    }
+
+    if (len == 1) {
+        tcg_out8(s, offset);
+    } else if (len == 4) {
         tcg_out32(s, offset);
     }
 }
 
+/* rm == -1 means no register index */
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
+                                        int32_t offset)
+{
+    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
+}
+
+/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
+}
+
 static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
 {
-    if (arg != ret)
-        tcg_out_modrm(s, 0x8b, ret, arg);
+    if (arg != ret) {
+        tcg_out_modrm(s, OPC_MOVL_GvEv, ret, arg);
+    }
 }
 
 static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 int ret, int32_t arg)
 {
     if (arg == 0) {
-        /* xor r0,r0 */
-        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
+        tgen_arithr(s, ARITH_XOR, ret, ret);
     } else {
-        tcg_out8(s, 0xb8 + ret);
+        tcg_out8(s, OPC_MOVL_Iv + ret);
         tcg_out32(s, arg);
     }
 }
 
+static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
+{
+    if (val == (int8_t)val) {
+        tcg_out_opc(s, OPC_PUSH_Ib);
+        tcg_out8(s, val);
+    } else {
+        tcg_out_opc(s, OPC_PUSH_Iv);
+        tcg_out32(s, val);
+    }
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_PUSH_r32 + reg);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_POP_r32 + reg);
+}
+
 static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                               int arg1, tcg_target_long arg2)
 {
-    /* movl */
-    tcg_out_modrm_offset(s, 0x8b, ret, arg1, arg2);
+    tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                               int arg1, tcg_target_long arg2)
 {
-    /* movl */
-    tcg_out_modrm_offset(s, 0x89, arg, arg1, arg2);
+    tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
+}
+
+static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
+{
+    if (count == 1) {
+        tcg_out_modrm(s, OPC_SHIFT_1, subopc, reg);
+    } else {
+        tcg_out_modrm(s, OPC_SHIFT_Ib, subopc, reg);
+        tcg_out8(s, count);
+    }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_BSWAP + reg);
+}
+
+static inline void tcg_out_rolw_8(TCGContext *s, int reg)
+{
+    tcg_out8(s, 0x66);
+    tcg_out_shifti(s, SHIFT_ROL, reg, 8);
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
+{
+    /* movzbl */
+    assert(src < 4);
+    tcg_out_modrm(s, OPC_MOVZBL, dest, src);
+}
+
+static void tcg_out_ext8s(TCGContext *s, int dest, int src)
+{
+    /* movsbl */
+    assert(src < 4);
+    tcg_out_modrm(s, OPC_MOVSBL, dest, src);
 }
 
-static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
+static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
 {
-    if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
-        /* inc */
-        tcg_out_opc(s, 0x40 + r0);
-    } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
-        /* dec */
-        tcg_out_opc(s, 0x48 + r0);
+    /* movzwl */
+    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
+{
+    /* movswl */
+    tcg_out_modrm(s, OPC_MOVSWL, dest, src);
+}
+
+static inline void tgen_arithi(TCGContext *s, int c, int r0,
+                               int32_t val, int cf)
+{
+    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
+       partial flags update stalls on Pentium4 and are not recommended
+       by current Intel optimization manuals.  */
+    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+        int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
+        tcg_out_opc(s, opc + r0);
     } else if (val == (int8_t)val) {
-        tcg_out_modrm(s, 0x83, c, r0);
+        tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
         tcg_out8(s, val);
     } else if (c == ARITH_AND && val == 0xffu && r0 < 4) {
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT, r0, r0);
+        tcg_out_ext8u(s, r0, r0);
     } else if (c == ARITH_AND && val == 0xffffu) {
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0);
+        tcg_out_ext16u(s, r0, r0);
     } else {
-        tcg_out_modrm(s, 0x81, c, r0);
+        tcg_out_modrm(s, OPC_ARITH_EvIz, c, r0);
         tcg_out32(s, val);
     }
 }
@@ -317,15 +474,15 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
-    
+
     if (l->has_value) {
         val = l->u.value - (tcg_target_long)s->code_ptr;
         val1 = val - 2;
         if ((int8_t)val1 == val1) {
             if (opc == -1) {
-                tcg_out8(s, 0xeb);
+                tcg_out8(s, OPC_JMP_short);
             } else {
-                tcg_out8(s, 0x70 + opc);
+                tcg_out8(s, OPC_JCC_short + opc);
             }
             tcg_out8(s, val1);
         } else {
@@ -333,28 +490,26 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
                 tcg_abort();
             }
             if (opc == -1) {
-                tcg_out8(s, 0xe9);
+                tcg_out8(s, OPC_JMP_long);
                 tcg_out32(s, val - 5);
             } else {
-                tcg_out8(s, 0x0f);
-                tcg_out8(s, 0x80 + opc);
+                tcg_out_opc(s, OPC_JCC_long + opc);
                 tcg_out32(s, val - 6);
             }
         }
     } else if (small) {
         if (opc == -1) {
-            tcg_out8(s, 0xeb);
+            tcg_out8(s, OPC_JMP_short);
         } else {
-            tcg_out8(s, 0x70 + opc);
+            tcg_out8(s, OPC_JCC_short + opc);
         }
         tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
         s->code_ptr += 1;
     } else {
         if (opc == -1) {
-            tcg_out8(s, 0xe9);
+            tcg_out8(s, OPC_JMP_long);
         } else {
-            tcg_out8(s, 0x0f);
-            tcg_out8(s, 0x80 + opc);
+            tcg_out_opc(s, OPC_JCC_long + opc);
         }
         tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
         s->code_ptr += 4;
@@ -367,12 +522,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
     if (const_arg2) {
         if (arg2 == 0) {
             /* test r, r */
-            tcg_out_modrm(s, 0x85, arg1, arg1);
+            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
         } else {
             tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
         }
     } else {
-        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+        tgen_arithr(s, ARITH_CMP, arg1, arg2);
     }
 }
 
@@ -470,9 +625,8 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg dest,
                             TCGArg arg1, TCGArg arg2, int const_arg2)
 {
     tcg_out_cmp(s, arg1, arg2, const_arg2);
-    /* setcc */
-    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, dest);
-    tgen_arithi(s, ARITH_AND, dest, 0xff, 0);
+    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+    tcg_out_ext8u(s, dest, dest);
 }
 
 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
@@ -517,6 +671,12 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
     }
 }
 
+static void tcg_out_calli(TCGContext *s, tcg_target_long dest)
+{
+    tcg_out_opc(s, OPC_CALL_Jz);
+    tcg_out32(s, dest - (tcg_target_long)s->code_ptr - 4);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -573,48 +733,42 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     r1 = TCG_REG_EDX;
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg); 
-
-    tcg_out_mov(s, r0, addr_reg); 
- 
-    tcg_out_modrm(s, 0xc1, 5, r1); /* shr $x, r1 */
-    tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 
-    
-    tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
-    tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-    tcg_out8(s, (5 << 3) | r1);
-    tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
+    tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+    tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+    tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
+
+    tcg_out_modrm_sib_offset(s, OPC_LEA, r1, TCG_AREG0, r1, 0,
+                             offsetof(CPUState,
+                                      tlb_table[mem_index][0].addr_read));
 
     /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
-    
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
+
     tcg_out_mov(s, r0, addr_reg);
-    
+
 #if TARGET_LONG_BITS == 32
     /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
+    tcg_out8(s, OPC_JCC_short + JCC_JE);
     label1_ptr = s->code_ptr;
     s->code_ptr++;
 #else
     /* jne label3 */
-    tcg_out8(s, 0x70 + JCC_JNE);
+    tcg_out8(s, OPC_JCC_short + JCC_JNE);
     label3_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
 
     /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
+    tcg_out8(s, OPC_JCC_short + JCC_JE);
     label1_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label3: */
     *label3_ptr = s->code_ptr - label3_ptr - 1;
 #endif
@@ -626,26 +780,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
 #endif
-    tcg_out8(s, 0xe8);
-    tcg_out32(s, (tcg_target_long)qemu_ld_helpers[s_bits] - 
-              (tcg_target_long)s->code_ptr - 4);
+    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
     switch(opc) {
     case 0 | 4:
-        /* movsbl */
-        tcg_out_modrm(s, 0xbe | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext8s(s, data_reg, TCG_REG_EAX);
         break;
     case 1 | 4:
-        /* movswl */
-        tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext16s(s, data_reg, TCG_REG_EAX);
         break;
     case 0:
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
         break;
     case 1:
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
         break;
     case 2:
     default:
@@ -653,7 +801,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 3:
         if (data_reg == TCG_REG_EDX) {
-            tcg_out_opc(s, 0x90 + TCG_REG_EDX); /* xchg %edx, %eax */
+            /* xchg %edx, %eax */
+            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX);
             tcg_out_mov(s, data_reg2, TCG_REG_EAX);
         } else {
             tcg_out_mov(s, data_reg, TCG_REG_EAX);
@@ -663,15 +812,16 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
 
     /* jmp label2 */
-    tcg_out8(s, 0xeb);
+    tcg_out8(s, OPC_JMP_short);
     label2_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label1: */
     *label1_ptr = s->code_ptr - label1_ptr - 1;
 
     /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+                         offsetof(CPUTLBEntry, addend) -
                          offsetof(CPUTLBEntry, addr_read));
 #else
     r0 = addr_reg;
@@ -685,62 +835,51 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     switch(opc) {
     case 0:
         /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVZBL, data_reg, r0, GUEST_BASE);
         break;
     case 0 | 4:
         /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, data_reg, r0, GUEST_BASE);
         break;
     case 1:
         /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66); 
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
+            tcg_out_rolw_8(s, data_reg);
         }
         break;
     case 1 | 4:
         /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66); 
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
+            tcg_out_rolw_8(s, data_reg);
 
             /* movswl data_reg, data_reg */
-            tcg_out_modrm(s, 0xbf | P_EXT, data_reg, data_reg);
+            tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
         }
         break;
     case 2:
-        /* movl (r0), data_reg */
-        tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
+        tcg_out_ld(s, TCG_TYPE_I32, data_reg, r0, GUEST_BASE);
         if (bswap) {
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
+            tcg_out_bswap32(s, data_reg);
         }
         break;
     case 3:
-        /* XXX: could be nicer */
-        if (r0 == data_reg) {
-            r1 = TCG_REG_EDX;
-            if (r1 == data_reg)
-                r1 = TCG_REG_EAX;
-            tcg_out_mov(s, r1, r0);
-            r0 = r1;
+        if (bswap) {
+            int t = data_reg;
+            data_reg = data_reg2;
+            data_reg2 = t;
         }
-        if (!bswap) {
-            tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
-            tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4);
+        if (r0 != data_reg) {
+            tcg_out_ld(s, TCG_TYPE_I32, data_reg, r0, GUEST_BASE);
+            tcg_out_ld(s, TCG_TYPE_I32, data_reg2, r0, GUEST_BASE + 4);
         } else {
-            tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4);
-            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
-
-            tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE);
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT);
+            tcg_out_ld(s, TCG_TYPE_I32, data_reg2, r0, GUEST_BASE + 4);
+            tcg_out_ld(s, TCG_TYPE_I32, data_reg, r0, GUEST_BASE);
+        }
+        if (bswap) {
+            tcg_out_bswap32(s, data_reg);
+            tcg_out_bswap32(s, data_reg2);
         }
         break;
     default:
@@ -759,6 +898,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 {
     int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
 #if defined(CONFIG_SOFTMMU)
+    int stack_adjust;
     uint8_t *label1_ptr, *label2_ptr;
 #endif
 #if TARGET_LONG_BITS == 64
@@ -785,48 +925,42 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     r1 = TCG_REG_EDX;
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg); 
-
-    tcg_out_mov(s, r0, addr_reg); 
- 
-    tcg_out_modrm(s, 0xc1, 5, r1); /* shr $x, r1 */
-    tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 
-    
-    tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
-    tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-    tcg_out8(s, (5 << 3) | r1);
-    tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
+    tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+    tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+    tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
+
+    tcg_out_modrm_sib_offset(s, OPC_LEA, r1, TCG_AREG0, r1, 0,
+                             offsetof(CPUState,
+                                      tlb_table[mem_index][0].addr_write));
 
     /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
-    
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
+
     tcg_out_mov(s, r0, addr_reg);
-    
+
 #if TARGET_LONG_BITS == 32
     /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
+    tcg_out8(s, OPC_JCC_short + JCC_JE);
     label1_ptr = s->code_ptr;
     s->code_ptr++;
 #else
     /* jne label3 */
-    tcg_out8(s, 0x70 + JCC_JNE);
+    tcg_out8(s, OPC_JCC_short + JCC_JNE);
     label3_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
 
     /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
+    tcg_out8(s, OPC_JCC_short + JCC_JE);
     label1_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label3: */
     *label3_ptr = s->code_ptr - label3_ptr - 1;
 #endif
@@ -836,76 +970,68 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     if (opc == 3) {
         tcg_out_mov(s, TCG_REG_EDX, data_reg);
         tcg_out_mov(s, TCG_REG_ECX, data_reg2);
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 4);
+        tcg_out_pushi(s, mem_index);
+        stack_adjust = 4;
     } else {
         switch(opc) {
         case 0:
-            /* movzbl */
-            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_EDX, data_reg);
+            tcg_out_ext8u(s, TCG_REG_EDX, data_reg);
             break;
         case 1:
-            /* movzwl */
-            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_EDX, data_reg);
+            tcg_out_ext16u(s, TCG_REG_EDX, data_reg);
             break;
         case 2:
             tcg_out_mov(s, TCG_REG_EDX, data_reg);
             break;
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
+        stack_adjust = 0;
     }
 #else
     if (opc == 3) {
         tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out_opc(s, 0x50 + data_reg2); /* push */
-        tcg_out_opc(s, 0x50 + data_reg); /* push */
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 12);
+        tcg_out_pushi(s, mem_index);
+        tcg_out_push(s, data_reg2);
+        tcg_out_push(s, data_reg);
+        stack_adjust = 12;
     } else {
         tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
         switch(opc) {
         case 0:
-            /* movzbl */
-            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_ECX, data_reg);
+            tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
             break;
         case 1:
-            /* movzwl */
-            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_ECX, data_reg);
+            tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
             break;
         case 2:
             tcg_out_mov(s, TCG_REG_ECX, data_reg);
             break;
         }
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 4);
+        tcg_out_pushi(s, mem_index);
+        stack_adjust = 4;
     }
 #endif
-    
+
+    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+    if (stack_adjust == 4) {
+        /* Pop and discard.  This is 2 bytes smaller than the add.  */
+        tcg_out_pop(s, TCG_REG_ECX);
+    } else if (stack_adjust != 0) {
+        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
+    }
+
     /* jmp label2 */
-    tcg_out8(s, 0xeb);
+    tcg_out8(s, OPC_JMP_short);
     label2_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label1: */
     *label1_ptr = s->code_ptr - label1_ptr - 1;
 
     /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+                         offsetof(CPUTLBEntry, addend) -
                          offsetof(CPUTLBEntry, addr_write));
 #else
     r0 = addr_reg;
@@ -918,44 +1044,37 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 #endif
     switch(opc) {
     case 0:
-        /* movb */
-        tcg_out_modrm_offset(s, 0x88, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv, data_reg, r0, GUEST_BASE);
         break;
     case 1:
         if (bswap) {
             tcg_out_mov(s, r1, data_reg);
-            tcg_out8(s, 0x66); /* rolw $8, %ecx */
-            tcg_out_modrm(s, 0xc1, 0, r1);
-            tcg_out8(s, 8);
+            tcg_out_rolw_8(s, r1);
             data_reg = r1;
         }
         /* movw */
         tcg_out8(s, 0x66);
-        tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv, data_reg, r0, GUEST_BASE);
         break;
     case 2:
         if (bswap) {
             tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+            tcg_out_bswap32(s, r1);
             data_reg = r1;
         }
-        /* movl */
-        tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
+        tcg_out_st(s, TCG_TYPE_I32, data_reg, r0, GUEST_BASE);
         break;
     case 3:
         if (bswap) {
             tcg_out_mov(s, r1, data_reg2);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
-            tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE);
+            tcg_out_bswap32(s, r1);
+            tcg_out_st(s, TCG_TYPE_I32, r1, r0, GUEST_BASE);
             tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
-            tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4);
+            tcg_out_bswap32(s, r1);
+            tcg_out_st(s, TCG_TYPE_I32, r1, r0, GUEST_BASE + 4);
         } else {
-            tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
-            tcg_out_modrm_offset(s, 0x89, data_reg2, r0, GUEST_BASE + 4);
+            tcg_out_st(s, TCG_TYPE_I32, data_reg, r0, GUEST_BASE);
+            tcg_out_st(s, TCG_TYPE_I32, data_reg2, r0, GUEST_BASE + 4);
         }
         break;
     default:
@@ -972,41 +1091,41 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
     int c;
-    
+
     switch(opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, args[0]);
-        tcg_out8(s, 0xe9); /* jmp tb_ret_addr */
+        tcg_out8(s, OPC_JMP_long); /* jmp tb_ret_addr */
         tcg_out32(s, tb_ret_addr - s->code_ptr - 4);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-            tcg_out8(s, 0xe9); /* jmp im */
+            tcg_out8(s, OPC_JMP_long); /* jmp im */
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
             tcg_out32(s, 0);
         } else {
             /* indirect jump method */
-            /* jmp Ev */
-            tcg_out_modrm_offset(s, 0xff, 4, -1, 
+            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                  (tcg_target_long)(s->tb_next + args[0]));
         }
         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
         break;
     case INDEX_op_call:
         if (const_args[0]) {
-            tcg_out8(s, 0xe8);
-            tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
+            tcg_out_calli(s, args[0]);
         } else {
-            tcg_out_modrm(s, 0xff, 2, args[0]);
+            /* call *reg */
+            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
         }
         break;
     case INDEX_op_jmp:
         if (const_args[0]) {
-            tcg_out8(s, 0xe9);
+            tcg_out8(s, OPC_JMP_long);
             tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
         } else {
-            tcg_out_modrm(s, 0xff, 4, args[0]);
+            /* jmp *reg */
+            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
         }
         break;
     case INDEX_op_br:
@@ -1017,37 +1136,54 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld8u_i32:
         /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld8s_i32:
         /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld16u_i32:
         /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld16s_i32:
         /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSWL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld_i32:
-        /* movl */
-        tcg_out_modrm_offset(s, 0x8b, args[0], args[1], args[2]);
+        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
     case INDEX_op_st8_i32:
         /* movb */
-        tcg_out_modrm_offset(s, 0x88, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv, args[0], args[1], args[2]);
         break;
     case INDEX_op_st16_i32:
         /* movw */
         tcg_out8(s, 0x66);
-        tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i32:
-        /* movl */
-        tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]);
+        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
+    case INDEX_op_add_i32:
+        /* For 3-operand addition, use LEA.  */
+        if (args[0] != args[1]) {
+            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
+
+            if (const_args[2]) {
+                c3 = a2, a2 = -1;
+            } else if (a0 == a2) {
+                /* Watch out for dest = src + dest, since we've removed
+                   the matching constraint on the add.  */
+                tgen_arithr(s, ARITH_ADD, a0, a1);
+                break;
+            }
+
+            tcg_out_modrm_sib_offset(s, OPC_LEA, a0, a1, a2, 0, c3);
+            break;
+        }
+        c = ARITH_ADD;
+        goto gen_arith;
     case INDEX_op_sub_i32:
         c = ARITH_SUB;
         goto gen_arith;
@@ -1060,13 +1196,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_xor_i32:
         c = ARITH_XOR;
         goto gen_arith;
-    case INDEX_op_add_i32:
-        c = ARITH_ADD;
     gen_arith:
         if (const_args[2]) {
             tgen_arithi(s, c, args[0], args[2], 0);
         } else {
-            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
+            tgen_arithr(s, c, args[0], args[2]);
         }
         break;
     case INDEX_op_mul_i32:
@@ -1074,37 +1208,32 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             int32_t val;
             val = args[2];
             if (val == (int8_t)val) {
-                tcg_out_modrm(s, 0x6b, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIb, args[0], args[0]);
                 tcg_out8(s, val);
             } else {
-                tcg_out_modrm(s, 0x69, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIz, args[0], args[0]);
                 tcg_out32(s, val);
             }
         } else {
-            tcg_out_modrm(s, 0xaf | P_EXT, args[0], args[2]);
+            tcg_out_modrm(s, OPC_IMUL_GvEv, args[0], args[2]);
         }
         break;
     case INDEX_op_mulu2_i32:
-        tcg_out_modrm(s, 0xf7, 4, args[3]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
         break;
     case INDEX_op_div2_i32:
-        tcg_out_modrm(s, 0xf7, 7, args[4]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_IDIV, args[4]);
         break;
     case INDEX_op_divu2_i32:
-        tcg_out_modrm(s, 0xf7, 6, args[4]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_DIV, args[4]);
         break;
     case INDEX_op_shl_i32:
         c = SHIFT_SHL;
     gen_shift32:
         if (const_args[2]) {
-            if (args[2] == 1) {
-                tcg_out_modrm(s, 0xd1, c, args[0]);
-            } else {
-                tcg_out_modrm(s, 0xc1, c, args[0]);
-                tcg_out8(s, args[2]);
-            }
+            tcg_out_shifti(s, c, args[0], args[2]);
         } else {
-            tcg_out_modrm(s, 0xd3, c, args[0]);
+            tcg_out_modrm(s, OPC_SHIFT_cl, c, args[0]);
         }
         break;
     case INDEX_op_shr_i32:
@@ -1121,24 +1250,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         goto gen_shift32;
 
     case INDEX_op_add2_i32:
-        if (const_args[4]) 
+        if (const_args[4]) {
             tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
-        if (const_args[5]) 
+        } else {
+            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+        }
+        if (const_args[5]) {
             tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
+        } else {
+            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+        }
         break;
     case INDEX_op_sub2_i32:
-        if (const_args[4]) 
+        if (const_args[4]) {
             tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
-        if (const_args[5]) 
+        } else {
+            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+        }
+        if (const_args[5]) {
             tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
+        } else {
+            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+        }
         break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
@@ -1149,33 +1282,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_bswap16_i32:
-        tcg_out8(s, 0x66);
-        tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
-        tcg_out8(s, 8);
+        tcg_out_rolw_8(s, args[0]);
         break;
     case INDEX_op_bswap32_i32:
-        tcg_out_opc(s, (0xc8 + args[0]) | P_EXT);
+        tcg_out_bswap32(s, args[0]);
         break;
 
     case INDEX_op_neg_i32:
-        tcg_out_modrm(s, 0xf7, 3, args[0]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, args[0]);
         break;
 
     case INDEX_op_not_i32:
-        tcg_out_modrm(s, 0xf7, 2, args[0]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NOT, args[0]);
         break;
 
     case INDEX_op_ext8s_i32:
-        tcg_out_modrm(s, 0xbe | P_EXT, args[0], args[1]);
+        tcg_out_ext8s(s, args[0], args[1]);
         break;
     case INDEX_op_ext16s_i32:
-        tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
+        tcg_out_ext16s(s, args[0], args[1]);
         break;
     case INDEX_op_ext8u_i32:
-        tcg_out_modrm(s, 0xb6 | P_EXT, args[0], args[1]);
+        tcg_out_ext8u(s, args[0], args[1]);
         break;
     case INDEX_op_ext16u_i32:
-        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
+        tcg_out_ext16u(s, args[0], args[1]);
         break;
 
     case INDEX_op_setcond_i32:
@@ -1203,7 +1334,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld64:
         tcg_out_qemu_ld(s, args, 3);
         break;
-        
+
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, 0);
         break;
@@ -1239,7 +1370,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_st16_i32, { "r", "r" } },
     { INDEX_op_st_i32, { "r", "r" } },
 
-    { INDEX_op_add_i32, { "r", "0", "ri" } },
+    { INDEX_op_add_i32, { "r", "r", "ri" } },
     { INDEX_op_sub_i32, { "r", "0", "ri" } },
     { INDEX_op_mul_i32, { "r", "0", "ri" } },
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
@@ -1270,8 +1401,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_ext8s_i32, { "r", "q" } },
     { INDEX_op_ext16s_i32, { "r", "r" } },
-    { INDEX_op_ext8u_i32, { "r", "q"} },
-    { INDEX_op_ext16u_i32, { "r", "r"} },
+    { INDEX_op_ext8u_i32, { "r", "q" } },
+    { INDEX_op_ext16u_i32, { "r", "r" } },
 
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
@@ -1312,21 +1443,11 @@ static int tcg_target_callee_save_regs[] = {
     TCG_REG_EDI,
 };
 
-static inline void tcg_out_push(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, 0x50 + reg);
-}
-
-static inline void tcg_out_pop(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, 0x58 + reg);
-}
-
 /* Generate global QEMU prologue and epilogue code */
 void tcg_target_qemu_prologue(TCGContext *s)
 {
     int i, frame_size, push_size, stack_addend;
-    
+
     /* TB prologue */
     /* save all callee saved registers */
     for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
@@ -1335,20 +1456,20 @@ void tcg_target_qemu_prologue(TCGContext *s)
     /* reserve some stack space */
     push_size = 4 + ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
     frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
-    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & 
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
         ~(TCG_TARGET_STACK_ALIGN - 1);
     stack_addend = frame_size - push_size;
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
-    tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */
-    
+    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_EAX); /* jmp *%eax */
+
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
     tcg_out_addi(s, TCG_REG_ESP, stack_addend);
     for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
     }
-    tcg_out8(s, 0xc3); /* ret */
+    tcg_out_opc(s, OPC_RET);
 }
 
 void tcg_target_init(TCGContext *s)
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 6e69ef4b49..905f48b3b3 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -40,6 +40,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
+#ifdef CONFIG_USE_GUEST_BASE
+#define TCG_GUEST_BASE_REG TCG_REG_R55
+#else
+#define TCG_GUEST_BASE_REG TCG_REG_R0
+#endif
+
 /* Branch registers */
 enum {
     TCG_REG_B0 = 0,
@@ -1640,9 +1646,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
+    static uint64_t const opc_ld_m1[4] = {
+        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
+    };
+    static uint64_t const opc_sxt_i29[4] = {
+        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
+    };
     int addr_reg, data_reg, mem_index, s_bits, bswap;
-    uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 };
-    uint64_t opc_sxt_i29[8] = { OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 };
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1655,19 +1665,21 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     bswap = 0;
 #endif
 
-    tcg_out_bundle(s, mLX,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_l2 ((tcg_target_long) GUEST_BASE),
-                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R2,
-                               GUEST_BASE));
-
 #if TARGET_LONG_BITS == 32
-    tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
-                               TCG_REG_R3, addr_reg),
-                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                               TCG_REG_R2, TCG_REG_R3));
+    if (GUEST_BASE != 0) {
+        tcg_out_bundle(s, mII,
+                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+                                   TCG_REG_R3, addr_reg),
+                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+                                   TCG_GUEST_BASE_REG, TCG_REG_R3));
+    } else {
+        tcg_out_bundle(s, miI,
+                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+                                   TCG_REG_R2, addr_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    }
 
     if (!bswap || s_bits == 0) {
         if (s_bits == opc) {
@@ -1723,12 +1735,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
         }
     }
 #else
-    tcg_out_bundle(s, MmI,
-                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                               TCG_REG_R2, addr_reg),
-                   tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
-                               data_reg, TCG_REG_R2),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    if (GUEST_BASE != 0) {
+        tcg_out_bundle(s, MmI,
+                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+                                   TCG_GUEST_BASE_REG, addr_reg),
+                       tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+                                   data_reg, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    } else {
+        tcg_out_bundle(s, mmI,
+                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+                                   data_reg, addr_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    }
 
     if (bswap && s_bits == 1) {
         tcg_out_bundle(s, mII,
@@ -1763,8 +1783,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
+    static uint64_t const opc_st_m4[4] = {
+        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
+    };
     int addr_reg, data_reg, bswap;
-    uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 };
+#if TARGET_LONG_BITS == 64
+    uint64_t add_guest_base;
+#endif
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1775,19 +1800,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     bswap = 0;
 #endif
 
-    tcg_out_bundle(s, mLX,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_l2 ((tcg_target_long) GUEST_BASE),
-                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R2,
-                               GUEST_BASE));
-
 #if TARGET_LONG_BITS == 32
-    tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
-                               TCG_REG_R3, addr_reg),
-                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                               TCG_REG_R2, TCG_REG_R3));
+    if (GUEST_BASE != 0) {
+        tcg_out_bundle(s, mII,
+                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+                                   TCG_REG_R3, addr_reg),
+                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+                                   TCG_GUEST_BASE_REG, TCG_REG_R3));
+    } else {
+        tcg_out_bundle(s, miI,
+                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+                                   TCG_REG_R3, addr_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    }
+
     if (bswap) {
         if (opc == 1) {
             tcg_out_bundle(s, mII,
@@ -1820,18 +1848,24 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
 #else
+    if (GUEST_BASE != 0) {
+        add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+                                     TCG_GUEST_BASE_REG, addr_reg);
+        addr_reg = TCG_REG_R2;
+    } else {
+        add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    }
+
     if (!bswap || opc == 0) {
-        tcg_out_bundle(s, MmI,
-                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                                   TCG_REG_R2, addr_reg),
+        tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
+                       add_guest_base,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
-                                   data_reg, TCG_REG_R2),
+                                   data_reg, addr_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else {
         if (opc == 1) {
             tcg_out_bundle(s, mII,
-                           tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                                       TCG_REG_R2, addr_reg),
+                           add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
@@ -1839,8 +1873,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
             data_reg = TCG_REG_R3;
         } else if (opc == 2) {
             tcg_out_bundle(s, mII,
-                           tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                                       TCG_REG_R2, addr_reg),
+                           add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
@@ -1848,8 +1881,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
             data_reg = TCG_REG_R3;
         } else if (opc == 3) {
             tcg_out_bundle(s, miI,
-                           tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                                       TCG_REG_R2, addr_reg),
+                           add_guest_base,
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, data_reg, 0xb));
@@ -1857,7 +1889,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         }
         tcg_out_bundle(s, miI,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
-                                   data_reg, TCG_REG_R2),
+                                   data_reg, addr_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
@@ -2254,6 +2286,18 @@ void tcg_target_qemu_prologue(TCGContext *s)
                                TCG_REG_B6, TCG_REG_R32, 0),
                    tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
                                TCG_REG_R32, TCG_REG_B0));
+
+    /* ??? If GUEST_BASE < 0x200000, we could load the register via
+       an ADDL in the M slot of the next bundle.  */
+    if (GUEST_BASE != 0) {
+        tcg_out_bundle(s, mlx,
+                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_l2 (GUEST_BASE),
+                       tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+                                   TCG_GUEST_BASE_REG, GUEST_BASE));
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+
     tcg_out_bundle(s, miB,
                    tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
@@ -2282,39 +2326,56 @@ void tcg_target_init(TCGContext *s)
                    0xffffffffffffffffull);
     tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64],
                    0xffffffffffffffffull);
-    tcg_regset_set(tcg_target_call_clobber_regs,
-                   (1 << TCG_REG_R8)  |
-                   (1 << TCG_REG_R9)  |
-                   (1 << TCG_REG_R10) |
-                   (1 << TCG_REG_R11) |
-                   (1 << TCG_REG_R13) |
-                   (1 << TCG_REG_R14) |
-                   (1 << TCG_REG_R15) |
-                   (1 << TCG_REG_R16) |
-                   (1 << TCG_REG_R17) |
-                   (1 << TCG_REG_R18) |
-                   (1 << TCG_REG_R19) |
-                   (1 << TCG_REG_R20) |
-                   (1 << TCG_REG_R21) |
-                   (1 << TCG_REG_R22) |
-                   (1 << TCG_REG_R23) |
-                   (1 << TCG_REG_R24) |
-                   (1 << TCG_REG_R25) |
-                   (1 << TCG_REG_R26) |
-                   (1 << TCG_REG_R27) |
-                   (1 << TCG_REG_R28) |
-                   (1 << TCG_REG_R29) |
-                   (1 << TCG_REG_R30) |
-                   (1 << TCG_REG_R31));
-    tcg_regset_clear(s->reserved_regs);
 
+    tcg_regset_clear(tcg_target_call_clobber_regs);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63);
+
+    tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);   /* zero register */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);   /* global pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);   /* internal use */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3);   /* internal use */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12);  /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);  /* thread pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R32);  /* return address */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33);  /* PFS */
 
+    /* The following 3 are not in use, are call-saved, but *not* saved
+       by the prologue.  Therefore we cannot use them without modifying
+       the prologue.  There doesn't seem to be any good reason to use
+       these as opposed to the windowed registers.  */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
+
     tcg_add_target_add_op_defs(ia64_op_defs);
 }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a99ecb9a9e..880e7ceef9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -240,7 +240,10 @@ void tcg_context_init(TCGContext *s)
     }
     
     tcg_target_init(s);
+}
 
+void tcg_prologue_init(TCGContext *s)
+{
     /* init global prologue and epilogue */
     s->code_buf = code_gen_prologue;
     s->code_ptr = s->code_buf;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 44856e1dd5..58538235de 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -346,6 +346,7 @@ static inline void *tcg_malloc(int size)
 }
 
 void tcg_context_init(TCGContext *s);
+void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
 int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf);