summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--MAINTAINERS11
-rw-r--r--Makefile32
-rw-r--r--Makefile.objs8
-rw-r--r--Makefile.target1
-rw-r--r--block/qed.c7
-rw-r--r--block/vmdk.c3
-rw-r--r--device_tree.c1
-rw-r--r--exec-all.h3
-rw-r--r--exec.c1
-rw-r--r--gdbstub.c35
-rw-r--r--hmp-commands.hx3
-rw-r--r--hw/9pfs/virtio-9p.c60
-rw-r--r--hw/audiodev.h2
-rw-r--r--hw/ide/ahci.c16
-rw-r--r--hw/lm4549.c336
-rw-r--r--hw/lm4549.h43
-rw-r--r--hw/pl041.c636
-rw-r--r--hw/pl041.h135
-rw-r--r--hw/pl041.hx81
-rw-r--r--hw/qxl.c66
-rw-r--r--hw/qxl.h3
-rw-r--r--hw/realview.c8
-rw-r--r--hw/versatilepb.c8
-rw-r--r--hw/vexpress.c7
-rw-r--r--kvm-all.c10
-rw-r--r--libcacard/cac.c3
-rw-r--r--libcacard/card_7816.c2
-rw-r--r--libcacard/vscclient.c9
-rw-r--r--linux-user/signal.c28
-rw-r--r--migration.c11
-rw-r--r--migration.h2
-rw-r--r--monitor.c102
-rw-r--r--qemu-doc.texi2
-rw-r--r--qmp-commands.hx3
-rwxr-xr-xscripts/analyse-9p-simpletrace.py164
-rwxr-xr-xscripts/kvm/kvm_stat480
-rwxr-xr-xscripts/kvm/vmxcap224
-rw-r--r--target-i386/cpu.h7
-rw-r--r--target-i386/helper.c1
-rw-r--r--target-i386/kvm.c15
-rw-r--r--target-i386/machine.c43
-rw-r--r--target-i386/op_helper.c6
-rw-r--r--target-sparc/cpu.h8
-rw-r--r--target-sparc/cpu_init.c6
-rw-r--r--target-sparc/fop_helper.c294
-rw-r--r--target-sparc/helper.h122
-rw-r--r--target-sparc/ldst_helper.c123
-rw-r--r--target-sparc/machine.c20
-rw-r--r--target-sparc/translate.c1460
-rw-r--r--target-sparc/vis_helper.c251
-rw-r--r--tcg/tcg-op.h65
-rw-r--r--trace-events2
-rw-r--r--ui/qemu-spice.h14
-rw-r--r--ui/spice-core.c78
55 files changed, 3789 insertions, 1273 deletions
diff --git a/.gitignore b/.gitignore
index 59c343c414..6d2acab09a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ libdis*
 libhw32
 libhw64
 libuser
+linux-headers/asm
 qapi-generated
 qemu-doc.html
 qemu-tech.html
diff --git a/MAINTAINERS b/MAINTAINERS
index 2b4c5d727e..4535eeb61f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -118,7 +118,7 @@ F: target-i386/
 
 Xtensa
 M: Max Filippov <jcmvbkbc@gmail.com>
-W: http://kkv.spb.su/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
+W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
 F: target-xtensa/
 
@@ -348,10 +348,15 @@ F: hw/pc.[ch] hw/pc_piix.c
 
 Xtensa Machines
 ---------------
-DC232B
+sim
 M: Max Filippov <jcmvbkbc@gmail.com>
 S: Maintained
-F: hw/xtensa_dc232b.c
+F: hw/xtensa_sim.c
+
+Avnet LX60
+M: Max Filippov <jcmvbkbc@gmail.com>
+S: Maintained
+F: hw/xtensa_lx60.c
 
 Devices
 -------
diff --git a/Makefile b/Makefile
index f63fc02ca3..ba8d738d9b 100644
--- a/Makefile
+++ b/Makefile
@@ -146,27 +146,25 @@ endif
 qemu-img.o: qemu-img-cmds.h
 qemu-img.o qemu-tool.o qemu-nbd.o qemu-io.o cmd.o qemu-ga.o: $(GENERATED_HEADERS)
 
-tools-obj-y = qemu-tool.o qemu-error.o $(oslib-obj-y) $(trace-obj-y) \
-        $(block-obj-y) $(qobject-obj-y) $(version-obj-y) qemu-timer-common.o
+tools-obj-y = qemu-tool.o $(oslib-obj-y) $(trace-obj-y) \
+	qemu-timer-common.o cutils.o
 
-qemu-img$(EXESUF): qemu-img.o $(tools-obj-y)
-qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y)
-qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y)
+qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
+qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y)
+qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y)
 
 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $@")
 
 check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS)
 
-CHECK_PROG_DEPS = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o
-
-check-qint: check-qint.o qint.o $(CHECK_PROG_DEPS)
-check-qstring: check-qstring.o qstring.o $(CHECK_PROG_DEPS)
-check-qdict: check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o $(CHECK_PROG_DEPS)
-check-qlist: check-qlist.o qlist.o qint.o $(CHECK_PROG_DEPS)
-check-qfloat: check-qfloat.o qfloat.o $(CHECK_PROG_DEPS)
-check-qjson: check-qjson.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o qjson.o json-streamer.o json-lexer.o json-parser.o error.o qerror.o qemu-error.o $(CHECK_PROG_DEPS)
-test-coroutine: test-coroutine.o qemu-timer-common.o async.o $(coroutine-obj-y) $(CHECK_PROG_DEPS)
+check-qint: check-qint.o qint.o $(tools-obj-y)
+check-qstring: check-qstring.o qstring.o $(tools-obj-y)
+check-qdict: check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o $(tools-obj-y)
+check-qlist: check-qlist.o qlist.o qint.o $(tools-obj-y)
+check-qfloat: check-qfloat.o qfloat.o $(tools-obj-y)
+check-qjson: check-qjson.o $(qobject-obj-y) $(tools-obj-y)
+test-coroutine: test-coroutine.o qemu-timer-common.o async.o $(coroutine-obj-y) $(tools-obj-y)
 
 $(qapi-obj-y): $(GENERATED_HEADERS)
 qapi-dir := qapi-generated
@@ -204,16 +202,16 @@ qmp-marshal.c: $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py -m -o "." < $<, "  GEN   $@")
 
 test-visitor.o: $(addprefix $(qapi-dir)/, test-qapi-types.c test-qapi-types.h test-qapi-visit.c test-qapi-visit.h) $(qapi-obj-y)
-test-visitor: test-visitor.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o $(qapi-obj-y) error.o osdep.o $(oslib-obj-y) qjson.o json-streamer.o json-lexer.o json-parser.o qerror.o qemu-error.o qemu-tool.o $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o
+test-visitor: test-visitor.o $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o
 
 test-qmp-commands.o: $(addprefix $(qapi-dir)/, test-qapi-types.c test-qapi-types.h test-qapi-visit.c test-qapi-visit.h test-qmp-marshal.c test-qmp-commands.h) $(qapi-obj-y)
-test-qmp-commands: test-qmp-commands.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o $(qapi-obj-y) error.o osdep.o $(oslib-obj-y) qjson.o json-streamer.o json-lexer.o json-parser.o qerror.o qemu-error.o qemu-tool.o $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o $(qapi-dir)/test-qmp-marshal.o module.o
+test-qmp-commands: test-qmp-commands.o $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o $(qapi-dir)/test-qmp-marshal.o module.o
 
 QGALIB_GEN=$(addprefix $(qapi-dir)/, qga-qapi-types.c qga-qapi-types.h qga-qapi-visit.c qga-qmp-marshal.c)
 $(QGALIB_GEN): $(GENERATED_HEADERS)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
 
-qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(qapi-obj-y) $(trace-obj-y) $(qobject-obj-y) $(version-obj-y) $(addprefix $(qapi-dir)/, qga-qapi-visit.o qga-qapi-types.o qga-qmp-marshal.o)
+qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qobject-obj-y) $(version-obj-y) $(addprefix $(qapi-dir)/, qga-qapi-visit.o qga-qapi-types.o qga-qmp-marshal.o)
 
 QEMULIBS=libhw32 libhw64 libuser libdis libdis-user
 
diff --git a/Makefile.objs b/Makefile.objs
index 01587c8f8f..a19e7c5f57 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -2,7 +2,7 @@
 # QObject
 qobject-obj-y = qint.o qstring.o qdict.o qlist.o qfloat.o qbool.o
 qobject-obj-y += qjson.o json-lexer.o json-streamer.o json-parser.o
-qobject-obj-y += qerror.o error.o
+qobject-obj-y += qerror.o error.o qemu-error.o
 
 #######################################################################
 # oslib-obj-y is code depending on the OS (win32 vs posix)
@@ -25,7 +25,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 
 block-obj-y = cutils.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
-block-obj-y += $(coroutine-obj-y)
+block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
@@ -76,7 +76,7 @@ common-obj-y = $(block-obj-y) blockdev.o
 common-obj-y += $(net-obj-y)
 common-obj-y += $(qobject-obj-y)
 common-obj-$(CONFIG_LINUX) += $(fsdev-obj-$(CONFIG_LINUX))
-common-obj-y += readline.o console.o cursor.o qemu-error.o
+common-obj-y += readline.o console.o cursor.o
 common-obj-y += $(oslib-obj-y)
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
@@ -415,7 +415,7 @@ common-obj-y += qmp.o hmp.o
 
 qga-nested-y = guest-agent-commands.o guest-agent-command-state.o
 qga-obj-y = $(addprefix qga/, $(qga-nested-y))
-qga-obj-y += qemu-ga.o qemu-tool.o qemu-error.o qemu-sockets.o module.o qemu-option.o cutils.o osdep.o
+qga-obj-y += qemu-ga.o qemu-sockets.o module.o qemu-option.o
 qga-obj-$(CONFIG_WIN32) += oslib-win32.o
 qga-obj-$(CONFIG_POSIX) += oslib-posix.o
 
diff --git a/Makefile.target b/Makefile.target
index fe5f6f70af..530c1d1e63 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -362,6 +362,7 @@ obj-arm-y += syborg_virtio.o
 obj-arm-y += vexpress.o
 obj-arm-y += strongarm.o
 obj-arm-y += collie.o
+obj-arm-y += pl041.o lm4549.o
 
 obj-sh4-y = shix.o r2d.o sh7750.o sh7750_regnames.o tc58128.o
 obj-sh4-y += sh_timer.o sh_serial.o sh_intc.o sh_pci.o sm501.o
diff --git a/block/qed.c b/block/qed.c
index 2e06992784..d032a4574c 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -388,7 +388,6 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
     if (ret < 0) {
         return ret;
     }
-    ret = 0; /* ret should always be 0 or -errno */
     qed_header_le_to_cpu(&le_header, &s->header);
 
     if (s->header.magic != QED_MAGIC) {
@@ -1420,8 +1419,10 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
     memcpy(buffer, &le_header, sizeof(le_header));
     buffer_len = sizeof(le_header);
 
-    memcpy(buffer + buffer_len, backing_file, backing_file_len);
-    buffer_len += backing_file_len;
+    if (backing_file) {
+        memcpy(buffer + buffer_len, backing_file, backing_file_len);
+        buffer_len += backing_file_len;
+    }
 
     /* Write new header */
     ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
diff --git a/block/vmdk.c b/block/vmdk.c
index 6be592ffd6..e93828eeae 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -208,7 +208,7 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
     char desc[DESC_SIZE];
-    uint32_t cid;
+    uint32_t cid = 0xffffffff;
     const char *p_name, *cid_str;
     size_t cid_str_size;
     BDRVVmdkState *s = bs->opaque;
@@ -1407,7 +1407,6 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
             bdrv_delete(bs);
             return -EINVAL;
         }
-        filesize = bdrv_getlength(bs);
         parent_cid = vmdk_read_cid(bs, 0);
         bdrv_delete(bs);
         relative_path(parent_filename, sizeof(parent_filename),
diff --git a/device_tree.c b/device_tree.c
index dc69232f10..86a694c955 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -153,6 +153,7 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
     int retval;
 
     if (!basename) {
+        g_free(dupname);
         return -1;
     }
 
diff --git a/exec-all.h b/exec-all.h
index 72ef246793..85a37bf1ed 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -340,8 +340,7 @@ static inline tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong add
         cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
 #endif
     }
-    p = (void *)(unsigned long)addr
-        + env1->tlb_table[mmu_idx][page_index].addend;
+    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
     return qemu_ram_addr_from_host_nofail(p);
 }
 #endif
diff --git a/exec.c b/exec.c
index 9dc4edbf61..2f3c6a0ce3 100644
--- a/exec.c
+++ b/exec.c
@@ -469,7 +469,6 @@ static void code_gen_alloc(unsigned long tb_size)
     code_gen_buffer_size = tb_size;
     if (code_gen_buffer_size == 0) {
 #if defined(CONFIG_USER_ONLY)
-        /* in user mode, phys_ram_size is not meaningful */
         code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 #else
         /* XXX: needs adjustments */
diff --git a/gdbstub.c b/gdbstub.c
index 4009058788..a25f404430 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -814,7 +814,11 @@ static int cpu_gdb_read_register(CPUState *env, uint8_t *mem_buf, int n)
 #if defined(TARGET_ABI32) || !defined(TARGET_SPARC64)
     if (n < 64) {
         /* fprs */
-        GET_REG32(*((uint32_t *)&env->fpr[n - 32]));
+        if (n & 1) {
+            GET_REG32(env->fpr[(n - 32) / 2].l.lower);
+        } else {
+            GET_REG32(env->fpr[(n - 32) / 2].l.upper);
+        }
     }
     /* Y, PSR, WIM, TBR, PC, NPC, FPSR, CPSR */
     switch (n) {
@@ -831,15 +835,15 @@ static int cpu_gdb_read_register(CPUState *env, uint8_t *mem_buf, int n)
 #else
     if (n < 64) {
         /* f0-f31 */
-        GET_REG32(*((uint32_t *)&env->fpr[n - 32]));
+        if (n & 1) {
+            GET_REG32(env->fpr[(n - 32) / 2].l.lower);
+        } else {
+            GET_REG32(env->fpr[(n - 32) / 2].l.upper);
+        }
     }
     if (n < 80) {
         /* f32-f62 (double width, even numbers only) */
-        uint64_t val;
-
-        val = (uint64_t)*((uint32_t *)&env->fpr[(n - 64) * 2 + 32]) << 32;
-        val |= *((uint32_t *)&env->fpr[(n - 64) * 2 + 33]);
-        GET_REG64(val);
+        GET_REG64(env->fpr[(n - 32) / 2].ll);
     }
     switch (n) {
     case 80: GET_REGL(env->pc);
@@ -878,7 +882,12 @@ static int cpu_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n)
 #if defined(TARGET_ABI32) || !defined(TARGET_SPARC64)
     else if (n < 64) {
         /* fprs */
-        *((uint32_t *)&env->fpr[n - 32]) = tmp;
+        /* f0-f31 */
+        if (n & 1) {
+            env->fpr[(n - 32) / 2].l.lower = tmp;
+        } else {
+            env->fpr[(n - 32) / 2].l.upper = tmp;
+        }
     } else {
         /* Y, PSR, WIM, TBR, PC, NPC, FPSR, CPSR */
         switch (n) {
@@ -896,12 +905,16 @@ static int cpu_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n)
 #else
     else if (n < 64) {
         /* f0-f31 */
-        env->fpr[n] = ldfl_p(mem_buf);
+        tmp = ldl_p(mem_buf);
+        if (n & 1) {
+            env->fpr[(n - 32) / 2].l.lower = tmp;
+        } else {
+            env->fpr[(n - 32) / 2].l.upper = tmp;
+        }
         return 4;
     } else if (n < 80) {
         /* f32-f62 (double width, even numbers only) */
-        *((uint32_t *)&env->fpr[(n - 64) * 2 + 32]) = tmp >> 32;
-        *((uint32_t *)&env->fpr[(n - 64) * 2 + 33]) = tmp;
+        env->fpr[(n - 32) / 2].ll = tmp;
     } else {
         switch (n) {
         case 80: env->pc = tmp; break;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 969ccf5edd..089c1ac23d 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -823,7 +823,8 @@ ETEXI
         .params     = "protocol hostname port tls-port cert-subject",
         .help       = "send migration info to spice/vnc client",
         .user_print = monitor_user_noop,
-        .mhandler.cmd_new = client_migrate_info,
+        .mhandler.cmd_async = client_migrate_info,
+        .flags      = MONITOR_CMD_ASYNC,
     },
 
 STEXI
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index aab3bebcc7..8b6813f8de 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -969,7 +969,7 @@ static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len)
         if (s->proto_version == V9FS_PROTO_2000L) {
             id = P9_RLERROR;
         }
-        trace_complete_pdu(pdu->tag, pdu->id, err); /* Trace ERROR */
+        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
     }
 
     /* fill out the header */
@@ -1332,11 +1332,11 @@ static void v9fs_attach(void *opaque)
     }
     offset += pdu_marshal(pdu, offset, "Q", &qid);
     err = offset;
+    trace_v9fs_attach_return(pdu->tag, pdu->id,
+                             qid.type, qid.version, qid.path);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_attach_return(pdu->tag, pdu->id,
-                             qid.type, qid.version, qid.path);
     complete_pdu(s, pdu, err);
     v9fs_string_free(&uname);
     v9fs_string_free(&aname);
@@ -1371,13 +1371,12 @@ static void v9fs_stat(void *opaque)
     }
     offset += pdu_marshal(pdu, offset, "wS", 0, &v9stat);
     err = offset;
+    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
+                           v9stat.atime, v9stat.mtime, v9stat.length);
     v9fs_stat_free(&v9stat);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
-                           v9stat.atime, v9stat.mtime, v9stat.length);
-
     complete_pdu(s, pdu, err);
 }
 
@@ -1421,13 +1420,12 @@ static void v9fs_getattr(void *opaque)
     }
     retval = offset;
     retval += pdu_marshal(pdu, offset, "A", &v9stat_dotl);
-out:
-    put_fid(pdu, fidp);
-out_nofid:
     trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
                               v9stat_dotl.st_mode, v9stat_dotl.st_uid,
                               v9stat_dotl.st_gid);
-
+out:
+    put_fid(pdu, fidp);
+out_nofid:
     complete_pdu(s, pdu, retval);
 }
 
@@ -1605,6 +1603,7 @@ static void v9fs_walk(void *opaque)
         v9fs_path_copy(&newfidp->path, &path);
     }
     err = v9fs_walk_marshal(pdu, nwnames, qids);
+    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
 out:
     put_fid(pdu, fidp);
     if (newfidp) {
@@ -1613,7 +1612,6 @@ out:
     v9fs_path_free(&dpath);
     v9fs_path_free(&path);
 out_nofid:
-    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
     complete_pdu(s, pdu, err);
     if (nwnames && nwnames <= P9_MAXWELEM) {
         for (name_idx = 0; name_idx < nwnames; name_idx++) {
@@ -1648,10 +1646,10 @@ static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
 static void v9fs_open(void *opaque)
 {
     int flags;
-    int iounit;
     int32_t fid;
     int32_t mode;
     V9fsQID qid;
+    int iounit = 0;
     ssize_t err = 0;
     size_t offset = 7;
     struct stat stbuf;
@@ -1709,11 +1707,11 @@ static void v9fs_open(void *opaque)
         offset += pdu_marshal(pdu, offset, "Qd", &qid, iounit);
         err = offset;
     }
+    trace_v9fs_open_return(pdu->tag, pdu->id,
+                           qid.type, qid.version, qid.path, iounit);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_open_return(pdu->tag, pdu->id,
-                           qid.type, qid.version, qid.path, iounit);
     complete_pdu(s, pdu, err);
 }
 
@@ -1759,11 +1757,11 @@ static void v9fs_lcreate(void *opaque)
     stat_to_qid(&stbuf, &qid);
     offset += pdu_marshal(pdu, offset, "Qd", &qid, iounit);
     err = offset;
+    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
+                              qid.type, qid.version, qid.path, iounit);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
-                              qid.type, qid.version, qid.path, iounit);
     complete_pdu(pdu->s, pdu, err);
     v9fs_string_free(&name);
 }
@@ -1978,10 +1976,10 @@ static void v9fs_read(void *opaque)
     } else {
         err = -EINVAL;
     }
+    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
     complete_pdu(s, pdu, err);
 }
 
@@ -2090,10 +2088,10 @@ static void v9fs_readdir(void *opaque)
     retval = offset;
     retval += pdu_marshal(pdu, offset, "d", count);
     retval += count;
+    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
     complete_pdu(s, pdu, retval);
 }
 
@@ -2202,10 +2200,10 @@ static void v9fs_write(void *opaque)
     } while (total < count && len > 0);
     offset += pdu_marshal(pdu, offset, "d", total);
     err = offset;
+    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
     complete_pdu(s, pdu, err);
 }
 
@@ -2362,11 +2360,11 @@ static void v9fs_create(void *opaque)
     stat_to_qid(&stbuf, &qid);
     offset += pdu_marshal(pdu, offset, "Qd", &qid, iounit);
     err = offset;
+    trace_v9fs_create_return(pdu->tag, pdu->id,
+                             qid.type, qid.version, qid.path, iounit);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-   trace_v9fs_create_return(pdu->tag, pdu->id,
-                            qid.type, qid.version, qid.path, iounit);
    complete_pdu(pdu->s, pdu, err);
    v9fs_string_free(&name);
    v9fs_string_free(&extension);
@@ -2401,11 +2399,11 @@ static void v9fs_symlink(void *opaque)
     stat_to_qid(&stbuf, &qid);
     offset += pdu_marshal(pdu, offset, "Q", &qid);
     err = offset;
+    trace_v9fs_symlink_return(pdu->tag, pdu->id,
+                              qid.type, qid.version, qid.path);
 out:
     put_fid(pdu, dfidp);
 out_nofid:
-    trace_v9fs_symlink_return(pdu->tag, pdu->id,
-                              qid.type, qid.version, qid.path);
     complete_pdu(pdu->s, pdu, err);
     v9fs_string_free(&name);
     v9fs_string_free(&symname);
@@ -2950,10 +2948,11 @@ static void v9fs_mknod(void *opaque)
     stat_to_qid(&stbuf, &qid);
     err = offset;
     err += pdu_marshal(pdu, offset, "Q", &qid);
+    trace_v9fs_mknod_return(pdu->tag, pdu->id,
+                            qid.type, qid.version, qid.path);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_mknod_return(pdu->tag, pdu->id, qid.type, qid.version, qid.path);
     complete_pdu(s, pdu, err);
     v9fs_string_free(&name);
 }
@@ -3049,12 +3048,11 @@ static void v9fs_getlock(void *opaque)
                           glock->start, glock->length, glock->proc_id,
                           &glock->client_id);
     err = offset;
+    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock->type, glock->start,
+                              glock->length, glock->proc_id);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock->type, glock->start,
-                              glock->length, glock->proc_id);
-
     complete_pdu(s, pdu, err);
     v9fs_string_free(&glock->client_id);
     g_free(glock);
@@ -3089,11 +3087,11 @@ static void v9fs_mkdir(void *opaque)
     stat_to_qid(&stbuf, &qid);
     offset += pdu_marshal(pdu, offset, "Q", &qid);
     err = offset;
+    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
+                            qid.type, qid.version, qid.path, err);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
-                            qid.type, qid.version, qid.path, err);
     complete_pdu(pdu->s, pdu, err);
     v9fs_string_free(&name);
 }
@@ -3183,13 +3181,13 @@ static void v9fs_xattrwalk(void *opaque)
         offset += pdu_marshal(pdu, offset, "q", size);
         err = offset;
     }
+    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
 out:
     put_fid(pdu, file_fidp);
     if (xattr_fidp) {
         put_fid(pdu, xattr_fidp);
     }
 out_nofid:
-    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
     complete_pdu(s, pdu, err);
     v9fs_string_free(&name);
 }
@@ -3260,11 +3258,11 @@ static void v9fs_readlink(void *opaque)
     }
     offset += pdu_marshal(pdu, offset, "s", &target);
     err = offset;
+    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
     v9fs_string_free(&target);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
     complete_pdu(pdu->s, pdu, err);
 }
 
diff --git a/hw/audiodev.h b/hw/audiodev.h
index 8e930b21ae..d60c3498ee 100644
--- a/hw/audiodev.h
+++ b/hw/audiodev.h
@@ -11,7 +11,7 @@ int Adlib_init(qemu_irq *pic);
 int GUS_init(qemu_irq *pic);
 
 /* ac97.c */
-int ac97_init(PCIBus *buf);
+int ac97_init(PCIBus *bus);
 
 /* cs4231a.c */
 int cs4231a_init(qemu_irq *pic);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 1c7e3a00b5..0af201de2f 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -327,7 +327,7 @@ static void ahci_mem_write(void *opaque, target_phys_addr_t addr,
     }
 
     if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
-        DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
+        DPRINTF(-1, "(addr 0x%08X), val 0x%08"PRIX64"\n", (unsigned) addr, val);
 
         switch (addr) {
             case HOST_CAP: /* R/WO, RO */
@@ -777,7 +777,8 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
     ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
                                 ncq_fis->sector_count_low;
 
-    DPRINTF(port, "NCQ transfer LBA from %ld to %ld, drive max %ld\n",
+    DPRINTF(port, "NCQ transfer LBA from %"PRId64" to %"PRId64", "
+            "drive max %"PRId64"\n",
             ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2,
             s->dev[port].port.ifs[0].nb_sectors - 1);
 
@@ -786,10 +787,12 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
 
     switch(ncq_fis->command) {
         case READ_FPDMA_QUEUED:
-            DPRINTF(port, "NCQ reading %d sectors from LBA %ld, tag %d\n",
+            DPRINTF(port, "NCQ reading %d sectors from LBA %"PRId64", "
+                    "tag %d\n",
                     ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
 
-            DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+            DPRINTF(port, "tag %d aio read %"PRId64"\n",
+                    ncq_tfs->tag, ncq_tfs->lba);
 
             bdrv_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct,
                             (ncq_tfs->sector_count-1) * BDRV_SECTOR_SIZE,
@@ -799,10 +802,11 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
                                            ncq_cb, ncq_tfs);
             break;
         case WRITE_FPDMA_QUEUED:
-            DPRINTF(port, "NCQ writing %d sectors to LBA %ld, tag %d\n",
+            DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
                     ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
 
-            DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+            DPRINTF(port, "tag %d aio write %"PRId64"\n",
+                    ncq_tfs->tag, ncq_tfs->lba);
 
             bdrv_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct,
                             (ncq_tfs->sector_count-1) * BDRV_SECTOR_SIZE,
diff --git a/hw/lm4549.c b/hw/lm4549.c
new file mode 100644
index 0000000000..4d5b83125f
--- /dev/null
+++ b/hw/lm4549.c
@@ -0,0 +1,336 @@
+/*
+ * LM4549 Audio Codec Interface
+ *
+ * Copyright (c) 2011
+ * Written by Mathieu Sonet - www.elasticsheep.com
+ *
+ * This code is licenced under the GPL.
+ *
+ * *****************************************************************
+ *
+ * This driver emulates the LM4549 codec.
+ *
+ * It supports only one playback voice and no record voice.
+ */
+
+#include "hw.h"
+#include "audio/audio.h"
+#include "lm4549.h"
+
+#if 0
+#define LM4549_DEBUG  1
+#endif
+
+#if 0
+#define LM4549_DUMP_DAC_INPUT 1
+#endif
+
+#ifdef LM4549_DEBUG
+#define DPRINTF(fmt, ...) \
+do { printf("lm4549: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#if defined(LM4549_DUMP_DAC_INPUT)
+#include <stdio.h>
+static FILE *fp_dac_input;
+#endif
+
+/* LM4549 register list */
+enum {
+    LM4549_Reset                    = 0x00,
+    LM4549_Master_Volume            = 0x02,
+    LM4549_Line_Out_Volume          = 0x04,
+    LM4549_Master_Volume_Mono       = 0x06,
+    LM4549_PC_Beep_Volume           = 0x0A,
+    LM4549_Phone_Volume             = 0x0C,
+    LM4549_Mic_Volume               = 0x0E,
+    LM4549_Line_In_Volume           = 0x10,
+    LM4549_CD_Volume                = 0x12,
+    LM4549_Video_Volume             = 0x14,
+    LM4549_Aux_Volume               = 0x16,
+    LM4549_PCM_Out_Volume           = 0x18,
+    LM4549_Record_Select            = 0x1A,
+    LM4549_Record_Gain              = 0x1C,
+    LM4549_General_Purpose          = 0x20,
+    LM4549_3D_Control               = 0x22,
+    LM4549_Powerdown_Ctrl_Stat      = 0x26,
+    LM4549_Ext_Audio_ID             = 0x28,
+    LM4549_Ext_Audio_Stat_Ctrl      = 0x2A,
+    LM4549_PCM_Front_DAC_Rate       = 0x2C,
+    LM4549_PCM_ADC_Rate             = 0x32,
+    LM4549_Vendor_ID1               = 0x7C,
+    LM4549_Vendor_ID2               = 0x7E
+};
+
+static void lm4549_reset(lm4549_state *s)
+{
+    uint16_t *regfile = s->regfile;
+
+    regfile[LM4549_Reset]               = 0x0d50;
+    regfile[LM4549_Master_Volume]       = 0x8008;
+    regfile[LM4549_Line_Out_Volume]     = 0x8000;
+    regfile[LM4549_Master_Volume_Mono]  = 0x8000;
+    regfile[LM4549_PC_Beep_Volume]      = 0x0000;
+    regfile[LM4549_Phone_Volume]        = 0x8008;
+    regfile[LM4549_Mic_Volume]          = 0x8008;
+    regfile[LM4549_Line_In_Volume]      = 0x8808;
+    regfile[LM4549_CD_Volume]           = 0x8808;
+    regfile[LM4549_Video_Volume]        = 0x8808;
+    regfile[LM4549_Aux_Volume]          = 0x8808;
+    regfile[LM4549_PCM_Out_Volume]      = 0x8808;
+    regfile[LM4549_Record_Select]       = 0x0000;
+    regfile[LM4549_Record_Gain]         = 0x8000;
+    regfile[LM4549_General_Purpose]     = 0x0000;
+    regfile[LM4549_3D_Control]          = 0x0101;
+    regfile[LM4549_Powerdown_Ctrl_Stat] = 0x000f;
+    regfile[LM4549_Ext_Audio_ID]        = 0x0001;
+    regfile[LM4549_Ext_Audio_Stat_Ctrl] = 0x0000;
+    regfile[LM4549_PCM_Front_DAC_Rate]  = 0xbb80;
+    regfile[LM4549_PCM_ADC_Rate]        = 0xbb80;
+    regfile[LM4549_Vendor_ID1]          = 0x4e53;
+    regfile[LM4549_Vendor_ID2]          = 0x4331;
+}
+
+static void lm4549_audio_transfer(lm4549_state *s)
+{
+    uint32_t written_bytes, written_samples;
+    uint32_t i;
+
+    /* Activate the voice */
+    AUD_set_active_out(s->voice, 1);
+    s->voice_is_active = 1;
+
+    /* Try to write the buffer content */
+    written_bytes = AUD_write(s->voice, s->buffer,
+                              s->buffer_level * sizeof(uint16_t));
+    written_samples = written_bytes >> 1;
+
+#if defined(LM4549_DUMP_DAC_INPUT)
+    fwrite(s->buffer, sizeof(uint8_t), written_bytes, fp_dac_input);
+#endif
+
+    s->buffer_level -= written_samples;
+
+    if (s->buffer_level > 0) {
+        /* Move the data back to the start of the buffer */
+        for (i = 0; i < s->buffer_level; i++) {
+            s->buffer[i] = s->buffer[i + written_samples];
+        }
+    }
+}
+
+static void lm4549_audio_out_callback(void *opaque, int free)
+{
+    lm4549_state *s = (lm4549_state *)opaque;
+    static uint32_t prev_buffer_level;
+
+#ifdef LM4549_DEBUG
+    int size = AUD_get_buffer_size_out(s->voice);
+    DPRINTF("audio_out_callback size = %i free = %i\n", size, free);
+#endif
+
+    /* Detect that no data are consumed
+       => disable the voice */
+    if (s->buffer_level == prev_buffer_level) {
+        AUD_set_active_out(s->voice, 0);
+        s->voice_is_active = 0;
+    }
+    prev_buffer_level = s->buffer_level;
+
+    /* Check if a buffer transfer is pending */
+    if (s->buffer_level == LM4549_BUFFER_SIZE) {
+        lm4549_audio_transfer(s);
+
+        /* Request more data */
+        if (s->data_req_cb != NULL) {
+            (s->data_req_cb)(s->opaque);
+        }
+    }
+}
+
+uint32_t lm4549_read(lm4549_state *s, target_phys_addr_t offset)
+{
+    uint16_t *regfile = s->regfile;
+    uint32_t value = 0;
+
+    /* Read the stored value */
+    assert(offset < 128);
+    value = regfile[offset];
+
+    DPRINTF("read [0x%02x] = 0x%04x\n", offset, value);
+
+    return value;
+}
+
+/*
+ * Write a codec register.
+ *
+ * offset indexes the 128-entry register file directly and must be below 128
+ * (asserted). value is truncated to 16 bits when stored.
+ *
+ * Side effects by register:
+ *  - LM4549_Reset: resets the codec through lm4549_reset().
+ *  - LM4549_PCM_Front_DAC_Rate: stores the rate and re-opens the output
+ *    voice with it.
+ *  - LM4549_Powerdown_Ctrl_Stat: keeps the low four bits of the stored value.
+ *  - LM4549_Ext_Audio_ID, LM4549_Vendor_ID1/2: write is ignored.
+ */
+void lm4549_write(lm4549_state *s,
+                  target_phys_addr_t offset, uint32_t value)
+{
+    uint16_t *regfile = s->regfile;
+
+    assert(offset < 128);
+    /* offset is cast because target_phys_addr_t need not be int-sized */
+    DPRINTF("write [0x%02x] = 0x%04x\n", (int)offset, value);
+
+    switch (offset) {
+    case LM4549_Reset:
+        lm4549_reset(s);
+        break;
+
+    case LM4549_PCM_Front_DAC_Rate:
+    {
+        struct audsettings as;
+
+        regfile[LM4549_PCM_Front_DAC_Rate] = value;
+        DPRINTF("DAC rate change = %i\n", value);
+
+        /* Re-open a voice with the new sample rate */
+        as.freq = value;
+        as.nchannels = 2;
+        as.fmt = AUD_FMT_S16;
+        as.endianness = 0;
+
+        s->voice = AUD_open_out(
+            &s->card,
+            s->voice,
+            "lm4549.out",
+            s,
+            lm4549_audio_out_callback,
+            &as
+        );
+        break;
+    }
+
+    case LM4549_Powerdown_Ctrl_Stat:
+        /* Bits [3:0] keep their stored value; only the others are written */
+        value &= ~0xf;
+        value |= regfile[LM4549_Powerdown_Ctrl_Stat] & 0xf;
+        regfile[LM4549_Powerdown_Ctrl_Stat] = value;
+        break;
+
+    case LM4549_Ext_Audio_ID:
+    case LM4549_Vendor_ID1:
+    case LM4549_Vendor_ID2:
+        DPRINTF("Write to read-only register 0x%x\n", (int)offset);
+        break;
+
+    default:
+        /* Store the new value */
+        regfile[offset] = value;
+        break;
+    }
+}
+
+/*
+ * Queue one stereo sample pair for playback.
+ *
+ * left and right are 20-bit samples; they are reduced to 16 bits by
+ * dropping the four low bits. When the buffer becomes full it is handed to
+ * the audio host through lm4549_audio_transfer().
+ *
+ * Returns 1 when the pair was stored, 0 when there was no room for it.
+ */
+uint32_t lm4549_write_samples(lm4549_state *s, uint32_t left, uint32_t right)
+{
+    /* The left and right samples are in 20-bit resolution.
+       The LM4549 has 18-bit resolution and only uses the bits [19:2].
+       This model supports 16-bit playback.
+    */
+
+    /* Two entries are written below, so require room for both. Testing only
+       for a completely full buffer would let an odd level of
+       LM4549_BUFFER_SIZE - 1 write one entry past the end of the array. */
+    if (s->buffer_level > LM4549_BUFFER_SIZE - 2) {
+        DPRINTF("write_sample Buffer full\n");
+        return 0;
+    }
+
+    /* Store 16-bit samples in the buffer */
+    s->buffer[s->buffer_level++] = (left >> 4);
+    s->buffer[s->buffer_level++] = (right >> 4);
+
+    if (s->buffer_level == LM4549_BUFFER_SIZE) {
+        /* Trigger the transfer of the buffer to the audio host */
+        lm4549_audio_transfer(s);
+    }
+
+    return 1;
+}
+
+/*
+ * VMState post_load hook: validates the loaded state and re-creates the
+ * host audio voice, which is not part of the migrated data.
+ *
+ * Returns 0 on success, or -1 when the loaded buffer level cannot describe
+ * the sample buffer.
+ */
+static int lm4549_post_load(void *opaque, int version_id)
+{
+    lm4549_state *s = (lm4549_state *)opaque;
+    uint16_t *regfile = s->regfile;
+    struct audsettings as;
+
+    /* Re-open a voice with the current sample rate */
+    uint32_t freq = regfile[LM4549_PCM_Front_DAC_Rate];
+
+    /* buffer_level is read from the incoming stream and is later used to
+       index buffer[], so refuse values beyond the array size */
+    if (s->buffer_level > LM4549_BUFFER_SIZE) {
+        return -1;
+    }
+
+    DPRINTF("post_load freq = %i\n", freq);
+    DPRINTF("post_load voice_is_active = %i\n", s->voice_is_active);
+
+    as.freq = freq;
+    as.nchannels = 2;
+    as.fmt = AUD_FMT_S16;
+    as.endianness = 0;
+
+    s->voice = AUD_open_out(
+        &s->card,
+        s->voice,
+        "lm4549.out",
+        s,
+        lm4549_audio_out_callback,
+        &as
+    );
+
+    /* Request data */
+    if (s->voice_is_active == 1) {
+        lm4549_audio_out_callback(s, AUD_get_buffer_size_out(s->voice));
+    }
+
+    return 0;
+}
+
+/*
+ * Initialise the codec model.
+ *
+ * data_req_cb is invoked with opaque whenever the codec has flushed a full
+ * buffer from its output callback and wants more samples.
+ */
+void lm4549_init(lm4549_state *s, lm4549_callback data_req_cb, void* opaque)
+{
+    struct audsettings settings;
+
+    /* Remember who to ask for more samples */
+    s->data_req_cb = data_req_cb;
+    s->opaque = opaque;
+
+    /* Bring the register file to its reset state */
+    lm4549_reset(s);
+
+    /* Make the codec known to the audio subsystem */
+    AUD_register_card("lm4549", &s->card);
+
+    /* Default voice: 48 kHz, two channels, signed 16-bit */
+    settings.freq = 48000;
+    settings.nchannels = 2;
+    settings.fmt = AUD_FMT_S16;
+    settings.endianness = 0;
+
+    s->voice = AUD_open_out(&s->card, s->voice, "lm4549.out", s,
+                            lm4549_audio_out_callback, &settings);
+
+    AUD_set_volume_out(s->voice, 0, 255, 255);
+
+    s->voice_is_active = 0;
+
+    /* Start with an empty, zeroed sample buffer */
+    memset(s->buffer, 0x00, sizeof(s->buffer));
+    s->buffer_level = 0;
+
+#if defined(LM4549_DUMP_DAC_INPUT)
+    fp_dac_input = fopen("lm4549_dac_input.pcm", "wb");
+    if (fp_dac_input == NULL) {
+        hw_error("Unable to open lm4549_dac_input.pcm for writing\n");
+    }
+#endif
+}
+
+const VMStateDescription vmstate_lm4549_state = {
+    .name = "lm4549_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .post_load = &lm4549_post_load, /* re-opens the output voice */
+    .fields      = (VMStateField[]) { /* card, voice, callback: not saved */
+        VMSTATE_UINT32(voice_is_active, lm4549_state),
+        VMSTATE_UINT16_ARRAY(regfile, lm4549_state, 128),
+        VMSTATE_UINT16_ARRAY(buffer, lm4549_state, LM4549_BUFFER_SIZE),
+        VMSTATE_UINT32(buffer_level, lm4549_state),
+        VMSTATE_END_OF_LIST()
+    }
+};
diff --git a/hw/lm4549.h b/hw/lm4549.h
new file mode 100644
index 0000000000..70d0ac1750
--- /dev/null
+++ b/hw/lm4549.h
@@ -0,0 +1,43 @@
+/*
+ * LM4549 Audio Codec Interface
+ *
+ * Copyright (c) 2011
+ * Written by Mathieu Sonet - www.elasticsheep.com
+ *
+ * This code is licenced under the GPL.
+ *
+ * *****************************************************************
+ */
+
+#ifndef HW_LM4549_H
+#define HW_LM4549_H
+
+#include "audio/audio.h"
+
+typedef void (*lm4549_callback)(void *opaque);
+
+#define LM4549_BUFFER_SIZE (512 * 2) /* 512 16-bit stereo samples */
+
+
+typedef struct { /* State of one LM4549 codec model */
+    QEMUSoundCard card;       /* registered with AUD_register_card() */
+    SWVoiceOut *voice;        /* output voice returned by AUD_open_out() */
+    uint32_t voice_is_active; /* cleared when the callback stops the voice */
+
+    uint16_t regfile[128];    /* indexed directly by register offset */
+    lm4549_callback data_req_cb; /* called to request more samples */
+    void *opaque;             /* argument passed to data_req_cb */
+
+    uint16_t buffer[LM4549_BUFFER_SIZE]; /* 16-bit samples, left then right */
+    uint32_t buffer_level;    /* number of buffer[] entries in use */
+} lm4549_state;
+
+extern const VMStateDescription vmstate_lm4549_state;
+
+
+void lm4549_init(lm4549_state *s, lm4549_callback data_req, void *opaque);
+uint32_t lm4549_read(lm4549_state *s, target_phys_addr_t offset);
+void lm4549_write(lm4549_state *s, target_phys_addr_t offset, uint32_t value);
+uint32_t lm4549_write_samples(lm4549_state *s, uint32_t left, uint32_t right);
+
+#endif /* #ifndef HW_LM4549_H */
diff --git a/hw/pl041.c b/hw/pl041.c
new file mode 100644
index 0000000000..efd52ac42f
--- /dev/null
+++ b/hw/pl041.c
@@ -0,0 +1,636 @@
+/*
+ * Arm PrimeCell PL041 Advanced Audio Codec Interface
+ *
+ * Copyright (c) 2011
+ * Written by Mathieu Sonet - www.elasticsheep.com
+ *
+ * This code is licenced under the GPL.
+ *
+ * *****************************************************************
+ *
+ * This driver emulates the ARM AACI interface
+ * connected to a LM4549 codec.
+ *
+ * Limitations:
+ * - Supports playback on one channel only (Versatile/Vexpress)
+ * - Supports only one TX FIFO in compact-mode or non-compact mode.
+ * - Supports playback of 12, 16, 18 and 20 bits samples.
+ * - Record is not supported.
+ * - The PL041 is hardwired to a LM4549 codec.
+ *
+ */
+
+#include "sysbus.h"
+
+#include "pl041.h"
+#include "lm4549.h"
+
+#if 0
+#define PL041_DEBUG_LEVEL 1
+#endif
+
+#if defined(PL041_DEBUG_LEVEL) && (PL041_DEBUG_LEVEL >= 1)
+#define DBG_L1(fmt, ...) \
+do { printf("pl041: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DBG_L1(fmt, ...) \
+do { } while (0)
+#endif
+
+#if defined(PL041_DEBUG_LEVEL) && (PL041_DEBUG_LEVEL >= 2)
+#define DBG_L2(fmt, ...) \
+do { printf("pl041: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DBG_L2(fmt, ...) \
+do { } while (0)
+#endif
+
+
+#define MAX_FIFO_DEPTH      (1024)
+#define DEFAULT_FIFO_DEPTH  (8)
+
+#define SLOT1_RW    (1 << 19)
+
+/* This FIFO only stores 20-bit samples on 32-bit words.
+   So its level is independent of the selected mode */
+typedef struct {
+    uint32_t level;                /* number of data[] entries in use */
+    uint32_t data[MAX_FIFO_DEPTH]; /* one sample per 32-bit word */
+} pl041_fifo;
+
+typedef struct { /* TX and RX state of one channel */
+    pl041_fifo tx_fifo;
+    uint8_t tx_enabled;      /* TXEN bit of txcr1 */
+    uint8_t tx_compact_mode; /* 1 when TXCOMPACT is set and allowed */
+    uint8_t tx_sample_size;  /* 12, 16, 18 or 20, decoded from TSIZE */
+
+    pl041_fifo rx_fifo;
+    uint8_t rx_enabled;
+    uint8_t rx_compact_mode;
+    uint8_t rx_sample_size;
+} pl041_channel;
+
+typedef struct { /* Device state of the PL041 model */
+    SysBusDevice busdev;
+    MemoryRegion iomem; /* 0x1000-byte MMIO region set up in pl041_init() */
+    qemu_irq irq;       /* raised while (isr1 & ie1) is non-zero */
+
+    uint32_t fifo_depth; /* FIFO depth in non-compact mode */
+
+    pl041_regfile regs;  /* register file, laid out as listed in pl041.hx */
+    pl041_channel fifo1; /* the single emulated channel */
+    lm4549_state codec;  /* codec the transmit FIFO is drained into */
+} pl041_state;
+
+
+static const unsigned char pl041_default_id[8] = {
+    0x41, 0x10, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1
+};
+
+#if defined(PL041_DEBUG_LEVEL)
+#define REGISTER(name, offset) #name,
+static const char *pl041_regs_name[] = {
+    #include "pl041.hx"
+};
+#undef REGISTER
+#endif
+
+
+#if defined(PL041_DEBUG_LEVEL)
+/* Map a register offset to its name for the debug traces; offsets above
+   PL041_dr1_7 are reported as "unknown". */
+static const char *get_reg_name(target_phys_addr_t offset)
+{
+    return (offset <= PL041_dr1_7) ? pl041_regs_name[offset >> 2] : "unknown";
+}
+#endif
+
+/*
+ * Build the value of the periphid3 identification register: bit 0 set for
+ * the single channel, and the code of the configured FIFO depth in the
+ * bits starting at bit 3. A depth that has no code leaves those bits clear.
+ */
+static uint8_t pl041_compute_periphid3(pl041_state *s)
+{
+    /* FIFO depths, listed in the order of their identification code */
+    static const uint32_t depth_by_code[8] = {
+        8, 32, 64, 128, 256, 512, 1024, 2048
+    };
+    uint8_t id3 = 1; /* One channel */
+    unsigned int code;
+
+    /* Add the fifo depth information */
+    for (code = 0; code < 8; code++) {
+        if (s->fifo_depth == depth_by_code[code]) {
+            id3 |= code << 3;
+            break;
+        }
+    }
+
+    return id3;
+}
+
+/* Return the register file and the channel state to their reset values. */
+static void pl041_reset(pl041_state *s)
+{
+    DBG_L1("pl041_reset\n");
+
+    /* Wipe the registers and the channel (FIFOs and decoded settings) */
+    memset(&s->regs, 0x00, sizeof(s->regs));
+    memset(&s->fifo1, 0x00, sizeof(s->fifo1));
+
+    /* Non-zero reset values: empty slots and empty FIFOs */
+    s->regs.slfr = SL1TXEMPTY | SL2TXEMPTY | SL12TXEMPTY;
+    s->regs.sr1 = TXFE | RXFE | TXHE;
+    s->regs.isr1 = 0;
+}
+
+
+/*
+ * Push a word written to a channel 1 data register into the transmit FIFO.
+ *
+ * In non-compact mode the word holds one sample. In compact mode it holds
+ * two 16-bit fields, low half first. Every sample is shifted left so that
+ * it is stored as a 20-bit value. A write that does not fit is dropped.
+ * The TXFE, TXHE, TXFF and TXUNDERRUN bits of sr1 are then updated from the
+ * new level.
+ */
+static void pl041_fifo1_write(pl041_state *s, uint32_t value)
+{
+    pl041_channel *channel = &s->fifo1;
+    pl041_fifo *fifo = &s->fifo1.tx_fifo;
+
+    /* Push the value in the FIFO */
+    if (channel->tx_compact_mode == 0) {
+        /* Non-compact mode */
+
+        if (fifo->level < s->fifo_depth) {
+            /* Pad the value with 0 to obtain a 20-bit sample */
+            switch (channel->tx_sample_size) {
+            case 12:
+                value = (value << 8) & 0xFFFFF;
+                break;
+            case 16:
+                value = (value << 4) & 0xFFFFF;
+                break;
+            case 18:
+                value = (value << 2) & 0xFFFFF;
+                break;
+            case 20:
+            default:
+                break;
+            }
+
+            /* Store the sample in the FIFO */
+            fifo->data[fifo->level++] = value;
+        }
+#if defined(PL041_DEBUG_LEVEL)
+        else {
+            DBG_L1("fifo1 write: overrun\n");
+        }
+#endif
+    } else {
+        /* Compact mode */
+
+        /* Two entries are needed. Accept the write when they exactly fill
+           the FIFO, as the non-compact path does for its last entry;
+           otherwise the FIFO could never report TXFF in this mode. */
+        if ((fifo->level + 2) <= s->fifo_depth) {
+            uint32_t i = 0;
+            uint32_t sample = 0;
+
+            for (i = 0; i < 2; i++) {
+                sample = value & 0xFFFF;
+                value = value >> 16;
+
+                /* Pad each sample with 0 to obtain a 20-bit sample */
+                switch (channel->tx_sample_size) {
+                case 12:
+                    sample = sample << 8;
+                    break;
+                case 16:
+                default:
+                    sample = sample << 4;
+                    break;
+                }
+
+                /* Store the sample in the FIFO */
+                fifo->data[fifo->level++] = sample;
+            }
+        }
+#if defined(PL041_DEBUG_LEVEL)
+        else {
+            DBG_L1("fifo1 write: overrun\n");
+        }
+#endif
+    }
+
+    /* Update the status register */
+    if (fifo->level > 0) {
+        s->regs.sr1 &= ~(TXUNDERRUN | TXFE);
+    }
+
+    if (fifo->level >= (s->fifo_depth / 2)) {
+        s->regs.sr1 &= ~TXHE;
+    }
+
+    if (fifo->level >= s->fifo_depth) {
+        s->regs.sr1 |= TXFF;
+    }
+
+    DBG_L2("fifo1_push sr1 = 0x%08x\n", s->regs.sr1);
+}
+
+/*
+ * Drain the transmit FIFO into the codec.
+ *
+ * Nothing happens unless transmission is enabled, slot 3 or slot 4 is
+ * selected in txcr1 and the FIFO is at least half full. Samples are sent in
+ * left/right pairs until the FIFO is exhausted or the codec refuses a pair;
+ * what is left is moved to the front and sr1 is updated.
+ */
+static void pl041_fifo1_transmit(pl041_state *s)
+{
+    pl041_fifo *fifo = &s->fifo1.tx_fifo;
+    uint32_t selected_slots = s->regs.txcr1 & TXSLOT_MASK;
+    uint32_t pairs_sent = 0;
+    uint32_t consumed;
+    uint32_t idx;
+
+    /* Check if FIFO1 transmit is enabled */
+    if (!s->fifo1.tx_enabled || !(selected_slots & (TXSLOT3 | TXSLOT4))) {
+        return;
+    }
+
+    if (fifo->level < (s->fifo_depth / 2)) {
+        return;
+    }
+
+    DBG_L1("Transfer FIFO level = %i\n", fifo->level);
+
+    /* Try to transfer the whole FIFO */
+    while (pairs_sent < (fifo->level / 2)) {
+        uint32_t left = fifo->data[pairs_sent * 2];
+        uint32_t right = fifo->data[pairs_sent * 2 + 1];
+
+        /* Transmit two 20-bit samples to the codec */
+        if (lm4549_write_samples(&s->codec, left, right) == 0) {
+            DBG_L1("Codec buffer full\n");
+            break;
+        }
+        pairs_sent++;
+    }
+
+    consumed = pairs_sent * 2;
+    if (consumed == 0) {
+        return;
+    }
+
+    /* Update the FIFO level */
+    fifo->level -= consumed;
+
+    /* Move back the pending samples to the start of the FIFO */
+    for (idx = 0; idx < fifo->level; idx++) {
+        fifo->data[idx] = fifo->data[consumed + idx];
+    }
+
+    /* Update the status register */
+    s->regs.sr1 &= ~TXFF;
+
+    if (fifo->level <= (s->fifo_depth / 2)) {
+        s->regs.sr1 |= TXHE;
+    }
+
+    if (fifo->level == 0) {
+        s->regs.sr1 |= TXFE | TXUNDERRUN;
+        DBG_L1("Empty FIFO\n");
+    }
+}
+
+/*
+ * Recompute the URINTR, TXINTR and TXCINTR bits of isr1 from sr1, then
+ * drive the interrupt line from isr1 masked by ie1.
+ */
+static void pl041_isr1_update(pl041_state *s)
+{
+    uint32_t status = s->regs.sr1;
+    uint32_t pending = s->regs.isr1 & ~(URINTR | TXINTR | TXCINTR);
+
+    /* Update ISR1 */
+    if (status & TXUNDERRUN) {
+        pending |= URINTR;
+    }
+
+    if (status & TXHE) {
+        pending |= TXINTR;
+    }
+
+    /* Transmit complete: FIFO empty and transmitter not busy */
+    if ((status & TXFE) && !(status & TXBUSY)) {
+        pending |= TXCINTR;
+    }
+
+    s->regs.isr1 = pending;
+
+    /* Update the irq state */
+    qemu_set_irq(s->irq, (s->regs.isr1 & s->regs.ie1) != 0);
+    DBG_L2("Set interrupt sr1 = 0x%08x isr1 = 0x%08x masked = 0x%08x\n",
+           s->regs.sr1, s->regs.isr1, s->regs.isr1 & s->regs.ie1);
+}
+
+/* Data-request callback given to lm4549_init(): drains the transmit FIFO
+   into the codec and refreshes the interrupt state. */
+static void pl041_request_data(void *opaque)
+{
+    pl041_state *dev = (pl041_state *)opaque;
+
+    /* Trigger pending transfers */
+    pl041_fifo1_transmit(dev);
+    pl041_isr1_update(dev);
+}
+
+/*
+ * MMIO read handler.
+ *
+ * Identification registers (PL041_periphid0..PL041_pcellid3) are served
+ * from pl041_default_id, except periphid3 which is computed from the FIFO
+ * depth. Offsets up to PL041_dr4_7 return the stored register, with
+ * PL041_allints returning the low seven bits of isr1. Any other offset
+ * reads as 0. size is not used.
+ */
+static uint64_t pl041_read(void *opaque, target_phys_addr_t offset,
+                                unsigned size)
+{
+    pl041_state *s = (pl041_state *)opaque;
+    int value;
+
+    if ((offset >= PL041_periphid0) && (offset <= PL041_pcellid3)) {
+        if (offset == PL041_periphid3) {
+            value = pl041_compute_periphid3(s);
+        } else {
+            value = pl041_default_id[(offset - PL041_periphid0) >> 2];
+        }
+
+        /* offset is cast in the traces because target_phys_addr_t need not
+           match the %x conversion */
+        DBG_L1("pl041_read [0x%08x] => 0x%08x\n", (unsigned int)offset, value);
+        return value;
+    } else if (offset <= PL041_dr4_7) {
+        /* The register file is an array of 32-bit words in offset order */
+        value = *((uint32_t *)&s->regs + (offset >> 2));
+    } else {
+        DBG_L1("pl041_read: Reserved offset %x\n", (int)offset);
+        return 0;
+    }
+
+    switch (offset) {
+    case PL041_allints:
+        value = s->regs.isr1 & 0x7F;
+        break;
+    }
+
+    DBG_L1("pl041_read [0x%08x] %s => 0x%08x\n", (unsigned int)offset,
+           get_reg_name(offset), value);
+
+    return value;
+}
+
+/*
+ * MMIO write handler.
+ *
+ * A write to an offset up to PL041_dr4_7 is first stored in the register
+ * file; other offsets are ignored. The register-specific action is then
+ * executed:
+ *  - txcr1: decode enable, compact mode and sample size for channel 1.
+ *  - sl1tx: perform a codec register access, using the address in sl1tx
+ *    and the data previously written to sl2tx.
+ *  - sl2tx: mark slot 2 as holding data.
+ *  - intclr: clear the TX underrun status when TXUEC1 is written.
+ *  - maincr: reset the device when AACIFE is clear.
+ *  - dr1_0..dr1_3: push the value into the transmit FIFO.
+ * Finally the FIFO is drained towards the codec and the interrupt state is
+ * refreshed. size is not used.
+ */
+static void pl041_write(void *opaque, target_phys_addr_t offset,
+                             uint64_t value, unsigned size)
+{
+    pl041_state *s = (pl041_state *)opaque;
+    uint16_t control, data;
+    uint32_t result;
+
+    /* offset is cast because target_phys_addr_t need not match the %x
+       conversion, which would also corrupt the following %s argument */
+    DBG_L1("pl041_write [0x%08x] %s <= 0x%08x\n", (unsigned int)offset,
+           get_reg_name(offset), (unsigned int)value);
+
+    /* Write the register */
+    if (offset <= PL041_dr4_7) {
+        *((uint32_t *)&s->regs + (offset >> 2)) = value;
+    } else {
+        DBG_L1("pl041_write: Reserved offset %x\n", (int)offset);
+        return;
+    }
+
+    /* Execute the actions */
+    switch (offset) {
+    case PL041_txcr1:
+    {
+        pl041_channel *channel = &s->fifo1;
+
+        uint32_t txen = s->regs.txcr1 & TXEN;
+        uint32_t tsize = (s->regs.txcr1 & TSIZE_MASK) >> TSIZE_MASK_BIT;
+        uint32_t compact_mode = (s->regs.txcr1 & TXCOMPACT) ? 1 : 0;
+#if defined(PL041_DEBUG_LEVEL)
+        uint32_t slots = (s->regs.txcr1 & TXSLOT_MASK) >> TXSLOT_MASK_BIT;
+        uint32_t txfen = (s->regs.txcr1 & TXFEN) > 0 ? 1 : 0;
+#endif
+
+        DBG_L1("=> txen = %i slots = 0x%01x tsize = %i compact = %i "
+               "txfen = %i\n", txen, slots,  tsize, compact_mode, txfen);
+
+        channel->tx_enabled = txen;
+        channel->tx_compact_mode = compact_mode;
+
+        switch (tsize) {
+        case 0:
+            channel->tx_sample_size = 16;
+            break;
+        case 1:
+            channel->tx_sample_size = 18;
+            break;
+        case 2:
+            channel->tx_sample_size = 20;
+            break;
+        case 3:
+            channel->tx_sample_size = 12;
+            break;
+        }
+
+        DBG_L1("TX enabled = %i\n", channel->tx_enabled);
+        DBG_L1("TX compact mode = %i\n", channel->tx_compact_mode);
+        DBG_L1("TX sample width = %i\n", channel->tx_sample_size);
+
+        /* Check if compact mode is allowed with selected tsize */
+        if (channel->tx_compact_mode == 1) {
+            if ((channel->tx_sample_size == 18) ||
+                (channel->tx_sample_size == 20)) {
+                channel->tx_compact_mode = 0;
+                DBG_L1("Compact mode not allowed with 18/20-bit sample size\n");
+            }
+        }
+
+        break;
+    }
+    case PL041_sl1tx:
+        s->regs.slfr &= ~SL1TXEMPTY;
+
+        /* Codec register address from slot 1, data from slot 2 */
+        control = (s->regs.sl1tx >> 12) & 0x7F;
+        data = (s->regs.sl2tx >> 4) & 0xFFFF;
+
+        if ((s->regs.sl1tx & SLOT1_RW) == 0) {
+            /* Write operation */
+            lm4549_write(&s->codec, control, data);
+        } else {
+            /* Read operation */
+            result = lm4549_read(&s->codec, control);
+
+            /* Store the returned value */
+            s->regs.sl1rx = s->regs.sl1tx & ~SLOT1_RW;
+            s->regs.sl2rx = result << 4;
+
+            s->regs.slfr &= ~(SL1RXBUSY | SL2RXBUSY);
+            s->regs.slfr |= SL1RXVALID | SL2RXVALID;
+        }
+        break;
+
+    case PL041_sl2tx:
+        s->regs.sl2tx = value;
+        s->regs.slfr &= ~SL2TXEMPTY;
+        break;
+
+    case PL041_intclr:
+        DBG_L1("=> Clear interrupt intclr = 0x%08x isr1 = 0x%08x\n",
+               s->regs.intclr, s->regs.isr1);
+
+        if (s->regs.intclr & TXUEC1) {
+            s->regs.sr1 &= ~TXUNDERRUN;
+        }
+        break;
+
+    case PL041_maincr:
+    {
+#if defined(PL041_DEBUG_LEVEL)
+        /* A '!' is written in front of each flag that is not set */
+        char debug[] = " AACIFE  SL1RXEN  SL1TXEN";
+        if (!(value & AACIFE)) {
+            debug[0] = '!';
+        }
+        if (!(value & SL1RXEN)) {
+            debug[8] = '!';
+        }
+        if (!(value & SL1TXEN)) {
+            debug[17] = '!';
+        }
+        DBG_L1("%s\n", debug);
+#endif
+
+        if ((s->regs.maincr & AACIFE) == 0) {
+            pl041_reset(s);
+        }
+        break;
+    }
+
+    case PL041_dr1_0:
+    case PL041_dr1_1:
+    case PL041_dr1_2:
+    case PL041_dr1_3:
+        pl041_fifo1_write(s, value);
+        break;
+    }
+
+    /* Transmit the FIFO content */
+    pl041_fifo1_transmit(s);
+
+    /* Update the ISR1 register */
+    pl041_isr1_update(s);
+}
+
+/* qdev reset hook: resets the pl041_state that embeds the given device. */
+static void pl041_device_reset(DeviceState *d)
+{
+    pl041_reset(DO_UPCAST(pl041_state, busdev.qdev, d));
+}
+
+static const MemoryRegionOps pl041_ops = { /* handlers of the MMIO region */
+    .read = pl041_read,
+    .write = pl041_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/*
+ * Sysbus init hook: validates the configured FIFO depth, sets up the MMIO
+ * region and the interrupt line, and initialises the codec.
+ *
+ * Returns 0 on success, -1 when the FIFO depth is not supported.
+ */
+static int pl041_init(SysBusDevice *dev)
+{
+    pl041_state *s = FROM_SYSBUS(pl041_state, dev);
+
+    /* Print the pointer with %p; a cast to uint32_t truncates it on hosts
+       with wider pointers */
+    DBG_L1("pl041_init %p\n", (void *)s);
+
+    /* Check the device properties */
+    switch (s->fifo_depth) {
+    case 8:
+    case 32:
+    case 64:
+    case 128:
+    case 256:
+    case 512:
+    case 1024:
+    case 2048:
+        break;
+    case 16:
+    default:
+        /* NC FIFO depth of 16 is not allowed because its id bits in
+           AACIPERIPHID3 overlap with the id for the default NC FIFO depth */
+        fprintf(stderr, "pl041: unsupported non-compact fifo depth [%i]\n",
+                s->fifo_depth);
+        return -1;
+    }
+
+    /* The FIFO storage holds MAX_FIFO_DEPTH entries and the FIFO code fills
+       it up to fifo_depth, so a larger depth would write past the array */
+    if (s->fifo_depth > MAX_FIFO_DEPTH) {
+        fprintf(stderr, "pl041: non-compact fifo depth [%i] exceeds the "
+                "maximum supported depth [%i]\n",
+                s->fifo_depth, MAX_FIFO_DEPTH);
+        return -1;
+    }
+
+    /* Connect the device to the sysbus */
+    memory_region_init_io(&s->iomem, &pl041_ops, s, "pl041", 0x1000);
+    sysbus_init_mmio_region(dev, &s->iomem);
+    sysbus_init_irq(dev, &s->irq);
+
+    /* Init the codec */
+    lm4549_init(&s->codec, &pl041_request_data, (void *)s);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_pl041_regfile = {
+    .name = "pl041_regfile",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) { /* one field per pl041.hx entry */
+#define REGISTER(name, offset) VMSTATE_UINT32(name, pl041_regfile),
+        #include "pl041.hx"
+#undef REGISTER
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_pl041_fifo = {
+    .name = "pl041_fifo",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) { /* level plus the whole data array */
+        VMSTATE_UINT32(level, pl041_fifo),
+        VMSTATE_UINT32_ARRAY(data, pl041_fifo, MAX_FIFO_DEPTH),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_pl041_channel = {
+    .name = "pl041_channel",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) { /* every member of pl041_channel */
+        VMSTATE_STRUCT(tx_fifo, pl041_channel, 0,
+                       vmstate_pl041_fifo, pl041_fifo),
+        VMSTATE_UINT8(tx_enabled, pl041_channel),
+        VMSTATE_UINT8(tx_compact_mode, pl041_channel),
+        VMSTATE_UINT8(tx_sample_size, pl041_channel),
+        VMSTATE_STRUCT(rx_fifo, pl041_channel, 0,
+                       vmstate_pl041_fifo, pl041_fifo),
+        VMSTATE_UINT8(rx_enabled, pl041_channel),
+        VMSTATE_UINT8(rx_compact_mode, pl041_channel),
+        VMSTATE_UINT8(rx_sample_size, pl041_channel),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_pl041 = {
+    .name = "pl041",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) { /* busdev, iomem and irq are not saved */
+        VMSTATE_UINT32(fifo_depth, pl041_state),
+        VMSTATE_STRUCT(regs, pl041_state, 0,
+                       vmstate_pl041_regfile, pl041_regfile),
+        VMSTATE_STRUCT(fifo1, pl041_state, 0,
+                       vmstate_pl041_channel, pl041_channel),
+        VMSTATE_STRUCT(codec, pl041_state, 0,
+                       vmstate_lm4549_state, lm4549_state),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static SysBusDeviceInfo pl041_device_info = {
+    .init = pl041_init,
+    .qdev.name = "pl041",
+    .qdev.size = sizeof(pl041_state),
+    .qdev.vmsd = &vmstate_pl041,
+    .qdev.reset = pl041_device_reset,
+    .qdev.no_user = 1,
+    .qdev.props = (Property[]) {
+        /* Non-compact FIFO depth property */
+        DEFINE_PROP_UINT32("nc_fifo_depth", pl041_state,
+                           fifo_depth, DEFAULT_FIFO_DEPTH), /* checked in init */
+        DEFINE_PROP_END_OF_LIST(),
+    },
+};
+
+static void pl041_register_device(void)
+{
+    sysbus_register_withprop(&pl041_device_info); /* registers "pl041" */
+}
+
+device_init(pl041_register_device)
diff --git a/hw/pl041.h b/hw/pl041.h
new file mode 100644
index 0000000000..1f224326e5
--- /dev/null
+++ b/hw/pl041.h
@@ -0,0 +1,135 @@
+/*
+ * Arm PrimeCell PL041 Advanced Audio Codec Interface
+ *
+ * Copyright (c) 2011
+ * Written by Mathieu Sonet - www.elasticsheep.com
+ *
+ * This code is licenced under the GPL.
+ *
+ * *****************************************************************
+ */
+
+#ifndef HW_PL041_H
+#define HW_PL041_H
+
+/* Register file: one uint32_t member per REGISTER() entry of pl041.hx,
+   declared in the order in which the entries are listed */
+#define REGISTER(name, offset) uint32_t name;
+typedef struct {
+    #include "pl041.hx"
+} pl041_regfile;
+#undef REGISTER
+
+/* Register addresses: PL041_<name> = offset for each REGISTER() entry of
+   pl041.hx, followed by the identification registers at 0xFE0..0xFFC */
+#define REGISTER(name, offset) PL041_##name = offset,
+enum {
+    #include "pl041.hx"
+
+    PL041_periphid0 = 0xFE0,
+    PL041_periphid1 = 0xFE4,
+    PL041_periphid2 = 0xFE8,
+    PL041_periphid3 = 0xFEC,
+    PL041_pcellid0  = 0xFF0,
+    PL041_pcellid1  = 0xFF4,
+    PL041_pcellid2  = 0xFF8,
+    PL041_pcellid3  = 0xFFC,
+};
+#undef REGISTER
+
+/* Register bits */
+
+/* IEx */
+#define TXCIE           (1 << 0)
+#define RXTIE           (1 << 1)
+#define TXIE            (1 << 2)
+#define RXIE            (1 << 3)
+#define RXOIE           (1 << 4)
+#define TXUIE           (1 << 5)
+#define RXTOIE          (1 << 6)
+
+/* TXCRx */
+#define TXEN            (1 << 0)
+#define TXSLOT1         (1 << 1)
+#define TXSLOT2         (1 << 2)
+#define TXSLOT3         (1 << 3)
+#define TXSLOT4         (1 << 4)
+#define TXCOMPACT       (1 << 15)
+#define TXFEN           (1 << 16)
+
+#define TXSLOT_MASK_BIT (1)
+#define TXSLOT_MASK     (0xFFF << TXSLOT_MASK_BIT)
+
+#define TSIZE_MASK_BIT  (13)
+#define TSIZE_MASK      (0x3 << TSIZE_MASK_BIT)
+
+#define TSIZE_16BITS    (0x0 << TSIZE_MASK_BIT)
+#define TSIZE_18BITS    (0x1 << TSIZE_MASK_BIT)
+#define TSIZE_20BITS    (0x2 << TSIZE_MASK_BIT)
+#define TSIZE_12BITS    (0x3 << TSIZE_MASK_BIT)
+
+/* SRx */
+#define RXFE         (1 << 0)
+#define TXFE         (1 << 1)
+#define RXHF         (1 << 2)
+#define TXHE         (1 << 3)
+#define RXFF         (1 << 4)
+#define TXFF         (1 << 5)
+#define RXBUSY       (1 << 6)
+#define TXBUSY       (1 << 7)
+#define RXOVERRUN    (1 << 8)
+#define TXUNDERRUN   (1 << 9)
+#define RXTIMEOUT    (1 << 10)
+#define RXTOFE       (1 << 11)
+
+/* ISRx */
+#define TXCINTR      (1 << 0)
+#define RXTOINTR     (1 << 1)
+#define TXINTR       (1 << 2)
+#define RXINTR       (1 << 3)
+#define ORINTR       (1 << 4)
+#define URINTR       (1 << 5)
+#define RXTOFEINTR   (1 << 6)
+
+/* SLFR */
+#define SL1RXBUSY    (1 << 0)
+#define SL1TXBUSY    (1 << 1)
+#define SL2RXBUSY    (1 << 2)
+#define SL2TXBUSY    (1 << 3)
+#define SL12RXBUSY   (1 << 4)
+#define SL12TXBUSY   (1 << 5)
+#define SL1RXVALID   (1 << 6)
+#define SL1TXEMPTY   (1 << 7)
+#define SL2RXVALID   (1 << 8)
+#define SL2TXEMPTY   (1 << 9)
+#define SL12RXVALID  (1 << 10)
+#define SL12TXEMPTY  (1 << 11)
+#define RAWGPIOINT   (1 << 12)
+#define RWIS         (1 << 13)
+
+/* MAINCR */
+#define AACIFE       (1 << 0)
+#define LOOPBACK     (1 << 1)
+#define LOWPOWER     (1 << 2)
+#define SL1RXEN      (1 << 3)
+#define SL1TXEN      (1 << 4)
+#define SL2RXEN      (1 << 5)
+#define SL2TXEN      (1 << 6)
+#define SL12RXEN     (1 << 7)
+#define SL12TXEN     (1 << 8)
+#define DMAENABLE    (1 << 9)
+
+/* INTCLR */
+#define WISC         (1 << 0)
+#define RXOEC1       (1 << 1)
+#define RXOEC2       (1 << 2)
+#define RXOEC3       (1 << 3)
+#define RXOEC4       (1 << 4)
+#define TXUEC1       (1 << 5)
+#define TXUEC2       (1 << 6)
+#define TXUEC3       (1 << 7)
+#define TXUEC4       (1 << 8)
+#define RXTOFEC1     (1 << 9)
+#define RXTOFEC2     (1 << 10)
+#define RXTOFEC3     (1 << 11)
+#define RXTOFEC4     (1 << 12)
+
+#endif /* #ifndef HW_PL041_H */
diff --git a/hw/pl041.hx b/hw/pl041.hx
new file mode 100644
index 0000000000..e972996725
--- /dev/null
+++ b/hw/pl041.hx
@@ -0,0 +1,81 @@
+/*
+ * Arm PrimeCell PL041 Advanced Audio Codec Interface
+ *
+ * Copyright (c) 2011
+ * Written by Mathieu Sonet - www.elasticsheep.com
+ *
+ * This code is licenced under the GPL.
+ *
+ * *****************************************************************
+ */
+
+/* PL041 register file description */
+
+REGISTER( rxcr1,   0x00 )
+REGISTER( txcr1,   0x04 )
+REGISTER( sr1,     0x08 )
+REGISTER( isr1,    0x0C )
+REGISTER( ie1,     0x10 )
+REGISTER( rxcr2,   0x14 )
+REGISTER( txcr2,   0x18 )
+REGISTER( sr2,     0x1C )
+REGISTER( isr2,    0x20 )
+REGISTER( ie2,     0x24 )
+REGISTER( rxcr3,   0x28 )
+REGISTER( txcr3,   0x2C )
+REGISTER( sr3,     0x30 )
+REGISTER( isr3,    0x34 )
+REGISTER( ie3,     0x38 )
+REGISTER( rxcr4,   0x3C )
+REGISTER( txcr4,   0x40 )
+REGISTER( sr4,     0x44 )
+REGISTER( isr4,    0x48 )
+REGISTER( ie4,     0x4C )
+REGISTER( sl1rx,   0x50 )
+REGISTER( sl1tx,   0x54 )
+REGISTER( sl2rx,   0x58 )
+REGISTER( sl2tx,   0x5C )
+REGISTER( sl12rx,  0x60 )
+REGISTER( sl12tx,  0x64 )
+REGISTER( slfr,    0x68 )
+REGISTER( slistat, 0x6C )
+REGISTER( slien,   0x70 )
+REGISTER( intclr,  0x74 )
+REGISTER( maincr,  0x78 )
+REGISTER( reset,   0x7C )
+REGISTER( sync,    0x80 )
+REGISTER( allints, 0x84 )
+REGISTER( mainfr,  0x88 )
+REGISTER( unused,  0x8C )
+REGISTER( dr1_0,   0x90 )
+REGISTER( dr1_1,   0x94 )
+REGISTER( dr1_2,   0x98 )
+REGISTER( dr1_3,   0x9C )
+REGISTER( dr1_4,   0xA0 )
+REGISTER( dr1_5,   0xA4 )
+REGISTER( dr1_6,   0xA8 )
+REGISTER( dr1_7,   0xAC )
+REGISTER( dr2_0,   0xB0 )
+REGISTER( dr2_1,   0xB4 )
+REGISTER( dr2_2,   0xB8 )
+REGISTER( dr2_3,   0xBC )
+REGISTER( dr2_4,   0xC0 )
+REGISTER( dr2_5,   0xC4 )
+REGISTER( dr2_6,   0xC8 )
+REGISTER( dr2_7,   0xCC )
+REGISTER( dr3_0,   0xD0 )
+REGISTER( dr3_1,   0xD4 )
+REGISTER( dr3_2,   0xD8 )
+REGISTER( dr3_3,   0xDC )
+REGISTER( dr3_4,   0xE0 )
+REGISTER( dr3_5,   0xE4 )
+REGISTER( dr3_6,   0xE8 )
+REGISTER( dr3_7,   0xEC )
+REGISTER( dr4_0,   0xF0 )
+REGISTER( dr4_1,   0xF4 )
+REGISTER( dr4_2,   0xF8 )
+REGISTER( dr4_3,   0xFC )
+REGISTER( dr4_4,   0x100 )
+REGISTER( dr4_5,   0x104 )
+REGISTER( dr4_6,   0x108 )
+REGISTER( dr4_7,   0x10C )
diff --git a/hw/qxl.c b/hw/qxl.c
index 03848edb75..12f71aa56c 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -18,8 +18,6 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <pthread.h>
-
 #include "qemu-common.h"
 #include "qemu-timer.h"
 #include "qemu-queue.h"
@@ -238,6 +236,9 @@ void qxl_spice_reset_image_cache(PCIQXLDevice *qxl)
 void qxl_spice_reset_cursor(PCIQXLDevice *qxl)
 {
     qxl->ssd.worker->reset_cursor(qxl->ssd.worker);
+    qemu_mutex_lock(&qxl->track_lock);
+    qxl->guest_cursor = 0;
+    qemu_mutex_unlock(&qxl->track_lock);
 }
 
 
@@ -330,6 +331,7 @@ static void init_qxl_ram(PCIQXLDevice *d)
     d->ram->magic       = cpu_to_le32(QXL_RAM_MAGIC);
     d->ram->int_pending = cpu_to_le32(0);
     d->ram->int_mask    = cpu_to_le32(0);
+    d->ram->update_surface = 0;
     SPICE_RING_INIT(&d->ram->cmd_ring);
     SPICE_RING_INIT(&d->ram->cursor_ring);
     SPICE_RING_INIT(&d->ram->release_ring);
@@ -402,7 +404,9 @@ static void qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext)
     {
         QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
         if (cmd->type == QXL_CURSOR_SET) {
+            qemu_mutex_lock(&qxl->track_lock);
             qxl->guest_cursor = ext->cmd.data;
+            qemu_mutex_unlock(&qxl->track_lock);
         }
         break;
     }
@@ -1067,6 +1071,7 @@ static int qxl_destroy_primary(PCIQXLDevice *d, qxl_async_io async)
 
     d->mode = QXL_MODE_UNDEFINED;
     qemu_spice_destroy_primary_surface(&d->ssd, 0, async);
+    qxl_spice_reset_cursor(d);
     return 1;
 }
 
@@ -1215,10 +1220,6 @@ async_common:
         if (!SPICE_RING_IS_EMPTY(&d->ram->release_ring)) {
             break;
         }
-        pthread_yield();
-        if (!SPICE_RING_IS_EMPTY(&d->ram->release_ring)) {
-            break;
-        }
         d->oom_running = 1;
         qxl_spice_oom(d);
         d->oom_running = 0;
@@ -1372,7 +1373,7 @@ static void qxl_send_events(PCIQXLDevice *d, uint32_t events)
     if ((old_pending & le_events) == le_events) {
         return;
     }
-    if (pthread_self() == d->main) {
+    if (qemu_thread_is_self(&d->main)) {
         qxl_update_irq(d);
     } else {
         if (write(d->pipe[1], d, 1) != 1) {
@@ -1391,7 +1392,7 @@ static void init_pipe_signaling(PCIQXLDevice *d)
    fcntl(d->pipe[1], F_SETFL, O_NONBLOCK);
    fcntl(d->pipe[0], F_SETOWN, getpid());
 
-   d->main = pthread_self();
+   qemu_thread_get_self(&d->main);
    qemu_set_fd_handler(d->pipe[0], pipe_read, NULL, d);
 }
 
@@ -1710,10 +1711,12 @@ static int qxl_post_load(void *opaque, int version)
             cmds[out].group_id = MEMSLOT_GROUP_GUEST;
             out++;
         }
-        cmds[out].cmd.data = d->guest_cursor;
-        cmds[out].cmd.type = QXL_CMD_CURSOR;
-        cmds[out].group_id = MEMSLOT_GROUP_GUEST;
-        out++;
+        if (d->guest_cursor) {
+            cmds[out].cmd.data = d->guest_cursor;
+            cmds[out].cmd.type = QXL_CMD_CURSOR;
+            cmds[out].group_id = MEMSLOT_GROUP_GUEST;
+            out++;
+        }
         qxl_spice_loadvm_commands(d, cmds, out);
         g_free(cmds);
 
@@ -1787,6 +1790,19 @@ static VMStateDescription qxl_vmstate = {
     },
 };
 
+static Property qxl_properties[] = {
+        DEFINE_PROP_UINT32("ram_size", PCIQXLDevice, vga.vram_size,
+                           64 * 1024 * 1024),
+        DEFINE_PROP_UINT32("vram_size", PCIQXLDevice, vram_size,
+                           64 * 1024 * 1024),
+        DEFINE_PROP_UINT32("revision", PCIQXLDevice, revision,
+                           QXL_DEFAULT_REVISION),
+        DEFINE_PROP_UINT32("debug", PCIQXLDevice, debug, 0),
+        DEFINE_PROP_UINT32("guestdebug", PCIQXLDevice, guestdebug, 0),
+        DEFINE_PROP_UINT32("cmdlog", PCIQXLDevice, cmdlog, 0),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
 static PCIDeviceInfo qxl_info_primary = {
     .qdev.name    = "qxl-vga",
     .qdev.desc    = "Spice QXL GPU (primary, vga compatible)",
@@ -1799,18 +1815,7 @@ static PCIDeviceInfo qxl_info_primary = {
     .vendor_id    = REDHAT_PCI_VENDOR_ID,
     .device_id    = QXL_DEVICE_ID_STABLE,
     .class_id     = PCI_CLASS_DISPLAY_VGA,
-    .qdev.props = (Property[]) {
-        DEFINE_PROP_UINT32("ram_size", PCIQXLDevice, vga.vram_size,
-                           64 * 1024 * 1024),
-        DEFINE_PROP_UINT32("vram_size", PCIQXLDevice, vram_size,
-                           64 * 1024 * 1024),
-        DEFINE_PROP_UINT32("revision", PCIQXLDevice, revision,
-                           QXL_DEFAULT_REVISION),
-        DEFINE_PROP_UINT32("debug", PCIQXLDevice, debug, 0),
-        DEFINE_PROP_UINT32("guestdebug", PCIQXLDevice, guestdebug, 0),
-        DEFINE_PROP_UINT32("cmdlog", PCIQXLDevice, cmdlog, 0),
-        DEFINE_PROP_END_OF_LIST(),
-    }
+    .qdev.props   = qxl_properties,
 };
 
 static PCIDeviceInfo qxl_info_secondary = {
@@ -1823,18 +1828,7 @@ static PCIDeviceInfo qxl_info_secondary = {
     .vendor_id    = REDHAT_PCI_VENDOR_ID,
     .device_id    = QXL_DEVICE_ID_STABLE,
     .class_id     = PCI_CLASS_DISPLAY_OTHER,
-    .qdev.props = (Property[]) {
-        DEFINE_PROP_UINT32("ram_size", PCIQXLDevice, vga.vram_size,
-                           64 * 1024 * 1024),
-        DEFINE_PROP_UINT32("vram_size", PCIQXLDevice, vram_size,
-                           64 * 1024 * 1024),
-        DEFINE_PROP_UINT32("revision", PCIQXLDevice, revision,
-                           QXL_DEFAULT_REVISION),
-        DEFINE_PROP_UINT32("debug", PCIQXLDevice, debug, 0),
-        DEFINE_PROP_UINT32("guestdebug", PCIQXLDevice, guestdebug, 0),
-        DEFINE_PROP_UINT32("cmdlog", PCIQXLDevice, cmdlog, 0),
-        DEFINE_PROP_END_OF_LIST(),
-    }
+    .qdev.props   = qxl_properties,
 };
 
 static void qxl_register(void)
diff --git a/hw/qxl.h b/hw/qxl.h
index 868db813f9..37b2619e55 100644
--- a/hw/qxl.h
+++ b/hw/qxl.h
@@ -4,6 +4,7 @@
 #include "hw.h"
 #include "pci.h"
 #include "vga_int.h"
+#include "qemu-thread.h"
 
 #include "ui/qemu-spice.h"
 #include "ui/spice-display.h"
@@ -63,7 +64,7 @@ typedef struct PCIQXLDevice {
     QemuMutex          track_lock;
 
     /* thread signaling */
-    pthread_t          main;
+    QemuThread         main;
     int                pipe[2];
 
     /* ram pci bar */
diff --git a/hw/realview.c b/hw/realview.c
index 14281b0f06..9a8e63c8f5 100644
--- a/hw/realview.c
+++ b/hw/realview.c
@@ -125,7 +125,7 @@ static void realview_init(ram_addr_t ram_size,
     MemoryRegion *ram_hi = g_new(MemoryRegion, 1);
     MemoryRegion *ram_alias = g_new(MemoryRegion, 1);
     MemoryRegion *ram_hack = g_new(MemoryRegion, 1);
-    DeviceState *dev, *sysctl, *gpio2;
+    DeviceState *dev, *sysctl, *gpio2, *pl041;
     SysBusDevice *busdev;
     qemu_irq *irqp;
     qemu_irq pic[64];
@@ -232,6 +232,12 @@ static void realview_init(ram_addr_t ram_size,
         pic[n] = qdev_get_gpio_in(dev, n);
     }
 
+    pl041 = qdev_create(NULL, "pl041");
+    qdev_prop_set_uint32(pl041, "nc_fifo_depth", 512);
+    qdev_init_nofail(pl041);
+    sysbus_mmio_map(sysbus_from_qdev(pl041), 0, 0x10004000);
+    sysbus_connect_irq(sysbus_from_qdev(pl041), 0, pic[19]);
+
     sysbus_create_simple("pl050_keyboard", 0x10006000, pic[20]);
     sysbus_create_simple("pl050_mouse", 0x10007000, pic[21]);
 
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 68402cc479..6370600bb3 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -182,6 +182,7 @@ static void versatile_init(ram_addr_t ram_size,
     qemu_irq sic[32];
     DeviceState *dev, *sysctl;
     SysBusDevice *busdev;
+    DeviceState *pl041;
     PCIBus *pci_bus;
     NICInfo *nd;
     int n;
@@ -273,6 +274,13 @@ static void versatile_init(ram_addr_t ram_size,
     /* Add PL031 Real Time Clock. */
     sysbus_create_simple("pl031", 0x101e8000, pic[10]);
 
+    /* Add PL041 AACI Interface to the LM4549 codec */
+    pl041 = qdev_create(NULL, "pl041");
+    qdev_prop_set_uint32(pl041, "nc_fifo_depth", 512);
+    qdev_init_nofail(pl041);
+    sysbus_mmio_map(sysbus_from_qdev(pl041), 0, 0x10004000);
+    sysbus_connect_irq(sysbus_from_qdev(pl041), 0, sic[24]);
+
     /* Memory map for Versatile/PB:  */
     /* 0x10000000 System registers.  */
     /* 0x10001000 PCI controller config registers.  */
diff --git a/hw/vexpress.c b/hw/vexpress.c
index c9766dd0c4..0940a26d73 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -41,7 +41,7 @@ static void vexpress_a9_init(ram_addr_t ram_size,
 {
     CPUState *env = NULL;
     ram_addr_t ram_offset, vram_offset, sram_offset;
-    DeviceState *dev, *sysctl;
+    DeviceState *dev, *sysctl, *pl041;
     SysBusDevice *busdev;
     qemu_irq *irqp;
     qemu_irq pic[64];
@@ -118,6 +118,11 @@ static void vexpress_a9_init(ram_addr_t ram_size,
     /* 0x10001000 SP810 system control */
     /* 0x10002000 serial bus PCI */
     /* 0x10004000 PL041 audio */
+    pl041 = qdev_create(NULL, "pl041");
+    qdev_prop_set_uint32(pl041, "nc_fifo_depth", 512);
+    qdev_init_nofail(pl041);
+    sysbus_mmio_map(sysbus_from_qdev(pl041), 0, 0x10004000);
+    sysbus_connect_irq(sysbus_from_qdev(pl041), 0, pic[11]);
 
     dev = sysbus_create_varargs("pl181", 0x10005000, pic[9], pic[10], NULL);
     /* Wire up MMC card detect and read-only signals */
diff --git a/kvm-all.c b/kvm-all.c
index e7faf5cba9..c09ddf7ac5 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -64,6 +64,7 @@ struct KVMState
     int vmfd;
     int coalesced_mmio;
     struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+    bool coalesced_flush_in_progress;
     int broken_set_mem_region;
     int migration_log;
     int vcpu_events;
@@ -876,6 +877,13 @@ static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
 void kvm_flush_coalesced_mmio_buffer(void)
 {
     KVMState *s = kvm_state;
+
+    if (s->coalesced_flush_in_progress) {
+        return;
+    }
+
+    s->coalesced_flush_in_progress = true;
+
     if (s->coalesced_mmio_ring) {
         struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
         while (ring->first != ring->last) {
@@ -888,6 +896,8 @@ void kvm_flush_coalesced_mmio_buffer(void)
             ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
         }
     }
+
+    s->coalesced_flush_in_progress = false;
 }
 
 static void do_kvm_cpu_synchronize_state(void *_env)
diff --git a/libcacard/cac.c b/libcacard/cac.c
index f4b0b1b057..927a4ca296 100644
--- a/libcacard/cac.c
+++ b/libcacard/cac.c
@@ -266,7 +266,8 @@ static void
 cac_delete_pki_applet_private(VCardAppletPrivate *applet_private)
 {
     CACPKIAppletData *pki_applet_data = NULL;
-    if (pki_applet_data == NULL) {
+
+    if (applet_private == NULL) {
         return;
     }
     pki_applet_data = &(applet_private->u.pki_data);
diff --git a/libcacard/card_7816.c b/libcacard/card_7816.c
index 9fd59d4a5f..6fe27d5631 100644
--- a/libcacard/card_7816.c
+++ b/libcacard/card_7816.c
@@ -754,7 +754,7 @@ vcard_process_apdu(VCard *card, VCardAPDU *apdu, VCardResponse **response)
         return vcard7816_vm_process_apdu(card, apdu, response);
     case VCARD_DIRECT:
         /* if we are type direct, then the applet should handle everything */
-        assert("VCARD_DIRECT: applet failure");
+        assert(!"VCARD_DIRECT: applet failure");
         break;
     }
     *response =
diff --git a/libcacard/vscclient.c b/libcacard/vscclient.c
index 2191f6038c..e317a25faf 100644
--- a/libcacard/vscclient.c
+++ b/libcacard/vscclient.c
@@ -357,6 +357,7 @@ connect_to_qemu(
     if (sock < 0) {
         /* Error */
         fprintf(stderr, "Error opening socket!\n");
+        return -1;
     }
 
     memset(&hints, 0, sizeof(struct addrinfo));
@@ -370,13 +371,13 @@ connect_to_qemu(
     if (ret != 0) {
         /* Error */
         fprintf(stderr, "getaddrinfo failed\n");
-        return 5;
+        return -1;
     }
 
     if (connect(sock, server->ai_addr, server->ai_addrlen) < 0) {
         /* Error */
         fprintf(stderr, "Could not connect\n");
-        return 5;
+        return -1;
     }
     if (verbose) {
         printf("Connected (sizeof Header=%zd)!\n", sizeof(VSCMsgHeader));
@@ -505,6 +506,10 @@ main(
     qemu_host = strdup(argv[argc - 2]);
     qemu_port = strdup(argv[argc - 1]);
     sock = connect_to_qemu(qemu_host, qemu_port);
+    if (sock == -1) {
+        fprintf(stderr, "error opening socket, exiting.\n");
+        exit(5);
+    }
 
     qemu_mutex_init(&write_lock);
     qemu_mutex_init(&pending_reader_lock);
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 40c5eb1846..f3b767ea7e 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -2296,12 +2296,14 @@ void sparc64_set_context(CPUSPARCState *env)
      */
     err |= __get_user(env->fprs, &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fprs));
     {
-        uint32_t *src, *dst;
-        src = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
-        dst = env->fpr;
-        /* XXX: check that the CPU storage is the same as user context */
-        for (i = 0; i < 64; i++, dst++, src++)
-            err |= __get_user(*dst, src);
+        uint32_t *src = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
+        for (i = 0; i < 64; i++, src++) {
+            if (i & 1) {
+                err |= __get_user(env->fpr[i/2].l.lower, src);
+            } else {
+                err |= __get_user(env->fpr[i/2].l.upper, src);
+            }
+        }
     }
     err |= __get_user(env->fsr,
                       &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fsr));
@@ -2390,12 +2392,14 @@ void sparc64_get_context(CPUSPARCState *env)
     err |= __put_user(i7, &(mcp->mc_i7));
 
     {
-        uint32_t *src, *dst;
-        src = env->fpr;
-        dst = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
-        /* XXX: check that the CPU storage is the same as user context */
-        for (i = 0; i < 64; i++, dst++, src++)
-            err |= __put_user(*src, dst);
+        uint32_t *dst = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs;
+        for (i = 0; i < 64; i++, dst++) {
+            if (i & 1) {
+                err |= __put_user(env->fpr[i/2].l.lower, dst);
+            } else {
+                err |= __put_user(env->fpr[i/2].l.upper, dst);
+            }
+        }
     }
     err |= __put_user(env->fsr, &(mcp->mc_fpregs.mcfpu_fsr));
     err |= __put_user(env->gsr, &(mcp->mc_fpregs.mcfpu_gsr));
diff --git a/migration.c b/migration.c
index 6c70f72ce3..3b4abbde64 100644
--- a/migration.c
+++ b/migration.c
@@ -336,11 +336,22 @@ void remove_migration_state_change_notifier(Notifier *notify)
     notifier_list_remove(&migration_state_notifiers, notify);
 }
 
+bool migration_is_active(MigrationState *s)
+{
+    return s->state == MIG_STATE_ACTIVE;
+}
+
 bool migration_has_finished(MigrationState *s)
 {
     return s->state == MIG_STATE_COMPLETED;
 }
 
+bool migration_has_failed(MigrationState *s)
+{
+    return (s->state == MIG_STATE_CANCELLED ||
+            s->state == MIG_STATE_ERROR);
+}
+
 void migrate_fd_connect(MigrationState *s)
 {
     int ret;
diff --git a/migration.h b/migration.h
index a1f80d0728..1b8ee58530 100644
--- a/migration.h
+++ b/migration.h
@@ -76,7 +76,9 @@ void migrate_fd_connect(MigrationState *s);
 
 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
+bool migration_is_active(MigrationState *);
 bool migration_has_finished(MigrationState *);
+bool migration_has_failed(MigrationState *);
 
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_transferred(void);
diff --git a/monitor.c b/monitor.c
index e3c7d0d564..7c2a9b95f8 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1004,7 +1004,8 @@ static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_d
     return -1;
 }
 
-static int client_migrate_info(Monitor *mon, const QDict *qdict, QObject **ret_data)
+static int client_migrate_info(Monitor *mon, const QDict *qdict,
+                               MonitorCompletion cb, void *opaque)
 {
     const char *protocol = qdict_get_str(qdict, "protocol");
     const char *hostname = qdict_get_str(qdict, "hostname");
@@ -1019,7 +1020,8 @@ static int client_migrate_info(Monitor *mon, const QDict *qdict, QObject **ret_d
             return -1;
         }
 
-        ret = qemu_spice_migrate_info(hostname, port, tls_port, subject);
+        ret = qemu_spice_migrate_info(hostname, port, tls_port, subject,
+                                      cb, opaque);
         if (ret != 0) {
             qerror_report(QERR_UNDEFINED_ERROR);
             return -1;
@@ -3233,55 +3235,55 @@ static const MonitorDef monitor_defs[] = {
 #endif
     { "tbr", offsetof(CPUState, tbr) },
     { "fsr", offsetof(CPUState, fsr) },
-    { "f0", offsetof(CPUState, fpr[0]) },
-    { "f1", offsetof(CPUState, fpr[1]) },
-    { "f2", offsetof(CPUState, fpr[2]) },
-    { "f3", offsetof(CPUState, fpr[3]) },
-    { "f4", offsetof(CPUState, fpr[4]) },
-    { "f5", offsetof(CPUState, fpr[5]) },
-    { "f6", offsetof(CPUState, fpr[6]) },
-    { "f7", offsetof(CPUState, fpr[7]) },
-    { "f8", offsetof(CPUState, fpr[8]) },
-    { "f9", offsetof(CPUState, fpr[9]) },
-    { "f10", offsetof(CPUState, fpr[10]) },
-    { "f11", offsetof(CPUState, fpr[11]) },
-    { "f12", offsetof(CPUState, fpr[12]) },
-    { "f13", offsetof(CPUState, fpr[13]) },
-    { "f14", offsetof(CPUState, fpr[14]) },
-    { "f15", offsetof(CPUState, fpr[15]) },
-    { "f16", offsetof(CPUState, fpr[16]) },
-    { "f17", offsetof(CPUState, fpr[17]) },
-    { "f18", offsetof(CPUState, fpr[18]) },
-    { "f19", offsetof(CPUState, fpr[19]) },
-    { "f20", offsetof(CPUState, fpr[20]) },
-    { "f21", offsetof(CPUState, fpr[21]) },
-    { "f22", offsetof(CPUState, fpr[22]) },
-    { "f23", offsetof(CPUState, fpr[23]) },
-    { "f24", offsetof(CPUState, fpr[24]) },
-    { "f25", offsetof(CPUState, fpr[25]) },
-    { "f26", offsetof(CPUState, fpr[26]) },
-    { "f27", offsetof(CPUState, fpr[27]) },
-    { "f28", offsetof(CPUState, fpr[28]) },
-    { "f29", offsetof(CPUState, fpr[29]) },
-    { "f30", offsetof(CPUState, fpr[30]) },
-    { "f31", offsetof(CPUState, fpr[31]) },
+    { "f0", offsetof(CPUState, fpr[0].l.upper) },
+    { "f1", offsetof(CPUState, fpr[0].l.lower) },
+    { "f2", offsetof(CPUState, fpr[1].l.upper) },
+    { "f3", offsetof(CPUState, fpr[1].l.lower) },
+    { "f4", offsetof(CPUState, fpr[2].l.upper) },
+    { "f5", offsetof(CPUState, fpr[2].l.lower) },
+    { "f6", offsetof(CPUState, fpr[3].l.upper) },
+    { "f7", offsetof(CPUState, fpr[3].l.lower) },
+    { "f8", offsetof(CPUState, fpr[4].l.upper) },
+    { "f9", offsetof(CPUState, fpr[4].l.lower) },
+    { "f10", offsetof(CPUState, fpr[5].l.upper) },
+    { "f11", offsetof(CPUState, fpr[5].l.lower) },
+    { "f12", offsetof(CPUState, fpr[6].l.upper) },
+    { "f13", offsetof(CPUState, fpr[6].l.lower) },
+    { "f14", offsetof(CPUState, fpr[7].l.upper) },
+    { "f15", offsetof(CPUState, fpr[7].l.lower) },
+    { "f16", offsetof(CPUState, fpr[8].l.upper) },
+    { "f17", offsetof(CPUState, fpr[8].l.lower) },
+    { "f18", offsetof(CPUState, fpr[9].l.upper) },
+    { "f19", offsetof(CPUState, fpr[9].l.lower) },
+    { "f20", offsetof(CPUState, fpr[10].l.upper) },
+    { "f21", offsetof(CPUState, fpr[10].l.lower) },
+    { "f22", offsetof(CPUState, fpr[11].l.upper) },
+    { "f23", offsetof(CPUState, fpr[11].l.lower) },
+    { "f24", offsetof(CPUState, fpr[12].l.upper) },
+    { "f25", offsetof(CPUState, fpr[12].l.lower) },
+    { "f26", offsetof(CPUState, fpr[13].l.upper) },
+    { "f27", offsetof(CPUState, fpr[13].l.lower) },
+    { "f28", offsetof(CPUState, fpr[14].l.upper) },
+    { "f29", offsetof(CPUState, fpr[14].l.lower) },
+    { "f30", offsetof(CPUState, fpr[15].l.upper) },
+    { "f31", offsetof(CPUState, fpr[15].l.lower) },
 #ifdef TARGET_SPARC64
-    { "f32", offsetof(CPUState, fpr[32]) },
-    { "f34", offsetof(CPUState, fpr[34]) },
-    { "f36", offsetof(CPUState, fpr[36]) },
-    { "f38", offsetof(CPUState, fpr[38]) },
-    { "f40", offsetof(CPUState, fpr[40]) },
-    { "f42", offsetof(CPUState, fpr[42]) },
-    { "f44", offsetof(CPUState, fpr[44]) },
-    { "f46", offsetof(CPUState, fpr[46]) },
-    { "f48", offsetof(CPUState, fpr[48]) },
-    { "f50", offsetof(CPUState, fpr[50]) },
-    { "f52", offsetof(CPUState, fpr[52]) },
-    { "f54", offsetof(CPUState, fpr[54]) },
-    { "f56", offsetof(CPUState, fpr[56]) },
-    { "f58", offsetof(CPUState, fpr[58]) },
-    { "f60", offsetof(CPUState, fpr[60]) },
-    { "f62", offsetof(CPUState, fpr[62]) },
+    { "f32", offsetof(CPUState, fpr[16]) },
+    { "f34", offsetof(CPUState, fpr[17]) },
+    { "f36", offsetof(CPUState, fpr[18]) },
+    { "f38", offsetof(CPUState, fpr[19]) },
+    { "f40", offsetof(CPUState, fpr[20]) },
+    { "f42", offsetof(CPUState, fpr[21]) },
+    { "f44", offsetof(CPUState, fpr[22]) },
+    { "f46", offsetof(CPUState, fpr[23]) },
+    { "f48", offsetof(CPUState, fpr[24]) },
+    { "f50", offsetof(CPUState, fpr[25]) },
+    { "f52", offsetof(CPUState, fpr[26]) },
+    { "f54", offsetof(CPUState, fpr[27]) },
+    { "f56", offsetof(CPUState, fpr[28]) },
+    { "f58", offsetof(CPUState, fpr[29]) },
+    { "f60", offsetof(CPUState, fpr[30]) },
+    { "f62", offsetof(CPUState, fpr[31]) },
     { "asi", offsetof(CPUState, asi) },
     { "pstate", offsetof(CPUState, pstate) },
     { "cansave", offsetof(CPUState, cansave) },
diff --git a/qemu-doc.texi b/qemu-doc.texi
index ad19b73f15..149e9bd28b 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -227,7 +227,7 @@ QEMU uses YM3812 emulation by Tatsuyuki Satoh.
 QEMU uses GUS emulation (GUSEMU32 @url{http://www.deinmeister.de/gusemu/})
 by Tibor "TS" Schütz.
 
-Not that, by default, GUS shares IRQ(7) with parallel ports and so
+Note that, by default, GUS shares IRQ(7) with parallel ports and so
 qemu must be told to not have parallel ports to have working GUS
 
 @example
diff --git a/qmp-commands.hx b/qmp-commands.hx
index eb3072c504..97975a5207 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -566,7 +566,8 @@ EQMP
         .params     = "protocol hostname port tls-port cert-subject",
         .help       = "send migration info to spice/vnc client",
         .user_print = monitor_user_noop,
-        .mhandler.cmd_new = client_migrate_info,
+        .mhandler.cmd_async = client_migrate_info,
+        .flags      = MONITOR_CMD_ASYNC,
     },
 
 SQMP
diff --git a/scripts/analyse-9p-simpletrace.py b/scripts/analyse-9p-simpletrace.py
index 4358d6b594..b6d58fde96 100755
--- a/scripts/analyse-9p-simpletrace.py
+++ b/scripts/analyse-9p-simpletrace.py
@@ -7,11 +7,11 @@
 import simpletrace
 
 class VirtFSRequestTracker(simpletrace.Analyzer):
-	def begin(self):
-		print "Pretty printing 9p simpletrace log ..."
+        def begin(self):
+                print "Pretty printing 9p simpletrace log ..."
 
-        def complete_pdu(self, tag, id, err):
-                print "ERROR (tag =", tag, ", id =", id, ",err =", err, ")"
+        def v9fs_rerror(self, tag, id, err):
+                print "RERROR (tag =", tag, ", id =", id, ",err =", err, ")"
 
         def v9fs_version(self, tag, id, msize, version):
                 print "TVERSION (tag =", tag, ", msize =", msize, ", version =", version, ")"
@@ -22,121 +22,121 @@ class VirtFSRequestTracker(simpletrace.Analyzer):
         def v9fs_attach(self, tag, id, fid, afid, uname, aname):
                 print "TATTACH (tag =", tag, ", fid =", fid, ", afid =", afid, ", uname =", uname, ", aname =", aname, ")"
 
-	def v9fs_attach_return(self, tag, id, type, verison, path):
-		print "RATTACH (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "})"
+        def v9fs_attach_return(self, tag, id, type, version, path):
+                print "RATTACH (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "})"
 
-	def v9fs_stat(self, tag, id, fid):
-		print "TSTAT (tag =", tag, ", fid =", fid, ")"
+        def v9fs_stat(self, tag, id, fid):
+                print "TSTAT (tag =", tag, ", fid =", fid, ")"
 
-	def v9fs_stat_return(self, tag, id, mode, atime, mtime, length):
-		print "RSTAT (tag =", tag, ", mode =", mode, ", atime =", atime, ", mtime =", mtime, ", length =", length, ")"
+        def v9fs_stat_return(self, tag, id, mode, atime, mtime, length):
+                print "RSTAT (tag =", tag, ", mode =", mode, ", atime =", atime, ", mtime =", mtime, ", length =", length, ")"
 
-	def v9fs_getattr(self, tag, id, fid, request_mask):
-		print "TGETATTR (tag =", tag, ", fid =", fid, ", request_mask =", hex(request_mask), ")"
+        def v9fs_getattr(self, tag, id, fid, request_mask):
+                print "TGETATTR (tag =", tag, ", fid =", fid, ", request_mask =", hex(request_mask), ")"
 
-	def v9fs_getattr_return(self, tag, id, result_mask, mode, uid, gid):
-		print "RGETATTR (tag =", tag, ", result_mask =", hex(result_mask), ", mode =", oct(mode), ", uid =", uid, ", gid =", gid, ")"
+        def v9fs_getattr_return(self, tag, id, result_mask, mode, uid, gid):
+                print "RGETATTR (tag =", tag, ", result_mask =", hex(result_mask), ", mode =", oct(mode), ", uid =", uid, ", gid =", gid, ")"
 
-	def v9fs_walk(self, tag, id, fid, newfid, nwnames):
-		print "TWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", nwnames =", nwnames, ")"
+        def v9fs_walk(self, tag, id, fid, newfid, nwnames):
+                print "TWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", nwnames =", nwnames, ")"
 
-	def v9fs_walk_return(self, tag, id, nwnames, qids):
-		print "RWALK (tag =", tag, ", nwnames =", nwnames, ", qids =", hex(qids), ")"
+        def v9fs_walk_return(self, tag, id, nwnames, qids):
+                print "RWALK (tag =", tag, ", nwnames =", nwnames, ", qids =", hex(qids), ")"
 
-	def v9fs_open(self, tag, id, fid, mode):
-		print "TOPEN (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ")"
+        def v9fs_open(self, tag, id, fid, mode):
+                print "TOPEN (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ")"
 
-	def v9fs_open_return(self, tag, id, type, version, path, iounit):
-		print "ROPEN (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")"
+        def v9fs_open_return(self, tag, id, type, version, path, iounit):
+                print "ROPEN (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")"
 
-	def v9fs_lcreate(self, tag, id, dfid, flags, mode, gid):
-		print "TLCREATE (tag =", tag, ", dfid =", dfid, ", flags =", oct(flags), ", mode =", oct(mode), ", gid =", gid, ")"
+        def v9fs_lcreate(self, tag, id, dfid, flags, mode, gid):
+                print "TLCREATE (tag =", tag, ", dfid =", dfid, ", flags =", oct(flags), ", mode =", oct(mode), ", gid =", gid, ")"
 
-	def v9fs_lcreate_return(self, id, type, version, path, iounit):
-		print "RLCREATE (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")"
+        def v9fs_lcreate_return(self, tag, id, type, version, path, iounit):
+                print "RLCREATE (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")"
 
-	def v9fs_fsync(self, tag, id, fid, datasync):
-		print "TFSYNC (tag =", tag, ", fid =", fid, ", datasync =", datasync, ")"
+        def v9fs_fsync(self, tag, id, fid, datasync):
+                print "TFSYNC (tag =", tag, ", fid =", fid, ", datasync =", datasync, ")"
 
-	def v9fs_clunk(self, tag, id, fid):
-		print "TCLUNK (tag =", tag, ", fid =", fid, ")"
+        def v9fs_clunk(self, tag, id, fid):
+                print "TCLUNK (tag =", tag, ", fid =", fid, ")"
 
-	def v9fs_read(self, tag, id, fid, off, max_count):
-		print "TREAD (tag =", tag, ", fid =", fid, ", off =", off, ", max_count =", max_count, ")"
+        def v9fs_read(self, tag, id, fid, off, max_count):
+                print "TREAD (tag =", tag, ", fid =", fid, ", off =", off, ", max_count =", max_count, ")"
 
-	def v9fs_read_return(self, tag, id, count, err):
-		print "RREAD (tag =", tag, ", count =", count, ", err =", err, ")"
+        def v9fs_read_return(self, tag, id, count, err):
+                print "RREAD (tag =", tag, ", count =", count, ", err =", err, ")"
 
-	def v9fs_readdir(self, tag, id, fid, offset, max_count):
-		print "TREADDIR (tag =", tag, ", fid =", fid, ", offset =", offset, ", max_count =", max_count, ")"
+        def v9fs_readdir(self, tag, id, fid, offset, max_count):
+                print "TREADDIR (tag =", tag, ", fid =", fid, ", offset =", offset, ", max_count =", max_count, ")"
 
-	def v9fs_readdir_return(self, tag, id, count, retval):
-		print "RREADDIR (tag =", tag, ", count =", count, ", retval =", retval, ")"
+        def v9fs_readdir_return(self, tag, id, count, retval):
+                print "RREADDIR (tag =", tag, ", count =", count, ", retval =", retval, ")"
 
-	def v9fs_write(self, tag, id, fid, off, count, cnt):
-		print "TWRITE (tag =", tag, ", fid =", fid, ", off =", off, ", count =", count, ", cnt =", cnt, ")"
+        def v9fs_write(self, tag, id, fid, off, count, cnt):
+                print "TWRITE (tag =", tag, ", fid =", fid, ", off =", off, ", count =", count, ", cnt =", cnt, ")"
 
-	def v9fs_write_return(self, tag, id, total, err):
-		print "RWRITE (tag =", tag, ", total =", total, ", err =", err, ")"
+        def v9fs_write_return(self, tag, id, total, err):
+                print "RWRITE (tag =", tag, ", total =", total, ", err =", err, ")"
 
-	def v9fs_create(self, tag, id, fid, perm, name, mode):
-		print "TCREATE (tag =", tag, ", fid =", fid, ", perm =", oct(perm), ", name =", name, ", mode =", oct(mode), ")"
+        def v9fs_create(self, tag, id, fid, name, perm, mode):
+                print "TCREATE (tag =", tag, ", fid =", fid, ", perm =", oct(perm), ", name =", name, ", mode =", oct(mode), ")"
 
-	def v9fs_create_return(self, tag, id, type, verison, path, iounit):
-		print "RCREATE (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")"
+        def v9fs_create_return(self, tag, id, type, version, path, iounit):
+                print "RCREATE (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")"
 
-	def v9fs_symlink(self, tag, id, fid, name, symname, gid):
-		print "TSYMLINK (tag =", tag, ", fid =", fid, ", name =", name, ", symname =", symname, ", gid =", gid, ")"
+        def v9fs_symlink(self, tag, id, fid, name, symname, gid):
+                print "TSYMLINK (tag =", tag, ", fid =", fid, ", name =", name, ", symname =", symname, ", gid =", gid, ")"
 
-	def v9fs_symlink_return(self, tag, id, type, version, path):
-		print "RSYMLINK (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "})"
+        def v9fs_symlink_return(self, tag, id, type, version, path):
+                print "RSYMLINK (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "})"
 
-	def v9fs_flush(self, tag, id, flush_tag):
-		print "TFLUSH (tag =", tag, ", flush_tag =", flush_tag, ")"
+        def v9fs_flush(self, tag, id, flush_tag):
+                print "TFLUSH (tag =", tag, ", flush_tag =", flush_tag, ")"
 
-	def v9fs_link(self, tag, id, dfid, oldfid, name):
-		print "TLINK (tag =", tag, ", dfid =", dfid, ", oldfid =", oldfid, ", name =", name, ")"
+        def v9fs_link(self, tag, id, dfid, oldfid, name):
+                print "TLINK (tag =", tag, ", dfid =", dfid, ", oldfid =", oldfid, ", name =", name, ")"
 
-	def v9fs_remove(self, tag, id, fid):
-		print "TREMOVE (tag =", tag, ", fid =", fid, ")"
+        def v9fs_remove(self, tag, id, fid):
+                print "TREMOVE (tag =", tag, ", fid =", fid, ")"
 
-	def v9fs_wstat(self, tag, id, fid, mode, atime, mtime):
-		print "TWSTAT (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", atime =", atime, "mtime =", mtime, ")"
+        def v9fs_wstat(self, tag, id, fid, mode, atime, mtime):
+                print "TWSTAT (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", atime =", atime, "mtime =", mtime, ")"
 
-	def v9fs_mknod(self, tag, id, fid, mode, major, minor):
-		print "TMKNOD (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", major =", major, ", minor =", minor, ")"
+        def v9fs_mknod(self, tag, id, fid, mode, major, minor):
+                print "TMKNOD (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", major =", major, ", minor =", minor, ")"
 
-	def v9fs_lock(self, tag, id, fid, type, start, length):
-		print "TLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")"
+        def v9fs_lock(self, tag, id, fid, type, start, length):
+                print "TLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")"
 
-	def v9fs_lock_return(self, tag, id, status):
-		print "RLOCK (tag =", tag, ", status =", status, ")"
+        def v9fs_lock_return(self, tag, id, status):
+                print "RLOCK (tag =", tag, ", status =", status, ")"
 
-	def v9fs_getlock(self, tag, id, fid, type, start, length):
-		print "TGETLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")"
+        def v9fs_getlock(self, tag, id, fid, type, start, length):
+                print "TGETLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")"
 
-	def v9fs_getlock_return(self, tag, id, type, start, length, proc_id):
-		print "RGETLOCK (tag =", tag, "type =", type, ", start =", start, ", length =", length, ", proc_id =", proc_id,  ")"
+        def v9fs_getlock_return(self, tag, id, type, start, length, proc_id):
+                print "RGETLOCK (tag =", tag, "type =", type, ", start =", start, ", length =", length, ", proc_id =", proc_id,  ")"
 
-	def v9fs_mkdir(self, tag, id, fid, name, mode, gid):
-		print "TMKDIR (tag =", tag, ", fid =", fid, ", name =", name, ", mode =", mode, ", gid =", gid, ")"
+        def v9fs_mkdir(self, tag, id, fid, name, mode, gid):
+                print "TMKDIR (tag =", tag, ", fid =", fid, ", name =", name, ", mode =", mode, ", gid =", gid, ")"
 
-	def v9fs_mkdir_return(self, tag, id, type, version, path, err):
-		print "RMKDIR (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, err =", err, ")"
+        def v9fs_mkdir_return(self, tag, id, type, version, path, err):
+                print "RMKDIR (tag =", tag,  ", qid={type =", type, ", version =", version, ", path =", path, "}, err =", err, ")"
 
-	def v9fs_xattrwalk(self, tag, id, fid, newfid, name):
-		print "TXATTRWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", xattr name =", name, ")"
+        def v9fs_xattrwalk(self, tag, id, fid, newfid, name):
+                print "TXATTRWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", xattr name =", name, ")"
 
-	def v9fs_xattrwalk_return(self, tag, id, size):
-		print "RXATTRWALK (tag =", tag, ", xattrsize  =", size, ")"
+        def v9fs_xattrwalk_return(self, tag, id, size):
+                print "RXATTRWALK (tag =", tag, ", xattrsize  =", size, ")"
 
-	def v9fs_xattrcreate(self, tag, id, fid, name, size, flags):
-		print "TXATTRCREATE (tag =", tag, ", fid =", fid, ", name =", name, ", xattrsize =", size, ", flags =", flags, ")"
+        def v9fs_xattrcreate(self, tag, id, fid, name, size, flags):
+                print "TXATTRCREATE (tag =", tag, ", fid =", fid, ", name =", name, ", xattrsize =", size, ", flags =", flags, ")"
 
-	def v9fs_readlink(self, tag, id, fid):
-		print "TREADLINK (tag =", tag, ", fid =", fid, ")"
+        def v9fs_readlink(self, tag, id, fid):
+                print "TREADLINK (tag =", tag, ", fid =", fid, ")"
 
-	def v9fs_readlink_return(self, tag, id, target):
-		print "RREADLINK (tag =", tag, ", target =", target, ")"
+        def v9fs_readlink_return(self, tag, id, target):
+                print "RREADLINK (tag =", tag, ", target =", target, ")"
 
 simpletrace.run(VirtFSRequestTracker())
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
new file mode 100755
index 0000000000..56d2bd7f21
--- /dev/null
+++ b/scripts/kvm/kvm_stat
@@ -0,0 +1,480 @@
+#!/usr/bin/python
+#
+# top-like utility for displaying kvm statistics
+#
+# Copyright 2006-2008 Qumranet Technologies
+# Copyright 2008-2011 Red Hat, Inc.
+#
+# Authors:
+#  Avi Kivity <avi@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+
+import curses
+import sys, os, time, optparse
+
+class DebugfsProvider(object):
+    def __init__(self):
+        self.base = '/sys/kernel/debug/kvm'
+        self._fields = os.listdir(self.base)
+    def fields(self):
+        return self._fields
+    def select(self, fields):
+        self._fields = fields
+    def read(self):
+        def val(key):
+            return int(file(self.base + '/' + key).read())
+        return dict([(key, val(key)) for key in self._fields])
+
+vmx_exit_reasons = {
+    0: 'EXCEPTION_NMI',
+    1: 'EXTERNAL_INTERRUPT',
+    2: 'TRIPLE_FAULT',
+    7: 'PENDING_INTERRUPT',
+    8: 'NMI_WINDOW',
+    9: 'TASK_SWITCH',
+    10: 'CPUID',
+    12: 'HLT',
+    14: 'INVLPG',
+    15: 'RDPMC',
+    16: 'RDTSC',
+    18: 'VMCALL',
+    19: 'VMCLEAR',
+    20: 'VMLAUNCH',
+    21: 'VMPTRLD',
+    22: 'VMPTRST',
+    23: 'VMREAD',
+    24: 'VMRESUME',
+    25: 'VMWRITE',
+    26: 'VMOFF',
+    27: 'VMON',
+    28: 'CR_ACCESS',
+    29: 'DR_ACCESS',
+    30: 'IO_INSTRUCTION',
+    31: 'MSR_READ',
+    32: 'MSR_WRITE',
+    33: 'INVALID_STATE',
+    36: 'MWAIT_INSTRUCTION',
+    39: 'MONITOR_INSTRUCTION',
+    40: 'PAUSE_INSTRUCTION',
+    41: 'MCE_DURING_VMENTRY',
+    43: 'TPR_BELOW_THRESHOLD',
+    44: 'APIC_ACCESS',
+    48: 'EPT_VIOLATION',
+    49: 'EPT_MISCONFIG',
+    54: 'WBINVD',
+    55: 'XSETBV',
+}
+
+svm_exit_reasons = {
+    0x000: 'READ_CR0',
+    0x003: 'READ_CR3',
+    0x004: 'READ_CR4',
+    0x008: 'READ_CR8',
+    0x010: 'WRITE_CR0',
+    0x013: 'WRITE_CR3',
+    0x014: 'WRITE_CR4',
+    0x018: 'WRITE_CR8',
+    0x020: 'READ_DR0',
+    0x021: 'READ_DR1',
+    0x022: 'READ_DR2',
+    0x023: 'READ_DR3',
+    0x024: 'READ_DR4',
+    0x025: 'READ_DR5',
+    0x026: 'READ_DR6',
+    0x027: 'READ_DR7',
+    0x030: 'WRITE_DR0',
+    0x031: 'WRITE_DR1',
+    0x032: 'WRITE_DR2',
+    0x033: 'WRITE_DR3',
+    0x034: 'WRITE_DR4',
+    0x035: 'WRITE_DR5',
+    0x036: 'WRITE_DR6',
+    0x037: 'WRITE_DR7',
+    0x040: 'EXCP_BASE',
+    0x060: 'INTR',
+    0x061: 'NMI',
+    0x062: 'SMI',
+    0x063: 'INIT',
+    0x064: 'VINTR',
+    0x065: 'CR0_SEL_WRITE',
+    0x066: 'IDTR_READ',
+    0x067: 'GDTR_READ',
+    0x068: 'LDTR_READ',
+    0x069: 'TR_READ',
+    0x06a: 'IDTR_WRITE',
+    0x06b: 'GDTR_WRITE',
+    0x06c: 'LDTR_WRITE',
+    0x06d: 'TR_WRITE',
+    0x06e: 'RDTSC',
+    0x06f: 'RDPMC',
+    0x070: 'PUSHF',
+    0x071: 'POPF',
+    0x072: 'CPUID',
+    0x073: 'RSM',
+    0x074: 'IRET',
+    0x075: 'SWINT',
+    0x076: 'INVD',
+    0x077: 'PAUSE',
+    0x078: 'HLT',
+    0x079: 'INVLPG',
+    0x07a: 'INVLPGA',
+    0x07b: 'IOIO',
+    0x07c: 'MSR',
+    0x07d: 'TASK_SWITCH',
+    0x07e: 'FERR_FREEZE',
+    0x07f: 'SHUTDOWN',
+    0x080: 'VMRUN',
+    0x081: 'VMMCALL',
+    0x082: 'VMLOAD',
+    0x083: 'VMSAVE',
+    0x084: 'STGI',
+    0x085: 'CLGI',
+    0x086: 'SKINIT',
+    0x087: 'RDTSCP',
+    0x088: 'ICEBP',
+    0x089: 'WBINVD',
+    0x08a: 'MONITOR',
+    0x08b: 'MWAIT',
+    0x08c: 'MWAIT_COND',
+    0x400: 'NPF',
+}
+
+vendor_exit_reasons = {
+    'vmx': vmx_exit_reasons,
+    'svm': svm_exit_reasons,
+}
+
+exit_reasons = None
+
+for line in file('/proc/cpuinfo').readlines():
+    if line.startswith('flags'):
+        for flag in line.split():
+            if flag in vendor_exit_reasons:
+                exit_reasons = vendor_exit_reasons[flag]
+
+filters = {}  # tracepoint name -> (filter field, {value: name}); inverted just below
+if exit_reasons:  # stays None when /proc/cpuinfo shows neither vmx nor svm
+    filters['kvm_exit'] = ('exit_reason', exit_reasons)
+
+def invert(d):
+    return dict((x[1], x[0]) for x in d.iteritems())
+
+for f in filters:
+    filters[f] = (filters[f][0], invert(filters[f][1]))
+
+import ctypes, struct, array
+
+libc = ctypes.CDLL('libc.so.6')
+syscall = libc.syscall
+class perf_event_attr(ctypes.Structure):
+    _fields_ = [('type', ctypes.c_uint32),
+                ('size', ctypes.c_uint32),
+                ('config', ctypes.c_uint64),
+                ('sample_freq', ctypes.c_uint64),
+                ('sample_type', ctypes.c_uint64),
+                ('read_format', ctypes.c_uint64),
+                ('flags', ctypes.c_uint64),
+                ('wakeup_events', ctypes.c_uint32),
+                ('bp_type', ctypes.c_uint32),
+                ('bp_addr', ctypes.c_uint64),
+                ('bp_len', ctypes.c_uint64),
+                ]
+def _perf_event_open(attr, pid, cpu, group_fd, flags):  # raw libc syscall() wrapper; returns the new fd (callers treat -1 as failure)
+    return syscall(298, ctypes.pointer(attr), ctypes.c_int(pid),  # NOTE(review): 298 looks like the x86_64 perf_event_open syscall number - confirm for other arches
+                   ctypes.c_int(cpu), ctypes.c_int(group_fd),
+                   ctypes.c_long(flags))
+
+PERF_TYPE_HARDWARE              = 0
+PERF_TYPE_SOFTWARE              = 1
+PERF_TYPE_TRACEPOINT            = 2
+PERF_TYPE_HW_CACHE              = 3
+PERF_TYPE_RAW                   = 4
+PERF_TYPE_BREAKPOINT            = 5
+
+PERF_SAMPLE_IP                  = 1 << 0
+PERF_SAMPLE_TID                 = 1 << 1
+PERF_SAMPLE_TIME                = 1 << 2
+PERF_SAMPLE_ADDR                = 1 << 3
+PERF_SAMPLE_READ                = 1 << 4
+PERF_SAMPLE_CALLCHAIN           = 1 << 5
+PERF_SAMPLE_ID                  = 1 << 6
+PERF_SAMPLE_CPU                 = 1 << 7
+PERF_SAMPLE_PERIOD              = 1 << 8
+PERF_SAMPLE_STREAM_ID           = 1 << 9
+PERF_SAMPLE_RAW                 = 1 << 10
+
+PERF_FORMAT_TOTAL_TIME_ENABLED  = 1 << 0
+PERF_FORMAT_TOTAL_TIME_RUNNING  = 1 << 1
+PERF_FORMAT_ID                  = 1 << 2
+PERF_FORMAT_GROUP               = 1 << 3
+
+import re
+
+sys_tracing = '/sys/kernel/debug/tracing'
+
+class Group(object):  # a set of Event objects opened on one CPU and read together
+    def __init__(self, cpu):
+        self.events = []
+        self.group_leader = None
+        self.cpu = cpu
+    def add_event(self, name, event_set, tracepoint, filter = None):
+        self.events.append(Event(group = self,
+                                 name = name, event_set = event_set,
+                                 tracepoint = tracepoint, filter = filter))
+        if len(self.events) == 1:  # the first event's fd is the one read() uses for the whole group
+            self.file = os.fdopen(self.events[0].fd)
+    def read(self):  # returns {event name: counter value} for every event in the group
+        bytes = 8 * (1 + len(self.events))  # 8 skipped bytes plus one 8-byte counter per event
+        fmt = 'xxxxxxxx' + 'q' * len(self.events)  # presumably skips the leading count word of a PERF_FORMAT_GROUP read - confirm
+        return dict(zip([event.name for event in self.events],
+                        struct.unpack(fmt, self.file.read(bytes))))
+
+class Event(object):  # one tracepoint counter opened through _perf_event_open
+    def __init__(self, group, name, event_set, tracepoint, filter = None):
+        self.name = name
+        attr = perf_event_attr()
+        attr.type = PERF_TYPE_TRACEPOINT
+        attr.size = ctypes.sizeof(attr)
+        id_path = os.path.join(sys_tracing, 'events', event_set,
+                               tracepoint, 'id')
+        id = int(file(id_path).read())  # numeric tracepoint id, used as the perf config value
+        attr.config = id
+        attr.sample_type = (PERF_SAMPLE_RAW
+                            | PERF_SAMPLE_TIME
+                            | PERF_SAMPLE_CPU)
+        attr.sample_period = 1  # NOTE(review): perf_event_attr._fields_ has no 'sample_period' (the slot is named 'sample_freq'), so this likely never reaches the C struct - confirm
+        attr.read_format = PERF_FORMAT_GROUP
+        group_leader = -1  # -1 when this is the first event of its group
+        if group.events:
+            group_leader = group.events[0].fd  # otherwise attach to the group's first event
+        fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
+        if fd == -1:
+            raise Exception('perf_event_open failed')
+        if filter:
+            import fcntl
+            fcntl.ioctl(fd, 0x40082406, filter)  # presumably PERF_EVENT_IOC_SET_FILTER - confirm
+        self.fd = fd
+    def enable(self):
+        import fcntl
+        fcntl.ioctl(self.fd, 0x00002400, 0)  # presumably PERF_EVENT_IOC_ENABLE - confirm
+    def disable(self):
+        import fcntl
+        fcntl.ioctl(self.fd, 0x00002401, 0)  # presumably PERF_EVENT_IOC_DISABLE - confirm
+
+class TracepointProvider(object):
+    def __init__(self):
+        path = os.path.join(sys_tracing, 'events', 'kvm')
+        fields = [f
+                  for f in os.listdir(path)
+                  if os.path.isdir(os.path.join(path, f))]
+        extra = []
+        for f in fields:
+            if f in filters:
+                subfield, values = filters[f]
+                for name, number in values.iteritems():
+                    extra.append(f + '(' + name + ')')
+        fields += extra
+        self._setup(fields)
+        self.select(fields)
+    def fields(self):
+        return self._fields
+    def _setup(self, _fields):
+        self._fields = _fields
+        cpure = r'cpu([0-9]+)'
+        self.cpus = [int(re.match(cpure, x).group(1))
+                     for x in os.listdir('/sys/devices/system/cpu')
+                     if re.match(cpure, x)]
+        import resource
+        nfiles = len(self.cpus) * 1000
+        resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
+        events = []
+        self.group_leaders = []
+        for cpu in self.cpus:
+            group = Group(cpu)
+            for name in _fields:
+                tracepoint = name
+                filter = None
+                m = re.match(r'(.*)\((.*)\)', name)
+                if m:
+                    tracepoint, sub = m.groups()
+                    filter = '%s==%d\0' % (filters[tracepoint][0],
+                                           filters[tracepoint][1][sub])
+                event = group.add_event(name, event_set = 'kvm',
+                                        tracepoint = tracepoint,
+                                        filter = filter)
+            self.group_leaders.append(group)
+    def select(self, fields):
+        for group in self.group_leaders:
+            for event in group.events:
+                if event.name in fields:
+                    event.enable()
+                else:
+                    event.disable()
+    def read(self):
+        from collections import defaultdict
+        ret = defaultdict(int)
+        for group in self.group_leaders:
+            for name, val in group.read().iteritems():
+                ret[name] += val
+        return ret
+
+class Stats:  # tracks per-field values and deltas read from a provider
+    def __init__(self, provider, fields = None):
+        self.provider = provider
+        self.fields_filter = fields  # regex string; None/empty selects every field
+        self._update()
+    def _update(self):  # rebuild self.values from the filter and tell the provider what to collect
+        def wanted(key):
+            import re
+            if not self.fields_filter:
+                return True
+            return re.match(self.fields_filter, key) is not None
+        self.values = dict([(key, None)  # None marks "no previous sample yet"
+                            for key in self.provider.fields()  # our own provider, not a module-level global
+                            if wanted(key)])
+        self.provider.select(self.values.keys())
+    def set_fields_filter(self, fields_filter):
+        self.fields_filter = fields_filter
+        self._update()
+    def get(self):  # returns {field: (current value, delta since previous get() or None)}
+        new = self.provider.read()
+        for key in self.provider.fields():
+            oldval = self.values.get(key, (0, 0))  # fields outside the filter start from (0, 0)
+            newval = new[key]
+            newdelta = None
+            if oldval is not None:
+                newdelta = newval - oldval[0]
+            self.values[key] = (newval, newdelta)
+        return self.values
+
+if not os.access('/sys/kernel/debug', os.F_OK):
+    print 'Please enable CONFIG_DEBUG_FS in your kernel'
+    sys.exit(1)
+if not os.access('/sys/kernel/debug/kvm', os.F_OK):
+    print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
+    print "and ensure the kvm modules are loaded"
+    sys.exit(1)
+
+label_width = 40
+number_width = 10
+
+def tui(screen, stats):
+    curses.use_default_colors()
+    curses.noecho()
+    drilldown = False
+    fields_filter = stats.fields_filter
+    def update_drilldown():
+        if not fields_filter:
+            if drilldown:
+                stats.set_fields_filter(None)
+            else:
+                stats.set_fields_filter(r'^[^\(]*$')
+    update_drilldown()
+    def refresh(sleeptime):
+        screen.erase()
+        screen.addstr(0, 0, 'kvm statistics')
+        row = 2
+        s = stats.get()
+        def sortkey(x):
+            if s[x][1]:
+                return (-s[x][1], -s[x][0])
+            else:
+                return (0, -s[x][0])
+        for key in sorted(s.keys(), key = sortkey):
+            if row >= screen.getmaxyx()[0]:
+                break
+            values = s[key]
+            if not values[0] and not values[1]:
+                break
+            col = 1
+            screen.addstr(row, col, key)
+            col += label_width
+            screen.addstr(row, col, '%10d' % (values[0],))
+            col += number_width
+            if values[1] is not None:
+                screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
+            row += 1
+        screen.refresh()
+
+    sleeptime = 0.25
+    while True:
+        refresh(sleeptime)
+        curses.halfdelay(int(sleeptime * 10))
+        sleeptime = 3
+        try:
+            c = screen.getkey()
+            if c == 'x':
+                drilldown = not drilldown
+                update_drilldown()
+            if c == 'q':
+                break
+        except KeyboardInterrupt:
+            break
+        except curses.error:
+            continue
+
+def batch(stats):
+    s = stats.get()
+    time.sleep(1)
+    s = stats.get()
+    for key in sorted(s.keys()):
+        values = s[key]
+        print '%-22s%10d%10d' % (key, values[0], values[1])
+
+def log(stats):
+    keys = sorted(stats.get().iterkeys())
+    def banner():
+        for k in keys:
+            print '%10s' % k[0:9],
+        print
+    def statline():
+        s = stats.get()
+        for k in keys:
+            print ' %9d' % s[k][1],
+        print
+    line = 0
+    banner_repeat = 20
+    while True:
+        time.sleep(1)
+        if line % banner_repeat == 0:
+            banner()
+        statline()
+        line += 1
+
+options = optparse.OptionParser()
+options.add_option('-1', '--once', '--batch',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'once',
+                   help = 'run in batch mode for one second',
+                   )
+options.add_option('-l', '--log',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'log',
+                   help = 'run in logging mode (like vmstat)',
+                   )
+options.add_option('-f', '--fields',
+                   action = 'store',
+                   default = None,
+                   dest = 'fields',
+                   help = 'fields to display (regex)',
+                   )
+(options, args) = options.parse_args(sys.argv)
+
+try:
+    provider = TracepointProvider()
+except:
+    provider = DebugfsProvider()
+
+stats = Stats(provider, fields = options.fields)
+
+if options.log:
+    log(stats)
+elif not options.once:
+    import curses.wrapper
+    curses.wrapper(tui, stats)
+else:
+    batch(stats)
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
new file mode 100755
index 0000000000..a74ce71917
--- /dev/null
+++ b/scripts/kvm/vmxcap
@@ -0,0 +1,224 @@
+#!/usr/bin/python
+#
+# tool for querying VMX capabilities
+#
+# Copyright 2009-2010 Red Hat, Inc.
+#
+# Authors:
+#  Avi Kivity <avi@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+
+MSR_IA32_VMX_BASIC = 0x480
+MSR_IA32_VMX_PINBASED_CTLS = 0x481
+MSR_IA32_VMX_PROCBASED_CTLS = 0x482
+MSR_IA32_VMX_EXIT_CTLS = 0x483
+MSR_IA32_VMX_ENTRY_CTLS = 0x484
+MSR_IA32_VMX_MISC_CTLS = 0x485
+MSR_IA32_VMX_PROCBASED_CTLS2 = 0x48B
+MSR_IA32_VMX_EPT_VPID_CAP = 0x48C
+MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D
+MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E
+MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F
+MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
+
+class msr(object):  # reader for 64-bit MSR values via the msr character device of CPU 0
+    def __init__(self):
+        try:
+            self.f = file('/dev/cpu/0/msr', 'r', 0)  # unbuffered: seek() must reach the device, not a stdio buffer
+        except:
+            self.f = file('/dev/msr0', 'r', 0)  # fallback device node, also unbuffered
+    def read(self, index, default = None):  # returns the MSR at 'index', or 'default' if the read fails
+        import struct
+        self.f.seek(index)  # the file offset selects the MSR number
+        try:
+            return struct.unpack('Q', self.f.read(8))[0]
+        except:
+            return default
+
+class Control(object):
+    def __init__(self, name, bits, cap_msr, true_cap_msr = None):
+        self.name = name
+        self.bits = bits
+        self.cap_msr = cap_msr
+        self.true_cap_msr = true_cap_msr
+    def read2(self, nr):
+        m = msr()
+        val = m.read(nr, 0)
+        return (val & 0xffffffff, val >> 32)
+    def show(self):
+        print self.name
+        mbz, mb1 = self.read2(self.cap_msr)
+        tmbz, tmb1 = 0, 0
+        if self.true_cap_msr:
+            tmbz, tmb1 = self.read2(self.true_cap_msr)
+        for bit in sorted(self.bits.keys()):
+            zero = not (mbz & (1 << bit))
+            one = mb1 & (1 << bit)
+            true_zero = not (tmbz & (1 << bit))
+            true_one = tmb1 & (1 << bit)
+            s= '?'
+            if (self.true_cap_msr and true_zero and true_one
+                and one and not zero):
+                s = 'default'
+            elif zero and not one:
+                s = 'no'
+            elif one and not zero:
+                s = 'forced'
+            elif one and zero:
+                s = 'yes'
+            print '  %-40s %s' % (self.bits[bit], s)
+
+class Misc(object):
+    def __init__(self, name, bits, msr):
+        self.name = name
+        self.bits = bits
+        self.msr = msr
+    def show(self):
+        print self.name
+        value = msr().read(self.msr, 0)
+        def first_bit(key):
+            if type(key) is tuple:
+                return key[0]
+            else:
+                return key
+        for bits in sorted(self.bits.keys(), key = first_bit):
+            if type(bits) is tuple:
+                lo, hi = bits
+                fmt = int
+            else:
+                lo = hi = bits
+                def fmt(x):
+                    return { True: 'yes', False: 'no' }[x]
+            v = (value >> lo) & ((1 << (hi - lo + 1)) - 1)
+            print '  %-40s %s' % (self.bits[bits], fmt(v))
+
+controls = [
+    Control(
+        name = 'pin-based controls',
+        bits = {
+            0: 'External interrupt exiting',
+            3: 'NMI exiting',
+            5: 'Virtual NMIs',
+            6: 'Activate VMX-preemption timer',
+            },
+        cap_msr = MSR_IA32_VMX_PINBASED_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+        ),
+
+    Control(
+        name = 'primary processor-based controls',
+        bits = {
+            2: 'Interrupt window exiting',
+            3: 'Use TSC offsetting',
+            7: 'HLT exiting',
+            9: 'INVLPG exiting',
+            10: 'MWAIT exiting',
+            11: 'RDPMC exiting',
+            12: 'RDTSC exiting',
+            15: 'CR3-load exiting',
+            16: 'CR3-store exiting',
+            19: 'CR8-load exiting',
+            20: 'CR8-store exiting',
+            21: 'Use TPR shadow',
+            22: 'NMI-window exiting',
+            23: 'MOV-DR exiting',
+            24: 'Unconditional I/O exiting',
+            25: 'Use I/O bitmaps',
+            27: 'Monitor trap flag',
+            28: 'Use MSR bitmaps',
+            29: 'MONITOR exiting',
+            30: 'PAUSE exiting',
+            31: 'Activate secondary control',
+            },
+        cap_msr = MSR_IA32_VMX_PROCBASED_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+        ),
+
+    Control(
+        name = 'secondary processor-based controls',
+        bits = {
+            0: 'Virtualize APIC accesses',
+            1: 'Enable EPT',
+            2: 'Descriptor-table exiting',
+            4: 'Virtualize x2APIC mode',
+            5: 'Enable VPID',
+            6: 'WBINVD exiting',
+            7: 'Unrestricted guest',
+            10: 'PAUSE-loop exiting',
+            },
+        cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2,
+        ),
+
+    Control(
+        name = 'VM-Exit controls',
+        bits = {
+            2: 'Save debug controls',
+            9: 'Host address-space size',
+            12: 'Load IA32_PERF_GLOBAL_CTRL',
+            15: 'Acknowledge interrupt on exit',
+            18: 'Save IA32_PAT',
+            19: 'Load IA32_PAT',
+            20: 'Save IA32_EFER',
+            21: 'Load IA32_EFER',
+            22: 'Save VMX-preemption timer value',
+            },
+        cap_msr = MSR_IA32_VMX_EXIT_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS,
+        ),
+
+    Control(
+        name = 'VM-Entry controls',
+        bits = {
+            2: 'Load debug controls',
+            9: 'IA-32e mode guest',  # 64-bit (long mode) guest, not Itanium/IA-64
+            10: 'Entry to SMM',
+            11: 'Deactivate dual-monitor treatment',
+            13: 'Load IA32_PERF_GLOBAL_CTRL',
+            14: 'Load IA32_PAT',
+            15: 'Load IA32_EFER',
+            },
+        cap_msr = MSR_IA32_VMX_ENTRY_CTLS,
+        true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+        ),
+
+    Misc(
+        name = 'Miscellaneous data',
+        bits = {  # int key = single flag bit; (lo, hi) tuple = inclusive bit range shown as a number
+            (0,4): 'VMX-preemption timer scale (log2)',
+            5: 'Store EFER.LMA into IA-32e mode guest control',
+            6: 'HLT activity state',
+            7: 'Shutdown activity state',
+            8: 'Wait-for-SIPI activity state',
+            (16,24): 'Number of CR3-target values',
+            (25,27): 'MSR-load/store count recommendation',
+            (32,63): 'MSEG revision identifier',  # full upper 32 bits of the MSR
+            },
+        msr = MSR_IA32_VMX_MISC_CTLS,
+        ),
+
+    Misc(
+        name = 'VPID and EPT capabilities',
+        bits = {
+            0: 'Execute-only EPT translations',
+            6: 'Page-walk length 4',
+            8: 'Paging-structure memory type UC',
+            14: 'Paging-structure memory type WB',
+            16: '2MB EPT pages',
+            17: '1GB EPT pages',
+            20: 'INVEPT supported',
+            25: 'Single-context INVEPT',
+            26: 'All-context INVEPT',
+            32: 'INVVPID supported',
+            40: 'Individual-address INVVPID',
+            41: 'Single-context INVVPID',
+            42: 'All-context INVVPID',
+            43: 'Single-context-retaining-globals INVVPID',
+            },
+        msr = MSR_IA32_VMX_EPT_VPID_CAP,
+        ),
+    ]
+
+for c in controls:
+    c.show()
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index a973f2e20c..a08ce9d873 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -300,6 +300,10 @@
 
 #define MSR_IA32_PERF_STATUS            0x198
 
+#define MSR_IA32_MISC_ENABLE		0x1a0
+/* Indicates good rep/movs microcode on some processors: */
+#define MSR_IA32_MISC_ENABLE_DEFAULT    1
+
 #define MSR_MTRRphysBase(reg)		(0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)		(0x200 + 2 * (reg) + 1)
 
@@ -691,6 +695,7 @@ typedef struct CPUX86State {
     uint64_t tsc_deadline;
 
     uint64_t mcg_status;
+    uint64_t msr_ia32_misc_enable;
 
     /* exception/interrupt handling */
     int error_code;
@@ -949,7 +954,7 @@ uint64_t cpu_get_tsc(CPUX86State *env);
 #define cpu_list_id x86_cpu_list
 #define cpudef_setup	x86_cpudef_setup
 
-#define CPU_SAVE_VERSION 13
+#define CPU_SAVE_VERSION 12
 
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 5df40d4661..6c6a1675df 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -98,6 +98,7 @@ void cpu_reset(CPUX86State *env)
     env->mxcsr = 0x1f80;
 
     env->pat = 0x0007040600070406ULL;
+    env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
 
     memset(env->dr, 0, sizeof(env->dr));
     env->dr[6] = DR6_FIXED_1;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 90a6ffba02..ddd115c53c 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -61,6 +61,7 @@ static bool has_msr_star;
 static bool has_msr_hsave_pa;
 static bool has_msr_tsc_deadline;
 static bool has_msr_async_pf_en;
+static bool has_msr_misc_enable;
 static int lm_capable_kernel;
 
 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
@@ -573,6 +574,10 @@ static int kvm_get_supported_msrs(KVMState *s)
                     has_msr_tsc_deadline = true;
                     continue;
                 }
+                if (kvm_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) {
+                    has_msr_misc_enable = true;
+                    continue;
+                }
             }
         }
 
@@ -889,6 +894,10 @@ static int kvm_put_msrs(CPUState *env, int level)
     if (has_msr_tsc_deadline) {
         kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
     }
+    if (has_msr_misc_enable) {
+        kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
+                          env->msr_ia32_misc_enable);
+    }
 #ifdef TARGET_X86_64
     if (lm_capable_kernel) {
         kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
@@ -1138,6 +1147,9 @@ static int kvm_get_msrs(CPUState *env)
     if (has_msr_tsc_deadline) {
         msrs[n++].index = MSR_IA32_TSCDEADLINE;
     }
+    if (has_msr_misc_enable) {
+        msrs[n++].index = MSR_IA32_MISC_ENABLE;
+    }
 
     if (!env->tsc_valid) {
         msrs[n++].index = MSR_IA32_TSC;
@@ -1224,6 +1236,9 @@ static int kvm_get_msrs(CPUState *env)
         case MSR_MCG_CTL:
             env->mcg_ctl = msrs[i].data;
             break;
+        case MSR_IA32_MISC_ENABLE:
+            env->msr_ia32_misc_enable = msrs[i].data;
+            break;
         default:
             if (msrs[i].index >= MSR_MC0_CTL &&
                 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 25fa97de4a..d6e98ff37b 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -310,6 +310,42 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     }
 };
 
+static bool tscdeadline_needed(void *opaque)
+{
+    CPUState *env = opaque;
+
+    return env->tsc_deadline != 0;
+}
+
+static const VMStateDescription vmstate_msr_tscdeadline = {
+    .name = "cpu/msr_tscdeadline",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT64(tsc_deadline, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool misc_enable_needed(void *opaque)
+{
+    CPUState *env = opaque;
+
+    return env->msr_ia32_misc_enable != MSR_IA32_MISC_ENABLE_DEFAULT;
+}
+
+static const VMStateDescription vmstate_msr_ia32_misc_enable = {
+    .name = "cpu/msr_ia32_misc_enable",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT64(msr_ia32_misc_enable, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_cpu = {
     .name = "cpu",
     .version_id = CPU_SAVE_VERSION,
@@ -410,7 +446,6 @@ static const VMStateDescription vmstate_cpu = {
         VMSTATE_UINT64_V(xcr0, CPUState, 12),
         VMSTATE_UINT64_V(xstate_bv, CPUState, 12),
         VMSTATE_YMMH_REGS_VARS(ymmh_regs, CPUState, CPU_NB_REGS, 12),
-        VMSTATE_UINT64_V(tsc_deadline, CPUState, 13),
         VMSTATE_END_OF_LIST()
         /* The above list is not sorted /wrt version numbers, watch out! */
     },
@@ -421,6 +456,12 @@ static const VMStateDescription vmstate_cpu = {
         } , {
             .vmsd = &vmstate_fpop_ip_dp,
             .needed = fpop_ip_dp_needed,
+        }, {
+            .vmsd = &vmstate_msr_tscdeadline,
+            .needed = tscdeadline_needed,
+        }, {
+            .vmsd = &vmstate_msr_ia32_misc_enable,
+            .needed = misc_enable_needed,
         } , {
             /* empty */
         }
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 3bb5a919ce..c89e4a49db 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -3280,6 +3280,9 @@ void helper_wrmsr(void)
     case MSR_TSC_AUX:
         env->tsc_aux = val;
         break;
+    case MSR_IA32_MISC_ENABLE:
+        env->msr_ia32_misc_enable = val;
+        break;
     default:
         if ((uint32_t)ECX >= MSR_MC0_CTL
             && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
@@ -3413,6 +3416,9 @@ void helper_rdmsr(void)
     case MSR_MCG_STATUS:
         val = env->mcg_status;
         break;
+    case MSR_IA32_MISC_ENABLE:
+        val = env->msr_ia32_misc_enable;
+        break;
     default:
         if ((uint32_t)ECX >= MSR_MC0_CTL
             && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 25b4f1a1dc..38a707466c 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -3,16 +3,17 @@
 
 #include "config.h"
 #include "qemu-common.h"
+#include "bswap.h"
 
 #if !defined(TARGET_SPARC64)
 #define TARGET_LONG_BITS 32
-#define TARGET_FPREGS 32
+#define TARGET_DPREGS 16
 #define TARGET_PAGE_BITS 12 /* 4k */
 #define TARGET_PHYS_ADDR_SPACE_BITS 36
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 #else
 #define TARGET_LONG_BITS 64
-#define TARGET_FPREGS 64
+#define TARGET_DPREGS 32
 #define TARGET_PAGE_BITS 13 /* 8k */
 #define TARGET_PHYS_ADDR_SPACE_BITS 41
 # ifdef TARGET_ABI32
@@ -395,7 +396,7 @@ typedef struct CPUSPARCState {
 
     uint32_t psr;      /* processor state register */
     target_ulong fsr;      /* FPU state register */
-    float32 fpr[TARGET_FPREGS];  /* floating point registers */
+    CPU_DoubleU fpr[TARGET_DPREGS];  /* floating point registers */
     uint32_t cwp;      /* index of current register window (extracted
                           from PSR) */
 #if !defined(TARGET_SPARC64) || defined(TARGET_ABI32)
@@ -463,7 +464,6 @@ typedef struct CPUSPARCState {
     uint64_t prom_addr;
 #endif
     /* temporary float registers */
-    float64 dt0, dt1;
     float128 qt0, qt1;
     float_status fp_status;
 #if defined(TARGET_SPARC64)
diff --git a/target-sparc/cpu_init.c b/target-sparc/cpu_init.c
index 6954800af0..c7269b54a8 100644
--- a/target-sparc/cpu_init.c
+++ b/target-sparc/cpu_init.c
@@ -813,11 +813,11 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
         }
     }
     cpu_fprintf(f, "\nFloating Point Registers:\n");
-    for (i = 0; i < TARGET_FPREGS; i++) {
+    for (i = 0; i < TARGET_DPREGS; i++) {
         if ((i & 3) == 0) {
-            cpu_fprintf(f, "%%f%02d:", i);
+            cpu_fprintf(f, "%%f%02d:", i * 2);
         }
-        cpu_fprintf(f, " %016f", *(float *)&env->fpr[i]);
+        cpu_fprintf(f, " %016" PRIx64, env->fpr[i].ll);
         if ((i & 3) == 3) {
             cpu_fprintf(f, "\n");
         }
diff --git a/target-sparc/fop_helper.c b/target-sparc/fop_helper.c
index 23502f3020..c7a2512117 100644
--- a/target-sparc/fop_helper.c
+++ b/target-sparc/fop_helper.c
@@ -20,26 +20,74 @@
 #include "cpu.h"
 #include "helper.h"
 
-#define DT0 (env->dt0)
-#define DT1 (env->dt1)
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 
+static void check_ieee_exceptions(CPUState *env)
+{
+    target_ulong status;
+
+    status = get_float_exception_flags(&env->fp_status);
+    if (status) {
+        /* Copy IEEE 754 flags into FSR */
+        if (status & float_flag_invalid) {
+            env->fsr |= FSR_NVC;
+        }
+        if (status & float_flag_overflow) {
+            env->fsr |= FSR_OFC;
+        }
+        if (status & float_flag_underflow) {
+            env->fsr |= FSR_UFC;
+        }
+        if (status & float_flag_divbyzero) {
+            env->fsr |= FSR_DZC;
+        }
+        if (status & float_flag_inexact) {
+            env->fsr |= FSR_NXC;
+        }
+
+        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
+            /* Unmasked exception, generate a trap */
+            env->fsr |= FSR_FTT_IEEE_EXCP;
+            helper_raise_exception(env, TT_FP_EXCP);
+        } else {
+            /* Accumulate exceptions */
+            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
+        }
+    }
+}
+
+static inline void clear_float_exceptions(CPUState *env)
+{
+    set_float_exception_flags(0, &env->fp_status);
+}
+
 #define F_HELPER(name, p) void helper_f##name##p(CPUState *env)
 
 #define F_BINOP(name)                                           \
-    float32 helper_f ## name ## s (CPUState * env, float32 src1,\
+    float32 helper_f ## name ## s (CPUState *env, float32 src1, \
                                    float32 src2)                \
     {                                                           \
-        return float32_ ## name (src1, src2, &env->fp_status);  \
+        float32 ret;                                            \
+        clear_float_exceptions(env);                            \
+        ret = float32_ ## name (src1, src2, &env->fp_status);   \
+        check_ieee_exceptions(env);                             \
+        return ret;                                             \
     }                                                           \
-    F_HELPER(name, d)                                           \
+    float64 helper_f ## name ## d (CPUState * env, float64 src1,\
+                                   float64 src2)                \
     {                                                           \
-        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
+        float64 ret;                                            \
+        clear_float_exceptions(env);                            \
+        ret = float64_ ## name (src1, src2, &env->fp_status);   \
+        check_ieee_exceptions(env);                             \
+        return ret;                                             \
     }                                                           \
     F_HELPER(name, q)                                           \
     {                                                           \
+        clear_float_exceptions(env);                            \
         QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
+        check_ieee_exceptions(env);                             \
     }
 
 F_BINOP(add);
@@ -48,18 +96,24 @@ F_BINOP(mul);
 F_BINOP(div);
 #undef F_BINOP
 
-void helper_fsmuld(CPUState *env, float32 src1, float32 src2)
+float64 helper_fsmuld(CPUState *env, float32 src1, float32 src2)
 {
-    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float64_mul(float32_to_float64(src1, &env->fp_status),
                       float32_to_float64(src2, &env->fp_status),
                       &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fdmulq(CPUState *env)
+void helper_fdmulq(CPUState *env, float64 src1, float64 src2)
 {
-    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
-                       float64_to_float128(DT1, &env->fp_status),
+    clear_float_exceptions(env);
+    QT0 = float128_mul(float64_to_float128(src1, &env->fp_status),
+                       float64_to_float128(src2, &env->fp_status),
                        &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
 float32 helper_fnegs(float32 src)
@@ -68,9 +122,9 @@ float32 helper_fnegs(float32 src)
 }
 
 #ifdef TARGET_SPARC64
-F_HELPER(neg, d)
+float64 helper_fnegd(float64 src)
 {
-    DT0 = float64_chs(DT1);
+    return float64_chs(src);
 }
 
 F_HELPER(neg, q)
@@ -82,98 +136,158 @@ F_HELPER(neg, q)
 /* Integer to float conversion.  */
 float32 helper_fitos(CPUState *env, int32_t src)
 {
-    return int32_to_float32(src, &env->fp_status);
+    /* Inexact error possible converting int to float.  */
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = int32_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fitod(CPUState *env, int32_t src)
+float64 helper_fitod(CPUState *env, int32_t src)
 {
-    DT0 = int32_to_float64(src, &env->fp_status);
+    /* No possible exceptions converting int to double.  */
+    return int32_to_float64(src, &env->fp_status);
 }
 
 void helper_fitoq(CPUState *env, int32_t src)
 {
+    /* No possible exceptions converting int to long double.  */
     QT0 = int32_to_float128(src, &env->fp_status);
 }
 
 #ifdef TARGET_SPARC64
-float32 helper_fxtos(CPUState *env)
+float32 helper_fxtos(CPUState *env, int64_t src)
 {
-    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = int64_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-F_HELPER(xto, d)
+float64 helper_fxtod(CPUState *env, int64_t src)
 {
-    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = int64_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-F_HELPER(xto, q)
+void helper_fxtoq(CPUState *env, int64_t src)
 {
-    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
+    /* No possible exceptions converting long long to long double.  */
+    QT0 = int64_to_float128(src, &env->fp_status);
 }
 #endif
 #undef F_HELPER
 
 /* floating point conversion */
-float32 helper_fdtos(CPUState *env)
+float32 helper_fdtos(CPUState *env, float64 src)
 {
-    return float64_to_float32(DT1, &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = float64_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fstod(CPUState *env, float32 src)
+float64 helper_fstod(CPUState *env, float32 src)
 {
-    DT0 = float32_to_float64(src, &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float32_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 float32 helper_fqtos(CPUState *env)
 {
-    return float128_to_float32(QT1, &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = float128_to_float32(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 void helper_fstoq(CPUState *env, float32 src)
 {
+    clear_float_exceptions(env);
     QT0 = float32_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
-void helper_fqtod(CPUState *env)
+float64 helper_fqtod(CPUState *env)
 {
-    DT0 = float128_to_float64(QT1, &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float128_to_float64(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fdtoq(CPUState *env)
+void helper_fdtoq(CPUState *env, float64 src)
 {
-    QT0 = float64_to_float128(DT1, &env->fp_status);
+    clear_float_exceptions(env);
+    QT0 = float64_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
 /* Float to integer conversion.  */
 int32_t helper_fstoi(CPUState *env, float32 src)
 {
-    return float32_to_int32_round_to_zero(src, &env->fp_status);
+    int32_t ret;
+    clear_float_exceptions(env);
+    ret = float32_to_int32_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-int32_t helper_fdtoi(CPUState *env)
+int32_t helper_fdtoi(CPUState *env, float64 src)
 {
-    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
+    int32_t ret;
+    clear_float_exceptions(env);
+    ret = float64_to_int32_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 int32_t helper_fqtoi(CPUState *env)
 {
-    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+    int32_t ret;
+    clear_float_exceptions(env);
+    ret = float128_to_int32_round_to_zero(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 #ifdef TARGET_SPARC64
-void helper_fstox(CPUState *env, float32 src)
+int64_t helper_fstox(CPUState *env, float32 src)
 {
-    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
+    int64_t ret;
+    clear_float_exceptions(env);
+    ret = float32_to_int64_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fdtox(CPUState *env)
+int64_t helper_fdtox(CPUState *env, float64 src)
 {
-    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
+    int64_t ret;
+    clear_float_exceptions(env);
+    ret = float64_to_int64_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fqtox(CPUState *env)
+int64_t helper_fqtox(CPUState *env)
 {
-    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+    int64_t ret;
+    clear_float_exceptions(env);
+    ret = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 #endif
 
@@ -183,9 +297,9 @@ float32 helper_fabss(float32 src)
 }
 
 #ifdef TARGET_SPARC64
-void helper_fabsd(CPUState *env)
+float64 helper_fabsd(float64 src)
 {
-    DT0 = float64_abs(DT1);
+    return float64_abs(src);
 }
 
 void helper_fabsq(CPUState *env)
@@ -196,17 +310,27 @@ void helper_fabsq(CPUState *env)
 
 float32 helper_fsqrts(CPUState *env, float32 src)
 {
-    return float32_sqrt(src, &env->fp_status);
+    float32 ret;
+    clear_float_exceptions(env);
+    ret = float32_sqrt(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
-void helper_fsqrtd(CPUState *env)
+float64 helper_fsqrtd(CPUState *env, float64 src)
 {
-    DT0 = float64_sqrt(DT1, &env->fp_status);
+    float64 ret;
+    clear_float_exceptions(env);
+    ret = float64_sqrt(src, &env->fp_status);
+    check_ieee_exceptions(env);
+    return ret;
 }
 
 void helper_fsqrtq(CPUState *env)
 {
+    clear_float_exceptions(env);
     QT0 = float128_sqrt(QT1, &env->fp_status);
+    check_ieee_exceptions(env);
 }
 
 #define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
@@ -245,8 +369,8 @@ void helper_fsqrtq(CPUState *env)
             break;                                                      \
         }                                                               \
     }
-#define GEN_FCMPS(name, size, FS, E)                                    \
-    void glue(helper_, name)(CPUState *env, float32 src1, float32 src2) \
+#define GEN_FCMP_T(name, size, FS, E)                                   \
+    void glue(helper_, name)(CPUState *env, size src1, size src2)       \
     {                                                                   \
         env->fsr &= FSR_FTT_NMASK;                                      \
         if (E && (glue(size, _is_any_nan)(src1) ||                      \
@@ -282,80 +406,42 @@ void helper_fsqrtq(CPUState *env)
         }                                                               \
     }
 
-GEN_FCMPS(fcmps, float32, 0, 0);
-GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
+GEN_FCMP_T(fcmps, float32, 0, 0);
+GEN_FCMP_T(fcmpd, float64, 0, 0);
 
-GEN_FCMPS(fcmpes, float32, 0, 1);
-GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
+GEN_FCMP_T(fcmpes, float32, 0, 1);
+GEN_FCMP_T(fcmped, float64, 0, 1);
 
 GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
 GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
 
 #ifdef TARGET_SPARC64
-GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
-GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
+GEN_FCMP_T(fcmps_fcc1, float32, 22, 0);
+GEN_FCMP_T(fcmpd_fcc1, float64, 22, 0);
 GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
 
-GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
-GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
+GEN_FCMP_T(fcmps_fcc2, float32, 24, 0);
+GEN_FCMP_T(fcmpd_fcc2, float64, 24, 0);
 GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
 
-GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
-GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
+GEN_FCMP_T(fcmps_fcc3, float32, 26, 0);
+GEN_FCMP_T(fcmpd_fcc3, float64, 26, 0);
 GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
 
-GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
-GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
+GEN_FCMP_T(fcmpes_fcc1, float32, 22, 1);
+GEN_FCMP_T(fcmped_fcc1, float64, 22, 1);
 GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
 
-GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
-GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
+GEN_FCMP_T(fcmpes_fcc2, float32, 24, 1);
+GEN_FCMP_T(fcmped_fcc2, float64, 24, 1);
 GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
 
-GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
-GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
+GEN_FCMP_T(fcmpes_fcc3, float32, 26, 1);
+GEN_FCMP_T(fcmped_fcc3, float64, 26, 1);
 GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
 #endif
-#undef GEN_FCMPS
-
-void helper_check_ieee_exceptions(CPUState *env)
-{
-    target_ulong status;
-
-    status = get_float_exception_flags(&env->fp_status);
-    if (status) {
-        /* Copy IEEE 754 flags into FSR */
-        if (status & float_flag_invalid) {
-            env->fsr |= FSR_NVC;
-        }
-        if (status & float_flag_overflow) {
-            env->fsr |= FSR_OFC;
-        }
-        if (status & float_flag_underflow) {
-            env->fsr |= FSR_UFC;
-        }
-        if (status & float_flag_divbyzero) {
-            env->fsr |= FSR_DZC;
-        }
-        if (status & float_flag_inexact) {
-            env->fsr |= FSR_NXC;
-        }
-
-        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
-            /* Unmasked exception, generate a trap */
-            env->fsr |= FSR_FTT_IEEE_EXCP;
-            helper_raise_exception(env, TT_FP_EXCP);
-        } else {
-            /* Accumulate exceptions */
-            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
-        }
-    }
-}
-
-void helper_clear_float_exceptions(CPUState *env)
-{
-    set_float_exception_flags(0, &env->fp_status);
-}
+#undef GEN_FCMP_T
+#undef GEN_FCMP
 
 static inline void set_fsr(CPUState *env)
 {
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index 615ddefa92..faaf8dc7ad 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -16,8 +16,7 @@ DEF_HELPER_1(rdccr, tl, env)
 DEF_HELPER_2(wrccr, void, env, tl)
 DEF_HELPER_1(rdcwp, tl, env)
 DEF_HELPER_2(wrcwp, void, env, tl)
-DEF_HELPER_3(array8, tl, env, tl, tl)
-DEF_HELPER_3(alignaddr, tl, env, tl, tl)
+DEF_HELPER_FLAGS_2(array8, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
 DEF_HELPER_1(popc, tl, tl)
 DEF_HELPER_3(ldda_asi, void, tl, int, int)
 DEF_HELPER_4(ldf_asi, void, tl, int, int, int)
@@ -39,8 +38,6 @@ DEF_HELPER_3(udiv, tl, env, tl, tl)
 DEF_HELPER_3(udiv_cc, tl, env, tl, tl)
 DEF_HELPER_3(sdiv, tl, env, tl, tl)
 DEF_HELPER_3(sdiv_cc, tl, env, tl, tl)
-DEF_HELPER_2(stdf, void, tl, int)
-DEF_HELPER_2(lddf, void, tl, int)
 DEF_HELPER_2(ldqf, void, tl, int)
 DEF_HELPER_2(stqf, void, tl, int)
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
@@ -48,33 +45,31 @@ DEF_HELPER_4(ld_asi, i64, tl, int, int, int)
 DEF_HELPER_4(st_asi, void, tl, i64, int, int)
 #endif
 DEF_HELPER_2(ldfsr, void, env, i32)
-DEF_HELPER_1(check_ieee_exceptions, void, env)
-DEF_HELPER_1(clear_float_exceptions, void, env)
-DEF_HELPER_1(fabss, f32, f32)
+DEF_HELPER_FLAGS_1(fabss, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
 DEF_HELPER_2(fsqrts, f32, env, f32)
-DEF_HELPER_1(fsqrtd, void, env)
+DEF_HELPER_2(fsqrtd, f64, env, f64)
 DEF_HELPER_3(fcmps, void, env, f32, f32)
-DEF_HELPER_1(fcmpd, void, env)
+DEF_HELPER_3(fcmpd, void, env, f64, f64)
 DEF_HELPER_3(fcmpes, void, env, f32, f32)
-DEF_HELPER_1(fcmped, void, env)
+DEF_HELPER_3(fcmped, void, env, f64, f64)
 DEF_HELPER_1(fsqrtq, void, env)
 DEF_HELPER_1(fcmpq, void, env)
 DEF_HELPER_1(fcmpeq, void, env)
 #ifdef TARGET_SPARC64
 DEF_HELPER_2(ldxfsr, void, env, i64)
-DEF_HELPER_1(fabsd, void, env)
+DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
 DEF_HELPER_3(fcmps_fcc1, void, env, f32, f32)
 DEF_HELPER_3(fcmps_fcc2, void, env, f32, f32)
 DEF_HELPER_3(fcmps_fcc3, void, env, f32, f32)
-DEF_HELPER_1(fcmpd_fcc1, void, env)
-DEF_HELPER_1(fcmpd_fcc2, void, env)
-DEF_HELPER_1(fcmpd_fcc3, void, env)
+DEF_HELPER_3(fcmpd_fcc1, void, env, f64, f64)
+DEF_HELPER_3(fcmpd_fcc2, void, env, f64, f64)
+DEF_HELPER_3(fcmpd_fcc3, void, env, f64, f64)
 DEF_HELPER_3(fcmpes_fcc1, void, env, f32, f32)
 DEF_HELPER_3(fcmpes_fcc2, void, env, f32, f32)
 DEF_HELPER_3(fcmpes_fcc3, void, env, f32, f32)
-DEF_HELPER_1(fcmped_fcc1, void, env)
-DEF_HELPER_1(fcmped_fcc2, void, env)
-DEF_HELPER_1(fcmped_fcc3, void, env)
+DEF_HELPER_3(fcmped_fcc1, void, env, f64, f64)
+DEF_HELPER_3(fcmped_fcc2, void, env, f64, f64)
+DEF_HELPER_3(fcmped_fcc3, void, env, f64, f64)
 DEF_HELPER_1(fabsq, void, env)
 DEF_HELPER_1(fcmpq_fcc1, void, env)
 DEF_HELPER_1(fcmpq_fcc2, void, env)
@@ -86,77 +81,88 @@ DEF_HELPER_1(fcmpeq_fcc3, void, env)
 DEF_HELPER_2(raise_exception, void, env, int)
 DEF_HELPER_0(shutdown, void)
 #define F_HELPER_0_1(name) DEF_HELPER_1(f ## name, void, env)
-#define F_HELPER_DQ_0_1(name)                   \
-    F_HELPER_0_1(name ## d);                    \
-    F_HELPER_0_1(name ## q)
 
-F_HELPER_DQ_0_1(add);
-F_HELPER_DQ_0_1(sub);
-F_HELPER_DQ_0_1(mul);
-F_HELPER_DQ_0_1(div);
+DEF_HELPER_3(faddd, f64, env, f64, f64)
+DEF_HELPER_3(fsubd, f64, env, f64, f64)
+DEF_HELPER_3(fmuld, f64, env, f64, f64)
+DEF_HELPER_3(fdivd, f64, env, f64, f64)
+F_HELPER_0_1(addq)
+F_HELPER_0_1(subq)
+F_HELPER_0_1(mulq)
+F_HELPER_0_1(divq)
 
 DEF_HELPER_3(fadds, f32, env, f32, f32)
 DEF_HELPER_3(fsubs, f32, env, f32, f32)
 DEF_HELPER_3(fmuls, f32, env, f32, f32)
 DEF_HELPER_3(fdivs, f32, env, f32, f32)
 
-DEF_HELPER_3(fsmuld, void, env, f32, f32)
-F_HELPER_0_1(dmulq);
+DEF_HELPER_3(fsmuld, f64, env, f32, f32)
+DEF_HELPER_3(fdmulq, void, env, f64, f64)
 
-DEF_HELPER_1(fnegs, f32, f32)
-DEF_HELPER_2(fitod, void, env, s32)
+DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_2(fitod, f64, env, s32)
 DEF_HELPER_2(fitoq, void, env, s32)
 
 DEF_HELPER_2(fitos, f32, env, s32)
 
 #ifdef TARGET_SPARC64
-DEF_HELPER_1(fnegd, void, env)
+DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
 DEF_HELPER_1(fnegq, void, env)
-DEF_HELPER_1(fxtos, i32, env)
-F_HELPER_DQ_0_1(xto);
+DEF_HELPER_2(fxtos, f32, env, s64)
+DEF_HELPER_2(fxtod, f64, env, s64)
+DEF_HELPER_2(fxtoq, void, env, s64)
 #endif
-DEF_HELPER_1(fdtos, f32, env)
-DEF_HELPER_2(fstod, void, env, f32)
+DEF_HELPER_2(fdtos, f32, env, f64)
+DEF_HELPER_2(fstod, f64, env, f32)
 DEF_HELPER_1(fqtos, f32, env)
 DEF_HELPER_2(fstoq, void, env, f32)
-F_HELPER_0_1(qtod);
-F_HELPER_0_1(dtoq);
+DEF_HELPER_1(fqtod, f64, env)
+DEF_HELPER_2(fdtoq, void, env, f64)
 DEF_HELPER_2(fstoi, s32, env, f32)
-DEF_HELPER_1(fdtoi, s32, env)
+DEF_HELPER_2(fdtoi, s32, env, f64)
 DEF_HELPER_1(fqtoi, s32, env)
 #ifdef TARGET_SPARC64
-DEF_HELPER_2(fstox, void, env, i32)
-F_HELPER_0_1(dtox);
-F_HELPER_0_1(qtox);
-F_HELPER_0_1(aligndata);
+DEF_HELPER_2(fstox, s64, env, f32)
+DEF_HELPER_2(fdtox, s64, env, f64)
+DEF_HELPER_1(fqtox, s64, env)
 
-F_HELPER_0_1(pmerge);
-F_HELPER_0_1(mul8x16);
-F_HELPER_0_1(mul8x16al);
-F_HELPER_0_1(mul8x16au);
-F_HELPER_0_1(mul8sux16);
-F_HELPER_0_1(mul8ulx16);
-F_HELPER_0_1(muld8sux16);
-F_HELPER_0_1(muld8ulx16);
-F_HELPER_0_1(expand);
-#define VIS_HELPER(name)                                 \
-    F_HELPER_0_1(name##16);                              \
-    DEF_HELPER_3(f ## name ## 16s, i32, env, i32, i32)   \
-    F_HELPER_0_1(name##32);                              \
-    DEF_HELPER_3(f ## name ## 32s, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmuld8ulx16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fexpand, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(pdist, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+#define VIS_HELPER(name)                                                 \
+    DEF_HELPER_FLAGS_2(f ## name ## 16, TCG_CALL_CONST | TCG_CALL_PURE,  \
+                       i64, i64, i64)                                    \
+    DEF_HELPER_FLAGS_2(f ## name ## 16s, TCG_CALL_CONST | TCG_CALL_PURE, \
+                       i32, i32, i32)                                    \
+    DEF_HELPER_FLAGS_2(f ## name ## 32, TCG_CALL_CONST | TCG_CALL_PURE,  \
+                       i64, i64, i64)                                    \
+    DEF_HELPER_FLAGS_2(f ## name ## 32s, TCG_CALL_CONST | TCG_CALL_PURE, \
+                       i32, i32, i32)
 
 VIS_HELPER(padd);
 VIS_HELPER(psub);
-#define VIS_CMPHELPER(name)                              \
-    DEF_HELPER_1(f##name##16, i64, env);                 \
-    DEF_HELPER_1(f##name##32, i64, env)
+#define VIS_CMPHELPER(name)                                              \
+    DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_CONST | TCG_CALL_PURE,      \
+                       i64, i64, i64)                                    \
+    DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_CONST | TCG_CALL_PURE,      \
+                       i64, i64, i64)
 VIS_CMPHELPER(cmpgt);
 VIS_CMPHELPER(cmpeq);
 VIS_CMPHELPER(cmple);
 VIS_CMPHELPER(cmpne);
 #endif
 #undef F_HELPER_0_1
-#undef F_HELPER_DQ_0_1
 #undef VIS_HELPER
 #undef VIS_CMPHELPER
 DEF_HELPER_1(compute_psr, void, env);
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 1fb3996fdb..b59707ecd2 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -66,8 +66,6 @@
 #endif
 #endif
 
-#define DT0 (env->dt0)
-#define DT1 (env->dt1)
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 
@@ -2047,7 +2045,7 @@ void helper_ldda_asi(target_ulong addr, int asi, int rd)
 void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
 {
     unsigned int i;
-    CPU_DoubleU u;
+    target_ulong val;
 
     helper_check_align(addr, 3);
     addr = asi_address_mask(env, asi, addr);
@@ -2062,13 +2060,11 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x8f, 4,
-                                                         0);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            env->fpr[rd/2].ll = helper_ld_asi(addr, asi & 0x8f, 8, 0);
         }
-
         return;
+
     case 0x16: /* UA2007 Block load primary, user privilege */
     case 0x17: /* UA2007 Block load secondary, user privilege */
     case 0x1e: /* UA2007 Block load primary LE, user privilege */
@@ -2082,13 +2078,11 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x19, 4,
-                                                         0);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            env->fpr[rd/2].ll = helper_ld_asi(addr, asi & 0x19, 8, 0);
         }
-
         return;
+
     default:
         break;
     }
@@ -2096,20 +2090,19 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
     switch (size) {
     default:
     case 4:
-        *((uint32_t *)&env->fpr[rd]) = helper_ld_asi(addr, asi, size, 0);
+        val = helper_ld_asi(addr, asi, size, 0);
+        if (rd & 1) {
+            env->fpr[rd/2].l.lower = val;
+        } else {
+            env->fpr[rd/2].l.upper = val;
+        }
         break;
     case 8:
-        u.ll = helper_ld_asi(addr, asi, size, 0);
-        *((uint32_t *)&env->fpr[rd++]) = u.l.upper;
-        *((uint32_t *)&env->fpr[rd++]) = u.l.lower;
+        env->fpr[rd/2].ll = helper_ld_asi(addr, asi, size, 0);
         break;
     case 16:
-        u.ll = helper_ld_asi(addr, asi, 8, 0);
-        *((uint32_t *)&env->fpr[rd++]) = u.l.upper;
-        *((uint32_t *)&env->fpr[rd++]) = u.l.lower;
-        u.ll = helper_ld_asi(addr + 8, asi, 8, 0);
-        *((uint32_t *)&env->fpr[rd++]) = u.l.upper;
-        *((uint32_t *)&env->fpr[rd++]) = u.l.lower;
+        env->fpr[rd/2].ll = helper_ld_asi(addr, asi, 8, 0);
+        env->fpr[rd/2 + 1].ll = helper_ld_asi(addr + 8, asi, 8, 0);
         break;
     }
 }
@@ -2117,8 +2110,7 @@ void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
 void helper_stf_asi(target_ulong addr, int asi, int size, int rd)
 {
     unsigned int i;
-    target_ulong val = 0;
-    CPU_DoubleU u;
+    target_ulong val;
 
     helper_check_align(addr, 3);
     addr = asi_address_mask(env, asi, addr);
@@ -2135,10 +2127,8 @@ void helper_stf_asi(target_ulong addr, int asi, int size, int rd)
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            val = *(uint32_t *)&env->fpr[rd++];
-            helper_st_asi(addr, val, asi & 0x8f, 4);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            helper_st_asi(addr, env->fpr[rd/2].ll, asi & 0x8f, 8);
         }
 
         return;
@@ -2155,10 +2145,8 @@ void helper_stf_asi(target_ulong addr, int asi, int size, int rd)
             return;
         }
         helper_check_align(addr, 0x3f);
-        for (i = 0; i < 16; i++) {
-            val = *(uint32_t *)&env->fpr[rd++];
-            helper_st_asi(addr, val, asi & 0x19, 4);
-            addr += 4;
+        for (i = 0; i < 8; i++, rd += 2, addr += 8) {
+            helper_st_asi(addr, env->fpr[rd/2].ll, asi & 0x19, 8);
         }
 
         return;
@@ -2169,20 +2157,19 @@ void helper_stf_asi(target_ulong addr, int asi, int size, int rd)
     switch (size) {
     default:
     case 4:
-        helper_st_asi(addr, *(uint32_t *)&env->fpr[rd], asi, size);
+        if (rd & 1) {
+            val = env->fpr[rd/2].l.lower;
+        } else {
+            val = env->fpr[rd/2].l.upper;
+        }
+        helper_st_asi(addr, val, asi, size);
         break;
     case 8:
-        u.l.upper = *(uint32_t *)&env->fpr[rd++];
-        u.l.lower = *(uint32_t *)&env->fpr[rd++];
-        helper_st_asi(addr, u.ll, asi, size);
+        helper_st_asi(addr, env->fpr[rd/2].ll, asi, size);
         break;
     case 16:
-        u.l.upper = *(uint32_t *)&env->fpr[rd++];
-        u.l.lower = *(uint32_t *)&env->fpr[rd++];
-        helper_st_asi(addr, u.ll, asi, 8);
-        u.l.upper = *(uint32_t *)&env->fpr[rd++];
-        u.l.lower = *(uint32_t *)&env->fpr[rd++];
-        helper_st_asi(addr + 8, u.ll, asi, 8);
+        helper_st_asi(addr, env->fpr[rd/2].ll, asi, 8);
+        helper_st_asi(addr + 8, env->fpr[rd/2 + 1].ll, asi, 8);
         break;
     }
 }
@@ -2214,56 +2201,6 @@ target_ulong helper_casx_asi(target_ulong addr, target_ulong val1,
 }
 #endif /* TARGET_SPARC64 */
 
-void helper_stdf(target_ulong addr, int mem_idx)
-{
-    helper_check_align(addr, 7);
-#if !defined(CONFIG_USER_ONLY)
-    switch (mem_idx) {
-    case MMU_USER_IDX:
-        stfq_user(addr, DT0);
-        break;
-    case MMU_KERNEL_IDX:
-        stfq_kernel(addr, DT0);
-        break;
-#ifdef TARGET_SPARC64
-    case MMU_HYPV_IDX:
-        stfq_hypv(addr, DT0);
-        break;
-#endif
-    default:
-        DPRINTF_MMU("helper_stdf: need to check MMU idx %d\n", mem_idx);
-        break;
-    }
-#else
-    stfq_raw(address_mask(env, addr), DT0);
-#endif
-}
-
-void helper_lddf(target_ulong addr, int mem_idx)
-{
-    helper_check_align(addr, 7);
-#if !defined(CONFIG_USER_ONLY)
-    switch (mem_idx) {
-    case MMU_USER_IDX:
-        DT0 = ldfq_user(addr);
-        break;
-    case MMU_KERNEL_IDX:
-        DT0 = ldfq_kernel(addr);
-        break;
-#ifdef TARGET_SPARC64
-    case MMU_HYPV_IDX:
-        DT0 = ldfq_hypv(addr);
-        break;
-#endif
-    default:
-        DPRINTF_MMU("helper_lddf: need to check MMU idx %d\n", mem_idx);
-        break;
-    }
-#else
-    DT0 = ldfq_raw(address_mask(env, addr));
-#endif
-}
-
 void helper_ldqf(target_ulong addr, int mem_idx)
 {
     /* XXX add 128 bit load */
diff --git a/target-sparc/machine.c b/target-sparc/machine.c
index 56ae0412cd..235b088a45 100644
--- a/target-sparc/machine.c
+++ b/target-sparc/machine.c
@@ -21,13 +21,9 @@ void cpu_save(QEMUFile *f, void *opaque)
         qemu_put_betls(f, &env->regbase[i]);
 
     /* FPU */
-    for(i = 0; i < TARGET_FPREGS; i++) {
-        union {
-            float32 f;
-            uint32_t i;
-        } u;
-        u.f = env->fpr[i];
-        qemu_put_be32(f, u.i);
+    for (i = 0; i < TARGET_DPREGS; i++) {
+        qemu_put_be32(f, env->fpr[i].l.upper);
+        qemu_put_be32(f, env->fpr[i].l.lower);
     }
 
     qemu_put_betls(f, &env->pc);
@@ -128,13 +124,9 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
         qemu_get_betls(f, &env->regbase[i]);
 
     /* FPU */
-    for(i = 0; i < TARGET_FPREGS; i++) {
-        union {
-            float32 f;
-            uint32_t i;
-        } u;
-        u.i = qemu_get_be32(f);
-        env->fpr[i] = u.f;
+    for (i = 0; i < TARGET_DPREGS; i++) {
+        env->fpr[i].l.upper = qemu_get_be32(f);
+        env->fpr[i].l.lower = qemu_get_be32(f);
     }
 
     qemu_get_betls(f, &env->pc);
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 383fd9ce26..93185402fd 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -63,7 +63,7 @@ static TCGv cpu_tmp0;
 static TCGv_i32 cpu_tmp32;
 static TCGv_i64 cpu_tmp64;
 /* Floating point registers */
-static TCGv_i32 cpu_fpr[TARGET_FPREGS];
+static TCGv_i64 cpu_fpr[TARGET_DPREGS];
 
 static target_ulong gen_opc_npc[OPC_BUF_SIZE];
 static target_ulong gen_opc_jump_pc[2];
@@ -82,6 +82,8 @@ typedef struct DisasContext {
     uint32_t cc_op;  /* current CC operation */
     struct TranslationBlock *tb;
     sparc_def_t *def;
+    TCGv_i32 t32[3];
+    int n_t32;
 } DisasContext;
 
 // This function uses non-native bit order
@@ -114,67 +116,116 @@ static int sign_extend(int x, int len)
 
 #define IS_IMM (insn & (1<<13))
 
+static inline void gen_update_fprs_dirty(int rd)
+{
+#if defined(TARGET_SPARC64)
+    tcg_gen_ori_i32(cpu_fprs, cpu_fprs, (rd < 32) ? 1 : 2);
+#endif
+}
+
 /* floating point registers moves */
-static void gen_op_load_fpr_DT0(unsigned int src)
+static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
+{
+#if TCG_TARGET_REG_BITS == 32
+    if (src & 1) {
+        return TCGV_LOW(cpu_fpr[src / 2]);   /* odd reg: low 32 bits */
+    } else {
+        return TCGV_HIGH(cpu_fpr[src / 2]);  /* even reg: high 32 bits */
+    }
+#else
+    if (src & 1) {
+        return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2]));
+    } else {
+        TCGv_i32 ret = tcg_temp_local_new_i32();
+        TCGv_i64 t = tcg_temp_new_i64();
+
+        tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32);
+        tcg_gen_trunc_i64_i32(ret, t);
+        tcg_temp_free_i64(t);
+        /* Bounds-check before the store so an overflow aborts cleanly.  */
+        assert(dc->n_t32 < ARRAY_SIZE(dc->t32));
+        dc->t32[dc->n_t32++] = ret;
+
+        return ret;
+    }
+#endif
+}
+
+static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
+{
+#if TCG_TARGET_REG_BITS == 32
+    if (dst & 1) {
+        tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v);
+    } else {
+        tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v);
+    }
+#else
+    TCGv_i64 t = MAKE_TCGV_I64(GET_TCGV_I32(v));
+    tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t,
+                        (dst & 1 ? 0 : 32), 32);
+#endif
+    gen_update_fprs_dirty(dst);
+}
+
+static TCGv_i32 gen_dest_fpr_F(void)
+{
+    return cpu_tmp32;
+}
+
+static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.lower));
+    src = DFPREG(src);
+    return cpu_fpr[src / 2];
 }
 
-static void gen_op_load_fpr_DT1(unsigned int src)
+static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, dt1) +
-                   offsetof(CPU_DoubleU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, dt1) +
-                   offsetof(CPU_DoubleU, l.lower));
+    dst = DFPREG(dst);
+    tcg_gen_mov_i64(cpu_fpr[dst / 2], v);
+    gen_update_fprs_dirty(dst);
 }
 
-static void gen_op_store_DT0_fpr(unsigned int dst)
+static TCGv_i64 gen_dest_fpr_D(void)
 {
-    tcg_gen_ld_i32(cpu_fpr[dst], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.upper));
-    tcg_gen_ld_i32(cpu_fpr[dst + 1], cpu_env, offsetof(CPUSPARCState, dt0) +
-                   offsetof(CPU_DoubleU, l.lower));
+    return cpu_tmp64;
 }
 
 static void gen_op_load_fpr_QT0(unsigned int src)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upmost));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 2], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lower));
-    tcg_gen_st_i32(cpu_fpr[src + 3], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lowest));
+    tcg_gen_st_i64(cpu_fpr[src / 2], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.upper));
+    tcg_gen_st_i64(cpu_fpr[src/2 + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.lower));
 }
 
 static void gen_op_load_fpr_QT1(unsigned int src)
 {
-    tcg_gen_st_i32(cpu_fpr[src], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.upmost));
-    tcg_gen_st_i32(cpu_fpr[src + 1], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.upper));
-    tcg_gen_st_i32(cpu_fpr[src + 2], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.lower));
-    tcg_gen_st_i32(cpu_fpr[src + 3], cpu_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, l.lowest));
+    tcg_gen_st_i64(cpu_fpr[src / 2], cpu_env, offsetof(CPUSPARCState, qt1) +
+                   offsetof(CPU_QuadU, ll.upper));
+    tcg_gen_st_i64(cpu_fpr[src/2 + 1], cpu_env, offsetof(CPUSPARCState, qt1) +
+                   offsetof(CPU_QuadU, ll.lower));
 }
 
 static void gen_op_store_QT0_fpr(unsigned int dst)
 {
-    tcg_gen_ld_i32(cpu_fpr[dst], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upmost));
-    tcg_gen_ld_i32(cpu_fpr[dst + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.upper));
-    tcg_gen_ld_i32(cpu_fpr[dst + 2], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lower));
-    tcg_gen_ld_i32(cpu_fpr[dst + 3], cpu_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, l.lowest));
+    tcg_gen_ld_i64(cpu_fpr[dst / 2], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.upper));
+    tcg_gen_ld_i64(cpu_fpr[dst/2 + 1], cpu_env, offsetof(CPUSPARCState, qt0) +
+                   offsetof(CPU_QuadU, ll.lower));
 }
 
+#ifdef TARGET_SPARC64
+static void gen_move_Q(unsigned int rd, unsigned int rs)
+{
+    rd = QFPREG(rd);
+    rs = QFPREG(rs);
+
+    tcg_gen_mov_i64(cpu_fpr[rd / 2], cpu_fpr[rs / 2]);
+    tcg_gen_mov_i64(cpu_fpr[rd / 2 + 1], cpu_fpr[rs / 2 + 1]);
+    gen_update_fprs_dirty(rd);
+}
+#endif
+
 /* moves */
 #ifdef CONFIG_USER_ONLY
 #define supervisor(dc) 0
@@ -1419,20 +1470,20 @@ static inline void gen_op_fcmps(int fccno, TCGv_i32 r_rs1, TCGv_i32 r_rs2)
     }
 }
 
-static inline void gen_op_fcmpd(int fccno)
+static inline void gen_op_fcmpd(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
     switch (fccno) {
     case 0:
-        gen_helper_fcmpd(cpu_env);
+        gen_helper_fcmpd(cpu_env, r_rs1, r_rs2);
         break;
     case 1:
-        gen_helper_fcmpd_fcc1(cpu_env);
+        gen_helper_fcmpd_fcc1(cpu_env, r_rs1, r_rs2);
         break;
     case 2:
-        gen_helper_fcmpd_fcc2(cpu_env);
+        gen_helper_fcmpd_fcc2(cpu_env, r_rs1, r_rs2);
         break;
     case 3:
-        gen_helper_fcmpd_fcc3(cpu_env);
+        gen_helper_fcmpd_fcc3(cpu_env, r_rs1, r_rs2);
         break;
     }
 }
@@ -1473,20 +1524,20 @@ static inline void gen_op_fcmpes(int fccno, TCGv_i32 r_rs1, TCGv_i32 r_rs2)
     }
 }
 
-static inline void gen_op_fcmped(int fccno)
+static inline void gen_op_fcmped(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
     switch (fccno) {
     case 0:
-        gen_helper_fcmped(cpu_env);
+        gen_helper_fcmped(cpu_env, r_rs1, r_rs2);
         break;
     case 1:
-        gen_helper_fcmped_fcc1(cpu_env);
+        gen_helper_fcmped_fcc1(cpu_env, r_rs1, r_rs2);
         break;
     case 2:
-        gen_helper_fcmped_fcc2(cpu_env);
+        gen_helper_fcmped_fcc2(cpu_env, r_rs1, r_rs2);
         break;
     case 3:
-        gen_helper_fcmped_fcc3(cpu_env);
+        gen_helper_fcmped_fcc3(cpu_env, r_rs1, r_rs2);
         break;
     }
 }
@@ -1516,9 +1567,9 @@ static inline void gen_op_fcmps(int fccno, TCGv r_rs1, TCGv r_rs2)
     gen_helper_fcmps(cpu_env, r_rs1, r_rs2);
 }
 
-static inline void gen_op_fcmpd(int fccno)
+static inline void gen_op_fcmpd(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
-    gen_helper_fcmpd(cpu_env);
+    gen_helper_fcmpd(cpu_env, r_rs1, r_rs2);
 }
 
 static inline void gen_op_fcmpq(int fccno)
@@ -1531,9 +1582,9 @@ static inline void gen_op_fcmpes(int fccno, TCGv r_rs1, TCGv r_rs2)
     gen_helper_fcmpes(cpu_env, r_rs1, r_rs2);
 }
 
-static inline void gen_op_fcmped(int fccno)
+static inline void gen_op_fcmped(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
 {
-    gen_helper_fcmped(cpu_env);
+    gen_helper_fcmped(cpu_env, r_rs1, r_rs2);
 }
 
 static inline void gen_op_fcmpeq(int fccno)
@@ -1570,21 +1621,313 @@ static int gen_trap_ifnofpu(DisasContext *dc, TCGv r_cond)
     return 0;
 }
 
-static inline void gen_update_fprs_dirty(int rd)
+static inline void gen_op_clear_ieee_excp_and_FTT(void)
 {
-#if defined(TARGET_SPARC64)
-    tcg_gen_ori_i32(cpu_fprs, cpu_fprs, (rd < 32) ? 1 : 2);
+    tcg_gen_andi_tl(cpu_fsr, cpu_fsr, FSR_FTT_CEXC_NMASK);
+}
+
+static inline void gen_fop_FF(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i32, TCGv_ptr, TCGv_i32))
+{
+    TCGv_i32 dst, src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_ne_fop_FF(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, src);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_fop_FFF(DisasContext *dc, int rd, int rs1, int rs2,
+                        void (*gen)(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env, src1, src2);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_FFF(DisasContext *dc, int rd, int rs1, int rs2,
+                                  void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, src1, src2);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
 #endif
+
+static inline void gen_fop_DD(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i64))
+{
+    TCGv_i64 dst, src;
+
+    src = gen_load_fpr_D(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_D(dc, rd, dst);
 }
 
-static inline void gen_op_clear_ieee_excp_and_FTT(void)
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_DD(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_i64, TCGv_i64))
 {
-    tcg_gen_andi_tl(cpu_fsr, cpu_fsr, FSR_FTT_CEXC_NMASK);
+    TCGv_i64 dst, src;
+
+    src = gen_load_fpr_D(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, src);
+
+    gen_store_fpr_D(dc, rd, dst);
 }
+#endif
 
-static inline void gen_clear_float_exceptions(void)
+static inline void gen_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                        void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64))
 {
-    gen_helper_clear_float_exceptions(cpu_env);
+    TCGv_i64 dst, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                                  void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_gsr_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                           void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_gsr, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_ne_fop_DDDD(DisasContext *dc, int rd, int rs1, int rs2,
+                           void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src0, src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+    src0 = gen_load_fpr_D(dc, rd);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, src0, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+#endif
+
+static inline void gen_fop_QQ(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_ptr))
+{
+    gen_op_load_fpr_QT1(QFPREG(rs));
+
+    gen(cpu_env);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_ne_fop_QQ(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_ptr))
+{
+    gen_op_load_fpr_QT1(QFPREG(rs));
+
+    gen(cpu_env);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+#endif
+
+static inline void gen_fop_QQQ(DisasContext *dc, int rd, int rs1, int rs2,
+                               void (*gen)(TCGv_ptr))
+{
+    gen_op_load_fpr_QT0(QFPREG(rs1));
+    gen_op_load_fpr_QT1(QFPREG(rs2));
+
+    gen(cpu_env);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+static inline void gen_fop_DFF(DisasContext *dc, int rd, int rs1, int rs2,
+                        void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i32, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_fop_QDD(DisasContext *dc, int rd, int rs1, int rs2,
+                               void (*gen)(TCGv_ptr, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 src1, src2;
+
+    src1 = gen_load_fpr_D(dc, rs1);
+    src2 = gen_load_fpr_D(dc, rs2);
+
+    gen(cpu_env, src1, src2);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+#ifdef TARGET_SPARC64
+static inline void gen_fop_DF(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+#endif
+
+static inline void gen_ne_fop_DF(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_i64, TCGv_ptr, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src;
+
+    src = gen_load_fpr_F(dc, rs);
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_fop_FD(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i32, TCGv_ptr, TCGv_i64))
+{
+    TCGv_i32 dst;
+    TCGv_i64 src;
+
+    src = gen_load_fpr_D(dc, rs);
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env, src);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_fop_FQ(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i32, TCGv_ptr))
+{
+    TCGv_i32 dst;
+
+    gen_op_load_fpr_QT1(QFPREG(rs));
+    dst = gen_dest_fpr_F();
+
+    gen(dst, cpu_env);
+
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static inline void gen_fop_DQ(DisasContext *dc, int rd, int rs,
+                              void (*gen)(TCGv_i64, TCGv_ptr))
+{
+    TCGv_i64 dst;
+
+    gen_op_load_fpr_QT1(QFPREG(rs));
+    dst = gen_dest_fpr_D();
+
+    gen(dst, cpu_env);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static inline void gen_ne_fop_QF(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_ptr, TCGv_i32))
+{
+    TCGv_i32 src;
+
+    src = gen_load_fpr_F(dc, rs);
+
+    gen(cpu_env, src);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
+}
+
+static inline void gen_ne_fop_QD(DisasContext *dc, int rd, int rs,
+                                 void (*gen)(TCGv_ptr, TCGv_i64))
+{
+    TCGv_i64 src;
+
+    src = gen_load_fpr_D(dc, rs);
+
+    gen(cpu_env, src);
+
+    gen_op_store_QT0_fpr(QFPREG(rd));
+    gen_update_fprs_dirty(QFPREG(rd));
 }
 
 /* asi moves */
@@ -1878,6 +2221,148 @@ static inline void gen_load_trap_state_at_tl(TCGv_ptr r_tsptr, TCGv_ptr cpu_env)
 
     tcg_temp_free_i32(r_tl);
 }
+
+static void gen_edge(DisasContext *dc, TCGv dst, TCGv s1, TCGv s2,
+                     int width, bool cc, bool left)
+{
+    TCGv lo1, lo2, t1, t2;
+    uint64_t amask, tabl, tabr;
+    int shift, imask, omask;
+
+    if (cc) {
+        tcg_gen_mov_tl(cpu_cc_src, s1);
+        tcg_gen_mov_tl(cpu_cc_src2, s2);
+        tcg_gen_sub_tl(cpu_cc_dst, s1, s2);
+        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB);
+        dc->cc_op = CC_OP_SUB;
+    }
+
+    /* Theory of operation: there are two tables, left and right (not to
+       be confused with the left and right versions of the opcode).  These
+       are indexed by the low 3 bits of the inputs.  To make things "easy",
+       these tables are loaded into two constants, TABL and TABR below.
+       The operation index = (input & imask) << shift calculates the index
+       into the constant, while val = (table >> index) & omask calculates
+       the value we're looking for.  */
+    switch (width) {
+    case 8:
+        imask = 0x7;
+        shift = 3;
+        omask = 0xff;
+        if (left) {
+            tabl = 0x80c0e0f0f8fcfeffULL;
+            tabr = 0xff7f3f1f0f070301ULL;
+        } else {
+            tabl = 0x0103070f1f3f7fffULL;
+            tabr = 0xfffefcf8f0e0c080ULL;
+        }
+        break;
+    case 16:
+        imask = 0x6;
+        shift = 1;
+        omask = 0xf;
+        if (left) {
+            tabl = 0x8cef;
+            tabr = 0xf731;
+        } else {
+            tabl = 0x137f;
+            tabr = 0xfec8;
+        }
+        break;
+    case 32:
+        imask = 0x4;
+        shift = 0;
+        omask = 0x3;
+        if (left) {
+            tabl = (2 << 2) | 3;
+            tabr = (3 << 2) | 1;
+        } else {
+            tabl = (1 << 2) | 3;
+            tabr = (3 << 2) | 2;
+        }
+        break;
+    default:
+        abort();
+    }
+
+    lo1 = tcg_temp_new();
+    lo2 = tcg_temp_new();
+    tcg_gen_andi_tl(lo1, s1, imask);
+    tcg_gen_andi_tl(lo2, s2, imask);
+    tcg_gen_shli_tl(lo1, lo1, shift);
+    tcg_gen_shli_tl(lo2, lo2, shift);
+
+    t1 = tcg_const_tl(tabl);
+    t2 = tcg_const_tl(tabr);
+    tcg_gen_shr_tl(lo1, t1, lo1);
+    tcg_gen_shr_tl(lo2, t2, lo2);
+    tcg_gen_andi_tl(dst, lo1, omask);
+    tcg_gen_andi_tl(lo2, lo2, omask);
+
+    amask = -8;
+    if (AM_CHECK(dc)) {
+        amask &= 0xffffffffULL;
+    }
+    tcg_gen_andi_tl(s1, s1, amask);
+    tcg_gen_andi_tl(s2, s2, amask);
+
+    /* We want to compute
+        dst = (s1 == s2 ? lo1 : lo1 & lo2).
+       We've already done dst = lo1, so this reduces to
+        dst &= (s1 == s2 ? -1 : lo2)
+       Which we perform by
+        lo2 |= -(s1 == s2)
+        dst &= lo2
+    */
+    tcg_gen_setcond_tl(TCG_COND_EQ, t1, s1, s2);
+    tcg_gen_neg_tl(t1, t1);
+    tcg_gen_or_tl(lo2, lo2, t1);
+    tcg_gen_and_tl(dst, dst, lo2);
+
+    tcg_temp_free(lo1);
+    tcg_temp_free(lo2);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+
+static void gen_alignaddr(TCGv dst, TCGv s1, TCGv s2, bool left)
+{
+    TCGv tmp = tcg_temp_new();
+
+    tcg_gen_add_tl(tmp, s1, s2);
+    tcg_gen_andi_tl(dst, tmp, -8);
+    if (left) {
+        tcg_gen_neg_tl(tmp, tmp);
+    }
+    tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, tmp, 0, 3);
+
+    tcg_temp_free(tmp);
+}
+
+static void gen_faligndata(TCGv dst, TCGv gsr, TCGv s1, TCGv s2)
+{
+    TCGv t1, t2, shift;
+
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    shift = tcg_temp_new();
+
+    tcg_gen_andi_tl(shift, gsr, 7);
+    tcg_gen_shli_tl(shift, shift, 3);
+    tcg_gen_shl_tl(t1, s1, shift);
+
+    /* A shift of 64 does not produce 0 in TCG.  Divide this into a
+       shift of (up to 63) followed by a constant shift of 1.  */
+    tcg_gen_xori_tl(shift, shift, 63);
+    tcg_gen_shr_tl(t2, s2, shift);
+    tcg_gen_shri_tl(t2, t2, 1);
+
+    tcg_gen_or_tl(dst, t1, t2);
+
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(shift);
+}
 #endif
 
 #define CHECK_IU_FEATURE(dc, FEATURE)                      \
@@ -1892,6 +2377,8 @@ static void disas_sparc_insn(DisasContext * dc)
 {
     unsigned int insn, opc, rs1, rs2, rd;
     TCGv cpu_src1, cpu_src2, cpu_tmp1, cpu_tmp2;
+    TCGv_i32 cpu_src1_32, cpu_src2_32, cpu_dst_32;
+    TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
@@ -2369,350 +2856,162 @@ static void disas_sparc_insn(DisasContext * dc)
                 save_state(dc, cpu_cond);
                 switch (xop) {
                 case 0x1: /* fmovs */
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     break;
                 case 0x5: /* fnegs */
-                    gen_helper_fnegs(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs2, gen_helper_fnegs);
                     break;
                 case 0x9: /* fabss */
-                    gen_helper_fabss(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs2, gen_helper_fabss);
                     break;
                 case 0x29: /* fsqrts */
                     CHECK_FPU_FEATURE(dc, FSQRT);
-                    gen_clear_float_exceptions();
-                    gen_helper_fsqrts(cpu_tmp32, cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FF(dc, rd, rs2, gen_helper_fsqrts);
                     break;
                 case 0x2a: /* fsqrtd */
                     CHECK_FPU_FEATURE(dc, FSQRT);
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsqrtd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DD(dc, rd, rs2, gen_helper_fsqrtd);
                     break;
                 case 0x2b: /* fsqrtq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsqrtq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQ(dc, rd, rs2, gen_helper_fsqrtq);
                     break;
                 case 0x41: /* fadds */
-                    gen_clear_float_exceptions();
-                    gen_helper_fadds(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fadds);
                     break;
                 case 0x42: /* faddd */
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_faddd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_faddd);
                     break;
                 case 0x43: /* faddq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_faddq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_faddq);
                     break;
                 case 0x45: /* fsubs */
-                    gen_clear_float_exceptions();
-                    gen_helper_fsubs(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fsubs);
                     break;
                 case 0x46: /* fsubd */
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsubd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_fsubd);
                     break;
                 case 0x47: /* fsubq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fsubq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_fsubq);
                     break;
                 case 0x49: /* fmuls */
                     CHECK_FPU_FEATURE(dc, FMUL);
-                    gen_clear_float_exceptions();
-                    gen_helper_fmuls(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fmuls);
                     break;
                 case 0x4a: /* fmuld */
                     CHECK_FPU_FEATURE(dc, FMUL);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fmuld(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmuld);
                     break;
                 case 0x4b: /* fmulq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
                     CHECK_FPU_FEATURE(dc, FMUL);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fmulq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_fmulq);
                     break;
                 case 0x4d: /* fdivs */
-                    gen_clear_float_exceptions();
-                    gen_helper_fdivs(cpu_tmp32, cpu_env, cpu_fpr[rs1],
-                                     cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FFF(dc, rd, rs1, rs2, gen_helper_fdivs);
                     break;
                 case 0x4e: /* fdivd */
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdivd(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DDD(dc, rd, rs1, rs2, gen_helper_fdivd);
                     break;
                 case 0x4f: /* fdivq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT0(QFPREG(rs1));
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdivq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QQQ(dc, rd, rs1, rs2, gen_helper_fdivq);
                     break;
                 case 0x69: /* fsmuld */
                     CHECK_FPU_FEATURE(dc, FSMULD);
-                    gen_clear_float_exceptions();
-                    gen_helper_fsmuld(cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DFF(dc, rd, rs1, rs2, gen_helper_fsmuld);
                     break;
                 case 0x6e: /* fdmulq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdmulq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_fop_QDD(dc, rd, rs1, rs2, gen_helper_fdmulq);
                     break;
                 case 0xc4: /* fitos */
-                    gen_clear_float_exceptions();
-                    gen_helper_fitos(cpu_tmp32, cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FF(dc, rd, rs2, gen_helper_fitos);
                     break;
                 case 0xc6: /* fdtos */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdtos(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FD(dc, rd, rs2, gen_helper_fdtos);
                     break;
                 case 0xc7: /* fqtos */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtos(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FQ(dc, rd, rs2, gen_helper_fqtos);
                     break;
                 case 0xc8: /* fitod */
-                    gen_helper_fitod(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DF(dc, rd, rs2, gen_helper_fitod);
                     break;
                 case 0xc9: /* fstod */
-                    gen_helper_fstod(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DF(dc, rd, rs2, gen_helper_fstod);
                     break;
                 case 0xcb: /* fqtod */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtod(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DQ(dc, rd, rs2, gen_helper_fqtod);
                     break;
                 case 0xcc: /* fitoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_helper_fitoq(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QF(dc, rd, rs2, gen_helper_fitoq);
                     break;
                 case 0xcd: /* fstoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_helper_fstoq(cpu_env, cpu_fpr[rs2]);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QF(dc, rd, rs2, gen_helper_fstoq);
                     break;
                 case 0xce: /* fdtoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fdtoq(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QD(dc, rd, rs2, gen_helper_fdtoq);
                     break;
                 case 0xd1: /* fstoi */
-                    gen_clear_float_exceptions();
-                    gen_helper_fstoi(cpu_tmp32, cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FF(dc, rd, rs2, gen_helper_fstoi);
                     break;
                 case 0xd2: /* fdtoi */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdtoi(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FD(dc, rd, rs2, gen_helper_fdtoi);
                     break;
                 case 0xd3: /* fqtoi */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtoi(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FQ(dc, rd, rs2, gen_helper_fqtoi);
                     break;
 #ifdef TARGET_SPARC64
                 case 0x2: /* V9 fmovd */
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     break;
                 case 0x3: /* V9 fmovq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)], cpu_fpr[QFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1],
-                                    cpu_fpr[QFPREG(rs2) + 1]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2],
-                                    cpu_fpr[QFPREG(rs2) + 2]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3],
-                                    cpu_fpr[QFPREG(rs2) + 3]);
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_move_Q(rd, rs2);
                     break;
                 case 0x6: /* V9 fnegd */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fnegd(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs2, gen_helper_fnegd);
                     break;
                 case 0x7: /* V9 fnegq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_helper_fnegq(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QQ(dc, rd, rs2, gen_helper_fnegq);
                     break;
                 case 0xa: /* V9 fabsd */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fabsd(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs2, gen_helper_fabsd);
                     break;
                 case 0xb: /* V9 fabsq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_helper_fabsq(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QQ(dc, rd, rs2, gen_helper_fabsq);
                     break;
                 case 0x81: /* V9 fstox */
-                    gen_clear_float_exceptions();
-                    gen_helper_fstox(cpu_env, cpu_fpr[rs2]);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DF(dc, rd, rs2, gen_helper_fstox);
                     break;
                 case 0x82: /* V9 fdtox */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fdtox(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DD(dc, rd, rs2, gen_helper_fdtox);
                     break;
                 case 0x83: /* V9 fqtox */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_QT1(QFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fqtox(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DQ(dc, rd, rs2, gen_helper_fqtox);
                     break;
                 case 0x84: /* V9 fxtos */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fxtos(cpu_tmp32, cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_tmp32);
-                    gen_update_fprs_dirty(rd);
+                    gen_fop_FD(dc, rd, rs2, gen_helper_fxtos);
                     break;
                 case 0x88: /* V9 fxtod */
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fxtod(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_fop_DD(dc, rd, rs2, gen_helper_fxtod);
                     break;
                 case 0x8c: /* V9 fxtoq */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_clear_float_exceptions();
-                    gen_helper_fxtoq(cpu_env);
-                    gen_helper_check_ieee_exceptions(cpu_env);
-                    gen_op_store_QT0_fpr(QFPREG(rd));
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_ne_fop_QD(dc, rd, rs2, gen_helper_fxtoq);
                     break;
 #endif
                 default:
@@ -2738,8 +3037,8 @@ static void disas_sparc_insn(DisasContext * dc)
                     cpu_src1 = get_src1(insn, cpu_src1);
                     tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
                                        0, l1);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     gen_set_label(l1);
                     break;
                 } else if ((xop & 0x11f) == 0x006) { // V9 fmovdr
@@ -2750,9 +3049,8 @@ static void disas_sparc_insn(DisasContext * dc)
                     cpu_src1 = get_src1(insn, cpu_src1);
                     tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
                                        0, l1);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1], cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     gen_set_label(l1);
                     break;
                 } else if ((xop & 0x11f) == 0x007) { // V9 fmovqr
@@ -2764,11 +3062,7 @@ static void disas_sparc_insn(DisasContext * dc)
                     cpu_src1 = get_src1(insn, cpu_src1);
                     tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
                                        0, l1);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)], cpu_fpr[QFPREG(rs2)]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1], cpu_fpr[QFPREG(rs2) + 1]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2], cpu_fpr[QFPREG(rs2) + 2]);
-                    tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3], cpu_fpr[QFPREG(rs2) + 3]);
-                    gen_update_fprs_dirty(QFPREG(rd));
+                    gen_move_Q(rd, rs2);
                     gen_set_label(l1);
                     break;
                 }
@@ -2786,8 +3080,8 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_fcond(r_cond, fcc, cond);                   \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);     \
-                        gen_update_fprs_dirty(rd);                      \
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
+                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2802,11 +3096,8 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_fcond(r_cond, fcc, cond);                   \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)],            \
-                                        cpu_fpr[DFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],        \
-                                        cpu_fpr[DFPREG(rs2) + 1]);      \
-                        gen_update_fprs_dirty(DFPREG(rd));              \
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
+                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2821,15 +3112,7 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_fcond(r_cond, fcc, cond);                   \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)],            \
-                                        cpu_fpr[QFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1],        \
-                                        cpu_fpr[QFPREG(rs2) + 1]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2],        \
-                                        cpu_fpr[QFPREG(rs2) + 2]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3],        \
-                                        cpu_fpr[QFPREG(rs2) + 3]);      \
-                        gen_update_fprs_dirty(QFPREG(rd));              \
+                        gen_move_Q(rd, rs2);                            \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2887,8 +3170,8 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_cond(r_cond, icc, cond, dc);                \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);     \
-                        gen_update_fprs_dirty(rd);                      \
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
+                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2903,10 +3186,8 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_cond(r_cond, icc, cond, dc);                \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)],            \
-                                        cpu_fpr[DFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],        \
-                                        cpu_fpr[DFPREG(rs2) + 1]);      \
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
+                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
                         gen_update_fprs_dirty(DFPREG(rd));              \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
@@ -2922,15 +3203,7 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_cond(r_cond, icc, cond, dc);                \
                         tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
                                            0, l1);                      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd)],            \
-                                        cpu_fpr[QFPREG(rs2)]);          \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 1],        \
-                                        cpu_fpr[QFPREG(rs2) + 1]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 2],        \
-                                        cpu_fpr[QFPREG(rs2) + 2]);      \
-                        tcg_gen_mov_i32(cpu_fpr[QFPREG(rd) + 3],        \
-                                        cpu_fpr[QFPREG(rs2) + 3]);      \
-                        gen_update_fprs_dirty(QFPREG(rd));              \
+                        gen_move_Q(rd, rs2);                            \
                         gen_set_label(l1);                              \
                         tcg_temp_free(r_cond);                          \
                     }
@@ -2960,12 +3233,14 @@ static void disas_sparc_insn(DisasContext * dc)
 #undef FMOVQCC
 #endif
                     case 0x51: /* fcmps, V9 %fcc */
-                        gen_op_fcmps(rd & 3, cpu_fpr[rs1], cpu_fpr[rs2]);
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs1);
+                        cpu_src2_32 = gen_load_fpr_F(dc, rs2);
+                        gen_op_fcmps(rd & 3, cpu_src1_32, cpu_src2_32);
                         break;
                     case 0x52: /* fcmpd, V9 %fcc */
-                        gen_op_load_fpr_DT0(DFPREG(rs1));
-                        gen_op_load_fpr_DT1(DFPREG(rs2));
-                        gen_op_fcmpd(rd & 3);
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                        cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                        gen_op_fcmpd(rd & 3, cpu_src1_64, cpu_src2_64);
                         break;
                     case 0x53: /* fcmpq, V9 %fcc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
@@ -2974,12 +3249,14 @@ static void disas_sparc_insn(DisasContext * dc)
                         gen_op_fcmpq(rd & 3);
                         break;
                     case 0x55: /* fcmpes, V9 %fcc */
-                        gen_op_fcmpes(rd & 3, cpu_fpr[rs1], cpu_fpr[rs2]);
+                        cpu_src1_32 = gen_load_fpr_F(dc, rs1);
+                        cpu_src2_32 = gen_load_fpr_F(dc, rs2);
+                        gen_op_fcmpes(rd & 3, cpu_src1_32, cpu_src2_32);
                         break;
                     case 0x56: /* fcmped, V9 %fcc */
-                        gen_op_load_fpr_DT0(DFPREG(rs1));
-                        gen_op_load_fpr_DT1(DFPREG(rs2));
-                        gen_op_fcmped(rd & 3);
+                        cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                        cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                        gen_op_fcmped(rd & 3, cpu_src1_64, cpu_src2_64);
                         break;
                     case 0x57: /* fcmpeq, V9 %fcc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
@@ -3819,31 +4096,101 @@ static void disas_sparc_insn(DisasContext * dc)
 
                 switch (opf) {
                 case 0x000: /* VIS I edge8cc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x001: /* VIS II edge8n */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x002: /* VIS I edge8lcc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x003: /* VIS II edge8ln */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x004: /* VIS I edge16cc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x005: /* VIS II edge16n */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x006: /* VIS I edge16lcc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x007: /* VIS II edge16ln */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x008: /* VIS I edge32cc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x009: /* VIS II edge32n */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 0);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x00a: /* VIS I edge32lcc */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x00b: /* VIS II edge32ln */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_movl_reg_TN(rs1, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x010: /* VIS I array8 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_array8(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x012: /* VIS I array16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_array8(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 1);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -3851,7 +4198,7 @@ static void disas_sparc_insn(DisasContext * dc)
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_array8(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 2);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -3859,424 +4206,317 @@ static void disas_sparc_insn(DisasContext * dc)
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1 = get_src1(insn, cpu_src1);
                     gen_movl_reg_TN(rs2, cpu_src2);
-                    gen_helper_alignaddr(cpu_dst, cpu_env, cpu_src1, cpu_src2);
+                    gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 0);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
-                case 0x019: /* VIS II bmask */
                 case 0x01a: /* VIS I alignaddrl */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    cpu_src1 = get_src1(insn, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2);
+                    gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 1);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
+                case 0x019: /* VIS II bmask: rd = rs1 + rs2, sum -> GSR[63:32] */
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    cpu_src1 = get_src1(insn, cpu_src1);
+                    gen_movl_reg_TN(rs2, cpu_src2); /* 2nd operand is rs2, not rs1 */
+                    tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
+                    tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, cpu_dst, 32, 32);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x020: /* VIS I fcmple16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmple16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmple16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x022: /* VIS I fcmpne16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpne16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpne16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x024: /* VIS I fcmple32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmple32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmple32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x026: /* VIS I fcmpne32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpne32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpne32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x028: /* VIS I fcmpgt16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpgt16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpgt16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x02a: /* VIS I fcmpeq16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpeq16(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpeq16(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x02c: /* VIS I fcmpgt32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpgt32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpgt32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x02e: /* VIS I fcmpeq32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fcmpeq32(cpu_dst, cpu_env);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    cpu_src2_64 = gen_load_fpr_D(dc, rs2);
+                    gen_helper_fcmpeq32(cpu_dst, cpu_src1_64, cpu_src2_64);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x031: /* VIS I fmul8x16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8x16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16);
                     break;
                 case 0x033: /* VIS I fmul8x16au */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8x16au(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16au);
                     break;
                 case 0x035: /* VIS I fmul8x16al */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8x16al(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16al);
                     break;
                 case 0x036: /* VIS I fmul8sux16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8sux16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8sux16);
                     break;
                 case 0x037: /* VIS I fmul8ulx16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmul8ulx16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8ulx16);
                     break;
                 case 0x038: /* VIS I fmuld8sux16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmuld8sux16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmuld8sux16);
                     break;
                 case 0x039: /* VIS I fmuld8ulx16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fmuld8ulx16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmuld8ulx16);
                     break;
                 case 0x03a: /* VIS I fpack32 */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpack32);
+                    break;
                 case 0x03b: /* VIS I fpack16 */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    gen_helper_fpack16(cpu_dst_32, cpu_gsr, cpu_src1_64);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
+                    break;
                 case 0x03d: /* VIS I fpackfix */
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    gen_helper_fpackfix(cpu_dst_32, cpu_gsr, cpu_src1_64);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
+                    break;
                 case 0x03e: /* VIS I pdist */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS1);
+                    gen_ne_fop_DDDD(dc, rd, rs1, rs2, gen_helper_pdist);
+                    break;
                 case 0x048: /* VIS I faligndata */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_faligndata(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_faligndata);
                     break;
                 case 0x04b: /* VIS I fpmerge */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpmerge(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpmerge);
                     break;
                 case 0x04c: /* VIS II bshuffle */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_helper_bshuffle);
+                    break;
                 case 0x04d: /* VIS I fexpand */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fexpand(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fexpand);
                     break;
                 case 0x050: /* VIS I fpadd16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpadd16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpadd16);
                     break;
                 case 0x051: /* VIS I fpadd16s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpadd16s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, gen_helper_fpadd16s);
                     break;
                 case 0x052: /* VIS I fpadd32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpadd32(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpadd32);
                     break;
                 case 0x053: /* VIS I fpadd32s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpadd32s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_add_i32);
                     break;
                 case 0x054: /* VIS I fpsub16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpsub16(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpsub16);
                     break;
                 case 0x055: /* VIS I fpsub16s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpsub16s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, gen_helper_fpsub16s);
                     break;
                 case 0x056: /* VIS I fpsub32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs1));
-                    gen_op_load_fpr_DT1(DFPREG(rs2));
-                    gen_helper_fpsub32(cpu_env);
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpsub32);
                     break;
                 case 0x057: /* VIS I fpsub32s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_helper_fpsub32s(cpu_fpr[rd], cpu_env,
-                                        cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_sub_i32);
                     break;
                 case 0x060: /* VIS I fzero */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd)], 0);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd) + 1], 0);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_dst_64 = gen_dest_fpr_D();
+                    tcg_gen_movi_i64(cpu_dst_64, 0);
+                    gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x061: /* VIS I fzeros */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[rd], 0);
-                    gen_update_fprs_dirty(rd);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    tcg_gen_movi_i32(cpu_dst_32, 0);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x062: /* VIS I fnor */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nor_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_nor_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_nor_i64);
                     break;
                 case 0x063: /* VIS I fnors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nor_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_nor_i32);
                     break;
                 case 0x064: /* VIS I fandnot2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                     cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                     cpu_fpr[DFPREG(rs1) + 1],
-                                     cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_andc_i64);
                     break;
                 case 0x065: /* VIS I fandnot2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_andc_i32);
                     break;
                 case 0x066: /* VIS I fnot2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs2, tcg_gen_not_i64);
                     break;
                 case 0x067: /* VIS I fnot2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs2, tcg_gen_not_i32);
                     break;
                 case 0x068: /* VIS I fandnot1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)],
-                                     cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_andc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                     cpu_fpr[DFPREG(rs2) + 1],
-                                     cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs2, rs1, tcg_gen_andc_i64);
                     break;
                 case 0x069: /* VIS I fandnot1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_andc_i32(cpu_fpr[rd], cpu_fpr[rs2], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs2, rs1, tcg_gen_andc_i32);
                     break;
                 case 0x06a: /* VIS I fnot1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_not_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DD(dc, rd, rs1, tcg_gen_not_i64);
                     break;
                 case 0x06b: /* VIS I fnot1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_not_i32(cpu_fpr[rd], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FF(dc, rd, rs1, tcg_gen_not_i32);
                     break;
                 case 0x06c: /* VIS I fxor */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_xor_i64);
                     break;
                 case 0x06d: /* VIS I fxors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xor_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_xor_i32);
                     break;
                 case 0x06e: /* VIS I fnand */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nand_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                     cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_nand_i32(cpu_fpr[DFPREG(rd) + 1],
-                                     cpu_fpr[DFPREG(rs1) + 1],
-                                     cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_nand_i64);
                     break;
                 case 0x06f: /* VIS I fnands */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_nand_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_nand_i32);
                     break;
                 case 0x070: /* VIS I fand */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_and_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_and_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_and_i64);
                     break;
                 case 0x071: /* VIS I fands */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_and_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_and_i32);
                     break;
                 case 0x072: /* VIS I fxnor */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xori_i32(cpu_tmp32, cpu_fpr[DFPREG(rs2)], -1);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd)], cpu_tmp32,
-                                    cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_xori_i32(cpu_tmp32, cpu_fpr[DFPREG(rs2) + 1], -1);
-                    tcg_gen_xor_i32(cpu_fpr[DFPREG(rd) + 1], cpu_tmp32,
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_eqv_i64);
                     break;
                 case 0x073: /* VIS I fxnors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_xori_i32(cpu_tmp32, cpu_fpr[rs2], -1);
-                    tcg_gen_xor_i32(cpu_fpr[rd], cpu_tmp32, cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_eqv_i32);
                     break;
                 case 0x074: /* VIS I fsrc1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_mov_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs1);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     break;
                 case 0x075: /* VIS I fsrc1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs1);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     break;
                 case 0x076: /* VIS I fornot2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                    cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_orc_i64);
                     break;
                 case 0x077: /* VIS I fornot2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_orc_i32);
                     break;
                 case 0x078: /* VIS I fsrc2 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_op_load_fpr_DT0(DFPREG(rs2));
-                    gen_op_store_DT0_fpr(DFPREG(rd));
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
+                    gen_store_fpr_D(dc, rd, cpu_src1_64);
                     break;
                 case 0x079: /* VIS I fsrc2s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_mov_i32(cpu_fpr[rd], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
+                    gen_store_fpr_F(dc, rd, cpu_src1_32);
                     break;
                 case 0x07a: /* VIS I fornot1 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs2)],
-                                    cpu_fpr[DFPREG(rs1)]);
-                    tcg_gen_orc_i32(cpu_fpr[DFPREG(rd) + 1],
-                                    cpu_fpr[DFPREG(rs2) + 1],
-                                    cpu_fpr[DFPREG(rs1) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs2, rs1, tcg_gen_orc_i64);
                     break;
                 case 0x07b: /* VIS I fornot1s */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_orc_i32(cpu_fpr[rd], cpu_fpr[rs2], cpu_fpr[rs1]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs2, rs1, tcg_gen_orc_i32);
                     break;
                 case 0x07c: /* VIS I for */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_or_i32(cpu_fpr[DFPREG(rd)], cpu_fpr[DFPREG(rs1)],
-                                   cpu_fpr[DFPREG(rs2)]);
-                    tcg_gen_or_i32(cpu_fpr[DFPREG(rd) + 1],
-                                   cpu_fpr[DFPREG(rs1) + 1],
-                                   cpu_fpr[DFPREG(rs2) + 1]);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    gen_ne_fop_DDD(dc, rd, rs1, rs2, tcg_gen_or_i64);
                     break;
                 case 0x07d: /* VIS I fors */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_or_i32(cpu_fpr[rd], cpu_fpr[rs1], cpu_fpr[rs2]);
-                    gen_update_fprs_dirty(rd);
+                    gen_ne_fop_FFF(dc, rd, rs1, rs2, tcg_gen_or_i32);
                     break;
                 case 0x07e: /* VIS I fone */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd)], -1);
-                    tcg_gen_movi_i32(cpu_fpr[DFPREG(rd) + 1], -1);
-                    gen_update_fprs_dirty(DFPREG(rd));
+                    cpu_dst_64 = gen_dest_fpr_D();
+                    tcg_gen_movi_i64(cpu_dst_64, -1);
+                    gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x07f: /* VIS I fones */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    tcg_gen_movi_i32(cpu_fpr[rd], -1);
-                    gen_update_fprs_dirty(rd);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    tcg_gen_movi_i32(cpu_dst_32, -1);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x080: /* VIS I shutdown */
                 case 0x081: /* VIS II siam */
@@ -4659,8 +4899,9 @@ static void disas_sparc_insn(DisasContext * dc)
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
                     tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                    tcg_gen_trunc_tl_i32(cpu_fpr[rd], cpu_tmp0);
-                    gen_update_fprs_dirty(rd);
+                    cpu_dst_32 = gen_dest_fpr_F();
+                    tcg_gen_trunc_tl_i32(cpu_dst_32, cpu_tmp0);
+                    gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x21:      /* ldfsr, V9 ldxfsr */
 #ifdef TARGET_SPARC64
@@ -4694,16 +4935,10 @@ static void disas_sparc_insn(DisasContext * dc)
                     }
                     break;
                 case 0x23:      /* lddf, load double fpreg */
-                    {
-                        TCGv_i32 r_const;
-
-                        r_const = tcg_const_i32(dc->mem_idx);
-                        gen_address_mask(dc, cpu_addr);
-                        gen_helper_lddf(cpu_addr, r_const);
-                        tcg_temp_free_i32(r_const);
-                        gen_op_store_DT0_fpr(DFPREG(rd));
-                        gen_update_fprs_dirty(DFPREG(rd));
-                    }
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_dst_64 = gen_dest_fpr_D();
+                    tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx);
+                    gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 default:
                     goto illegal_insn;
@@ -4810,7 +5045,8 @@ static void disas_sparc_insn(DisasContext * dc)
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_fpr[rd]);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rd);
+                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_src1_32);
                     tcg_gen_qemu_st32(cpu_tmp0, cpu_addr, dc->mem_idx);
                     break;
                 case 0x25: /* stfsr, V9 stxfsr */
@@ -4853,15 +5089,9 @@ static void disas_sparc_insn(DisasContext * dc)
 #endif
 #endif
                 case 0x27: /* stdf, store double fpreg */
-                    {
-                        TCGv_i32 r_const;
-
-                        gen_op_load_fpr_DT0(DFPREG(rd));
-                        r_const = tcg_const_i32(dc->mem_idx);
-                        gen_address_mask(dc, cpu_addr);
-                        gen_helper_stdf(cpu_addr, r_const);
-                        tcg_temp_free_i32(r_const);
-                    }
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_src1_64 = gen_load_fpr_D(dc, rd);
+                    tcg_gen_qemu_st64(cpu_src1_64, cpu_addr, dc->mem_idx);
                     break;
                 default:
                     goto illegal_insn;
@@ -4996,6 +5226,13 @@ static void disas_sparc_insn(DisasContext * dc)
  egress:
     tcg_temp_free(cpu_tmp1);
     tcg_temp_free(cpu_tmp2);
+    if (dc->n_t32 != 0) {
+        int i;
+        for (i = dc->n_t32 - 1; i >= 0; --i) {
+            tcg_temp_free_i32(dc->t32[i]);
+        }
+        dc->n_t32 = 0;
+    }
 }
 
 static inline void gen_intermediate_code_internal(TranslationBlock * tb,
@@ -5095,6 +5332,7 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
     tcg_temp_free_i64(cpu_tmp64);
     tcg_temp_free_i32(cpu_tmp32);
     tcg_temp_free(cpu_tmp0);
+
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     if (!dc->is_br) {
@@ -5159,15 +5397,11 @@ void gen_intermediate_code_init(CPUSPARCState *env)
         "g6",
         "g7",
     };
-    static const char * const fregnames[64] = {
-        "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
-        "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
-        "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
-        "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
-        "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39",
-        "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47",
-        "f48", "f49", "f50", "f51", "f52", "f53", "f54", "f55",
-        "f56", "f57", "f58", "f59", "f60", "f61", "f62", "f63",
+    static const char * const fregnames[32] = {
+        "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14",
+        "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30",
+        "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46",
+        "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
     };
 
     /* init various static tables */
@@ -5237,14 +5471,16 @@ void gen_intermediate_code_init(CPUSPARCState *env)
         cpu_tbr = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, tbr),
                                      "tbr");
 #endif
-        for (i = 1; i < 8; i++)
+        for (i = 1; i < 8; i++) {
             cpu_gregs[i] = tcg_global_mem_new(TCG_AREG0,
                                               offsetof(CPUState, gregs[i]),
                                               gregnames[i]);
-        for (i = 0; i < TARGET_FPREGS; i++)
-            cpu_fpr[i] = tcg_global_mem_new_i32(TCG_AREG0,
+        }
+        for (i = 0; i < TARGET_DPREGS; i++) {
+            cpu_fpr[i] = tcg_global_mem_new_i64(TCG_AREG0,
                                                 offsetof(CPUState, fpr[i]),
                                                 fregnames[i]);
+        }
 
         /* register helpers */
 
diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
index a22c10bb43..a992c293af 100644
--- a/target-sparc/vis_helper.c
+++ b/target-sparc/vis_helper.c
@@ -20,11 +20,6 @@
 #include "cpu.h"
 #include "helper.h"
 
-#define DT0 (env->dt0)
-#define DT1 (env->dt1)
-#define QT0 (env->qt0)
-#define QT1 (env->qt1)
-
 /* This function uses non-native bit order */
 #define GET_FIELD(X, FROM, TO)                                  \
     ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
@@ -33,8 +28,7 @@
 #define GET_FIELD_SP(X, FROM, TO)               \
     GET_FIELD(X, 63 - (TO), 63 - (FROM))
 
-target_ulong helper_array8(CPUState *env, target_ulong pixel_addr,
-                           target_ulong cubesize)
+target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
 {
     return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
         (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
@@ -47,29 +41,6 @@ target_ulong helper_array8(CPUState *env, target_ulong pixel_addr,
         GET_FIELD_SP(pixel_addr, 11, 12);
 }
 
-target_ulong helper_alignaddr(CPUState *env, target_ulong addr,
-                              target_ulong offset)
-{
-    uint64_t tmp;
-
-    tmp = addr + offset;
-    env->gsr &= ~7ULL;
-    env->gsr |= tmp & 7ULL;
-    return tmp & ~7ULL;
-}
-
-void helper_faligndata(CPUState *env)
-{
-    uint64_t tmp;
-
-    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
-    /* on many architectures a shift of 64 does nothing */
-    if ((env->gsr & 7) != 0) {
-        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
-    }
-    *((uint64_t *)&DT0) = tmp;
-}
-
 #ifdef HOST_WORDS_BIGENDIAN
 #define VIS_B64(n) b[7 - (n)]
 #define VIS_W64(n) w[3 - (n)]
@@ -102,12 +73,12 @@ typedef union {
     float32 f;
 } VIS32;
 
-void helper_fpmerge(CPUState *env)
+uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
     /* Reverse calculation order to handle overlap */
     d.VIS_B64(7) = s.VIS_B64(3);
@@ -119,16 +90,16 @@ void helper_fpmerge(CPUState *env)
     d.VIS_B64(1) = s.VIS_B64(0);
     /* d.VIS_B64(0) = d.VIS_B64(0); */
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8x16(CPUState *env)
+uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                 \
     tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
@@ -143,16 +114,16 @@ void helper_fmul8x16(CPUState *env)
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8x16al(CPUState *env)
+uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                 \
     tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
@@ -167,16 +138,16 @@ void helper_fmul8x16al(CPUState *env)
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8x16au(CPUState *env)
+uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                 \
     tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
@@ -191,16 +162,16 @@ void helper_fmul8x16au(CPUState *env)
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8sux16(CPUState *env)
+uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
@@ -215,16 +186,16 @@ void helper_fmul8sux16(CPUState *env)
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmul8ulx16(CPUState *env)
+uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
@@ -239,16 +210,16 @@ void helper_fmul8ulx16(CPUState *env)
     PMUL(3);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmuld8sux16(CPUState *env)
+uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
@@ -262,16 +233,16 @@ void helper_fmuld8sux16(CPUState *env)
     PMUL(0);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fmuld8ulx16(CPUState *env)
+uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
 {
     VIS64 s, d;
     uint32_t tmp;
 
-    s.d = DT0;
-    d.d = DT1;
+    s.ll = src1;
+    d.ll = src2;
 
 #define PMUL(r)                                                         \
     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
@@ -285,42 +256,41 @@ void helper_fmuld8ulx16(CPUState *env)
     PMUL(0);
 #undef PMUL
 
-    DT0 = d.d;
+    return d.ll;
 }
 
-void helper_fexpand(CPUState *env)
+uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
 {
     VIS32 s;
     VIS64 d;
 
-    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
-    d.d = DT1;
+    s.l = (uint32_t)src1;
+    d.ll = src2;
     d.VIS_W64(0) = s.VIS_B32(0) << 4;
     d.VIS_W64(1) = s.VIS_B32(1) << 4;
     d.VIS_W64(2) = s.VIS_B32(2) << 4;
     d.VIS_W64(3) = s.VIS_B32(3) << 4;
 
-    DT0 = d.d;
+    return d.ll;
 }
 
 #define VIS_HELPER(name, F)                             \
-    void name##16(CPUState *env)                        \
+    uint64_t name##16(uint64_t src1, uint64_t src2)     \
     {                                                   \
         VIS64 s, d;                                     \
                                                         \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
+        s.ll = src1;                                    \
+        d.ll = src2;                                    \
                                                         \
         d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
         d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
         d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
         d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                         \
-        DT0 = d.d;                                      \
+        return d.ll;                                    \
     }                                                   \
                                                         \
-    uint32_t name##16s(CPUState *env, uint32_t src1,    \
-                       uint32_t src2)                   \
+    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
     {                                                   \
         VIS32 s, d;                                     \
                                                         \
@@ -333,21 +303,20 @@ void helper_fexpand(CPUState *env)
         return d.l;                                     \
     }                                                   \
                                                         \
-    void name##32(CPUState *env)                        \
+    uint64_t name##32(uint64_t src1, uint64_t src2)     \
     {                                                   \
         VIS64 s, d;                                     \
                                                         \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
+        s.ll = src1;                                    \
+        d.ll = src2;                                    \
                                                         \
         d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
         d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                         \
-        DT0 = d.d;                                      \
+        return d.ll;                                    \
     }                                                   \
                                                         \
-    uint32_t name##32s(CPUState *env, uint32_t src1,    \
-                       uint32_t src2)                   \
+    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
     {                                                   \
         VIS32 s, d;                                     \
                                                         \
@@ -365,12 +334,12 @@ VIS_HELPER(helper_fpadd, FADD)
 VIS_HELPER(helper_fpsub, FSUB)
 
 #define VIS_CMPHELPER(name, F)                                    \
-    uint64_t name##16(CPUState *env)                              \
+    uint64_t name##16(uint64_t src1, uint64_t src2)               \
     {                                                             \
         VIS64 s, d;                                               \
                                                                   \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
+        s.ll = src1;                                              \
+        d.ll = src2;                                              \
                                                                   \
         d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
         d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
@@ -381,12 +350,12 @@ VIS_HELPER(helper_fpsub, FSUB)
         return d.ll;                                              \
     }                                                             \
                                                                   \
-    uint64_t name##32(CPUState *env)                              \
+    uint64_t name##32(uint64_t src1, uint64_t src2)               \
     {                                                             \
         VIS64 s, d;                                               \
                                                                   \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
+        s.ll = src1;                                              \
+        d.ll = src2;                                              \
                                                                   \
         d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
         d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
@@ -404,3 +373,117 @@ VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 VIS_CMPHELPER(helper_fcmple, FCMPLE)
 VIS_CMPHELPER(helper_fcmpne, FCMPNE)
+
+/*
+ * Accumulate into SUM the absolute differences between the eight
+ * unsigned bytes of SRC1 and the matching bytes of SRC2, and return
+ * the updated accumulator.
+ */
+uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
+{
+    int shift;
+
+    /* Walk the byte lanes from the most significant one down.  */
+    for (shift = 56; shift >= 0; shift -= 8) {
+        int a = (src1 >> shift) & 0xff;
+        int b = (src2 >> shift) & 0xff;
+
+        /* Both lanes are in 0..255, so the difference fits in an int.  */
+        sum += (a < b) ? b - a : a - b;
+    }
+
+    return sum;
+}
+
+/* Pack the four signed 16-bit lanes of RS2 into four bytes: shift left
+   by the scale in GSR bits 3..6, right by 7, then clamp to 0..255.  */
+uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
+{
+    const int scale = (gsr >> 3) & 0xf;
+    uint32_t packed = 0;
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        int16_t src = rs2 >> (lane * 16);
+        int32_t fixed = (src << scale) >> 7;
+        if (fixed > 255) {
+            fixed = 255;
+        } else if (fixed < 0) {
+            fixed = 0;
+        }
+        packed |= (uint32_t)fixed << (8 * lane);
+    }
+    return packed;
+}
+
+/* Shift each 32-bit half of RS1 left by 8 bits and fill the vacated
+   low byte with the matching 32-bit lane of RS2, shifted left by the
+   scale in GSR bits 3..7, right by 23, then clamped to 0..255.  */
+uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
+{
+    const int scale = (gsr >> 3) & 0x1f;
+    uint64_t packed = (rs1 << 8) & ~0x000000ff000000ffULL;
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        int32_t src = rs2 >> (lane * 32);
+        int64_t fixed = ((int64_t)src << scale) >> 23;
+        if (fixed > 255) {
+            fixed = 255;
+        } else if (fixed < 0) {
+            fixed = 0;
+        }
+        packed |= (uint64_t)fixed << (32 * lane);
+    }
+    return packed;
+}
+
+/* Pack the two signed 32-bit lanes of RS2 into two signed 16-bit lanes:
+   shift left by GSR bits 3..7, right by 16, saturate to int16 range.  */
+uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
+{
+    int scale = (gsr >> 3) & 0x1f;
+    uint32_t ret = 0;
+    int word;
+
+    for (word = 0; word < 2; word++) {
+        uint32_t val;
+        int32_t src = rs2 >> (word * 32);
+        /* Widen before shifting so the scaled value keeps its high bits.  */
+        int64_t scaled = (int64_t)src << scale;
+        int64_t from_fixed = scaled >> 16;
+        val = (from_fixed < -32768 ? -32768 :
+               from_fixed > 32767 ?  32767 : from_fixed);
+        ret |= (val & 0xffff) << (word * 16);
+    }
+    return ret;
+}
+
+/* Result byte i (via VIS_B64) is byte E of the 16-byte value SRC1:SRC2,
+   E = 0 being SRC1's top byte; each E is a nibble of GSR bits 32..63.  */
+uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } s;
+    VIS64 r;
+    uint32_t i, mask, host;
+
+    /* Set up S such that we can index across all of the bytes.  */
+#ifdef HOST_WORDS_BIGENDIAN
+    s.ll[0] = src1;
+    s.ll[1] = src2;
+    host = 0;
+#else
+    s.ll[1] = src1;
+    s.ll[0] = src2;
+    host = 15;
+#endif
+    mask = gsr >> 32;
+    for (i = 0; i < 8; ++i) {
+        unsigned e = (mask >> (28 - i*4)) & 0xf;
+        r.VIS_B64(i) = s.b[e ^ host];
+    }
+    return r.ll;
+}
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index fea5983669..24ec7fc128 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2045,38 +2045,75 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
 				       TCGv_i32 arg2, unsigned int ofs,
 				       unsigned int len)
 {
+    uint32_t mask;
+    TCGv_i32 t1;
+
+    if (ofs == 0 && len == 32) {
+        tcg_gen_mov_i32(ret, arg2);
+        return;
+    }
     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
-    } else {
-        uint32_t mask = (1u << len) - 1;
-        TCGv_i32 t1 = tcg_temp_new_i32 ();
+        return;
+    }
+
+    mask = (1u << len) - 1;
+    t1 = tcg_temp_new_i32();
 
+    if (ofs + len < 32) {
         tcg_gen_andi_i32(t1, arg2, mask);
         tcg_gen_shli_i32(t1, t1, ofs);
-        tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
-        tcg_gen_or_i32(ret, ret, t1);
-
-        tcg_temp_free_i32(t1);
+    } else {
+        tcg_gen_shli_i32(t1, arg2, ofs);
     }
+    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i32(ret, ret, t1);
+
+    tcg_temp_free_i32(t1);
 }
 
 static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
 				       TCGv_i64 arg2, unsigned int ofs,
 				       unsigned int len)
 {
+    uint64_t mask;
+    TCGv_i64 t1;
+
+    if (ofs == 0 && len == 64) {
+        tcg_gen_mov_i64(ret, arg2);     /* the field is the whole word */
+        return;
+    }
     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
-    } else {
-        uint64_t mask = (1ull << len) - 1;
-        TCGv_i64 t1 = tcg_temp_new_i64 ();
+        return;
+    }
 
+#if TCG_TARGET_REG_BITS == 32
+    /* Field within one half: deposit there, then copy arg1's other half.  */
+    if (ofs >= 32) {
+        tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
+                            TCGV_LOW(arg2), ofs - 32, len);
+        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+        return;
+    }
+    if (ofs + len <= 32) {
+        tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
+                            TCGV_LOW(arg2), ofs, len);
+        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+        return;
+    }
+#endif
+    mask = (1ull << len) - 1;
+    t1 = tcg_temp_new_i64();
+    if (ofs + len < 64) {
         tcg_gen_andi_i64(t1, arg2, mask);
         tcg_gen_shli_i64(t1, t1, ofs);
-        tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
-        tcg_gen_or_i64(ret, ret, t1);
-
-        tcg_temp_free_i64(t1);
+    } else {
+        tcg_gen_shli_i64(t1, arg2, ofs);    /* shift drops the excess bits */
     }
+    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i64(ret, ret, t1);
+    tcg_temp_free_i64(t1);
 }
 
 /***************************************/
diff --git a/trace-events b/trace-events
index 820b1d6ee6..7d05c82279 100644
--- a/trace-events
+++ b/trace-events
@@ -555,7 +555,7 @@ open_eth_desc_read(uint32_t addr, uint32_t v) "DESC[%04x] -> %08x"
 open_eth_desc_write(uint32_t addr, uint32_t v) "DESC[%04x] <- %08x"
 
 # hw/9pfs/virtio-9p.c
-complete_pdu(uint16_t tag, uint8_t id, int err) "tag %d id %d err %d"
+v9fs_rerror(uint16_t tag, uint8_t id, int err) "tag %d id %d err %d"
 v9fs_version(uint16_t tag, uint8_t id, int32_t msize, char* version) "tag %d id %d msize %d version %s"
 v9fs_version_return(uint16_t tag, uint8_t id, int32_t msize, char* version) "tag %d id %d msize %d version %s"
 v9fs_attach(uint16_t tag, uint8_t id, int32_t fid, int32_t afid, char* uname, char* aname) "tag %u id %u fid %d afid %d uname %s aname %s"
diff --git a/ui/qemu-spice.h b/ui/qemu-spice.h
index f34be69f52..c35b29c1f6 100644
--- a/ui/qemu-spice.h
+++ b/ui/qemu-spice.h
@@ -25,6 +25,7 @@
 #include "qemu-option.h"
 #include "qemu-config.h"
 #include "qemu-char.h"
+#include "monitor.h"
 
 extern int using_spice;
 
@@ -37,7 +38,8 @@ int qemu_spice_set_passwd(const char *passwd,
                           bool fail_if_connected, bool disconnect_if_connected);
 int qemu_spice_set_pw_expire(time_t expires);
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
-                            const char *subject);
+                            const char *subject,
+                            MonitorCompletion cb, void *opaque);
 
 void do_info_spice_print(Monitor *mon, const QObject *data);
 void do_info_spice(Monitor *mon, QObject **ret_data);
@@ -45,6 +47,7 @@ void do_info_spice(Monitor *mon, QObject **ret_data);
 int qemu_chr_open_spice(QemuOpts *opts, CharDriverState **_chr);
 
 #else  /* CONFIG_SPICE */
+#include "monitor.h"
 
 #define using_spice 0
 static inline int qemu_spice_set_passwd(const char *passwd,
@@ -57,8 +60,13 @@ static inline int qemu_spice_set_pw_expire(time_t expires)
 {
     return -1;
 }
-static inline int qemu_spice_migrate_info(const char *h, int p, int t, const char *s)
-{ return -1; }
+static inline int qemu_spice_migrate_info(const char *h, int p, int t,
+                                          const char *s,
+                                          MonitorCompletion cb, void *opaque)
+{
+    cb(opaque, NULL);   /* !CONFIG_SPICE stub: run the completion at once */
+    return -1;          /* the stub always returns -1 */
+}
 
 #endif /* CONFIG_SPICE */
 
diff --git a/ui/spice-core.c b/ui/spice-core.c
index f556849ef9..5639c6f531 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -19,10 +19,10 @@
 #include <spice-experimental.h>
 
 #include <netdb.h>
-#include <pthread.h>
 
 #include "qemu-common.h"
 #include "qemu-spice.h"
+#include "qemu-thread.h"
 #include "qemu-timer.h"
 #include "qemu-queue.h"
 #include "qemu-x509.h"
@@ -46,7 +46,7 @@ static char *auth_passwd;
 static time_t auth_expires = TIME_MAX;
 int using_spice = 0;
 
-static pthread_t me;
+static QemuThread me;
 
 struct SpiceTimer {
     QEMUTimer *timer;
@@ -134,7 +134,7 @@ static SpiceWatch *watch_add(int fd, int event_mask, SpiceWatchFunc func, void *
 
 static void watch_remove(SpiceWatch *watch)
 {
-    watch_update_mask(watch, 0);
+    qemu_set_fd_handler(watch->fd, NULL, NULL, NULL);   /* all handlers NULL */
     QTAILQ_REMOVE(&watches, watch, next);
     g_free(watch);
 }
@@ -214,7 +214,7 @@ static void channel_event(int event, SpiceChannelEventInfo *info)
      * thread and grab the iothread lock if so before calling qemu
      * functions.
      */
-    bool need_lock = !pthread_equal(me, pthread_self());
+    bool need_lock = !qemu_thread_is_self(&me);
     if (need_lock) {
         qemu_mutex_lock_iothread();
     }
@@ -273,6 +273,38 @@ static SpiceCoreInterface core_interface = {
 #endif
 };
 
+#ifdef SPICE_INTERFACE_MIGRATION
+typedef struct SpiceMigration {
+    SpiceMigrateInstance sin;   /* container_of() anchor for the callback */
+    struct {
+        MonitorCompletion *cb;  /* pending completion, NULL when none */
+        void *opaque;           /* first argument handed to cb */
+    } connect_complete;
+} SpiceMigration;
+
+static void migrate_connect_complete_cb(SpiceMigrateInstance *sin);
+
+static const SpiceMigrateInterface migrate_interface = {
+    .base.type = SPICE_INTERFACE_MIGRATION,
+    .base.description = "migration",
+    .base.major_version = SPICE_INTERFACE_MIGRATION_MAJOR,
+    .base.minor_version = SPICE_INTERFACE_MIGRATION_MINOR,
+    .migrate_connect_complete = migrate_connect_complete_cb,
+    .migrate_end_complete = NULL,   /* no end-complete hook installed */
+};
+
+static SpiceMigration spice_migrate;    /* set up in qemu_spice_init() */
+/* Run the stored connect-completion callback, if any, then clear it.  */
+static void migrate_connect_complete_cb(SpiceMigrateInstance *sin)
+{
+    SpiceMigration *sm = container_of(sin, SpiceMigration, sin);
+    if (sm->connect_complete.cb) {
+        sm->connect_complete.cb(sm->connect_complete.opaque, NULL);
+    }
+    sm->connect_complete.cb = NULL;     /* a repeat call becomes a no-op */
+}
+#endif
+
 /* config string parsing */
 
 static int name2enum(const char *string, const char *table[], int entries)
@@ -382,7 +414,7 @@ SpiceInfo *qmp_query_spice(Error **errp)
 
     info = g_malloc0(sizeof(*info));
 
-    if (!spice_server) {
+    if (!spice_server || !opts) {
         info->enabled = false;
         return info;
     }
@@ -426,18 +458,39 @@ static void migration_state_notifier(Notifier *notifier, void *data)
 {
     MigrationState *s = data;
 
-    if (migration_has_finished(s)) {
+    if (migration_is_active(s)) {
+#ifdef SPICE_INTERFACE_MIGRATION
+        spice_server_migrate_start(spice_server);
+#endif
+    } else if (migration_has_finished(s)) {
 #if SPICE_SERVER_VERSION >= 0x000701 /* 0.7.1 */
+#ifndef SPICE_INTERFACE_MIGRATION
         spice_server_migrate_switch(spice_server);
+#else
+        spice_server_migrate_end(spice_server, true);
+    } else if (migration_has_failed(s)) {
+        spice_server_migrate_end(spice_server, false);
+#endif
 #endif
     }
 }
 
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
-                            const char *subject)
+                            const char *subject,
+                            MonitorCompletion *cb, void *opaque)
 {
-    return spice_server_migrate_info(spice_server, hostname,
-                                     port, tls_port, subject);
+    int ret;
+#ifdef SPICE_INTERFACE_MIGRATION
+    spice_migrate.connect_complete.cb = cb; /* run by the connect callback */
+    spice_migrate.connect_complete.opaque = opaque;
+    ret = spice_server_migrate_connect(spice_server, hostname,
+                                       port, tls_port, subject);
+#else
+    ret = spice_server_migrate_info(spice_server, hostname,
+                                    port, tls_port, subject);
+    cb(opaque, NULL);   /* nothing stored in this branch: complete at once */
+#endif
+    return ret;         /* result of the spice_server_migrate_* call */
 }
 
 static int add_channel(const char *name, const char *value, void *opaque)
@@ -480,7 +533,7 @@ void qemu_spice_init(void)
     spice_image_compression_t compression;
     spice_wan_compression_t wan_compr;
 
-    me = pthread_self();
+    qemu_thread_get_self(&me);
 
    if (!opts) {
         return;
@@ -627,6 +680,11 @@ void qemu_spice_init(void)
 
     migration_state.notify = migration_state_notifier;
     add_migration_state_change_notifier(&migration_state);
+#ifdef SPICE_INTERFACE_MIGRATION
+    spice_migrate.sin.base.sif = &migrate_interface.base;
+    spice_migrate.connect_complete.cb = NULL;
+    qemu_spice_add_interface(&spice_migrate.sin.base);
+#endif
 
     qemu_spice_input_init();
     qemu_spice_audio_init();