summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--bsd-user/main.c2
-rw-r--r--bsd-user/qemu.h1
-rw-r--r--cputlb.c33
-rw-r--r--docs/multiseat.txt78
-rw-r--r--docs/virtio-balloon-stats.txt5
-rw-r--r--hw/9pfs/virtio-9p-device.c2
-rw-r--r--hw/arm/nseries.c316
-rw-r--r--hw/block/virtio-blk.c12
-rw-r--r--hw/char/serial-pci.c7
-rw-r--r--hw/core/qdev-properties-system.c16
-rw-r--r--hw/core/qdev-properties.c18
-rw-r--r--hw/display/omap_lcd_template.h10
-rw-r--r--hw/display/sm501_template.h6
-rw-r--r--hw/display/vga_template.h4
-rw-r--r--hw/i386/kvm/clock.c52
-rw-r--r--hw/i386/smbios.c110
-rw-r--r--hw/mips/mips_fulong2e.c28
-rw-r--r--hw/mips/mips_malta.c176
-rw-r--r--hw/pci-host/q35.c4
-rw-r--r--hw/pci/pcie_host.c7
-rw-r--r--hw/scsi/vhost-scsi.c4
-rw-r--r--hw/scsi/virtio-scsi.c28
-rw-r--r--hw/virtio/virtio-balloon.c7
-rw-r--r--include/exec/cpu-all.h119
-rw-r--r--include/exec/cpu_ldst.h400
-rw-r--r--include/exec/cpu_ldst_template.h (renamed from include/exec/softmmu_header.h)32
-rw-r--r--include/exec/exec-all.h23
-rw-r--r--include/exec/softmmu_exec.h216
-rw-r--r--include/hw/i386/pc.h15
-rw-r--r--include/hw/i386/smbios.h10
-rw-r--r--include/hw/pci/pcie_host.h1
-rw-r--r--include/hw/qdev-core.h8
-rw-r--r--include/hw/qdev-properties.h1
-rw-r--r--include/qom/cpu.h15
-rw-r--r--include/ui/input.h1
-rw-r--r--kvm-all.c4
-rw-r--r--linux-user/main.c2
-rw-r--r--linux-user/qemu.h1
-rw-r--r--monitor.c1
-rw-r--r--qemu-options.hx4
-rwxr-xr-xscripts/kvm/kvm_stat60
-rw-r--r--softmmu_template.h (renamed from include/exec/softmmu_template.h)32
-rw-r--r--target-alpha/cpu-qom.h2
-rw-r--r--target-alpha/cpu.c1
-rw-r--r--target-alpha/cpu.h1
-rw-r--r--target-alpha/mem_helper.c27
-rw-r--r--target-alpha/translate.c1
-rw-r--r--target-arm/arm_ldst.h48
-rw-r--r--target-arm/cpu.h22
-rw-r--r--target-arm/helper.c4
-rw-r--r--target-arm/op_helper.c17
-rw-r--r--target-arm/translate-a64.c1
-rw-r--r--target-arm/translate.c1
-rw-r--r--target-cris/helper.c1
-rw-r--r--target-cris/op_helper.c17
-rw-r--r--target-cris/translate.c1
-rw-r--r--target-i386/cpu-qom.h1
-rw-r--r--target-i386/cpu.c8
-rw-r--r--target-i386/cpu.h46
-rw-r--r--target-i386/fpu_helper.c5
-rw-r--r--target-i386/gdbstub.c4
-rw-r--r--target-i386/helper.c396
-rw-r--r--target-i386/kvm.c30
-rw-r--r--target-i386/machine.c8
-rw-r--r--target-i386/mem_helper.c23
-rw-r--r--target-i386/misc_helper.c47
-rw-r--r--target-i386/seg_helper.c112
-rw-r--r--target-i386/smm_helper.c26
-rw-r--r--target-i386/svm_helper.c7
-rw-r--r--target-i386/translate.c1
-rw-r--r--target-lm32/op_helper.c12
-rw-r--r--target-lm32/translate.c1
-rw-r--r--target-m68k/op_helper.c17
-rw-r--r--target-m68k/translate.c1
-rw-r--r--target-microblaze/op_helper.c12
-rw-r--r--target-microblaze/translate.c1
-rw-r--r--target-mips/cpu-qom.h2
-rw-r--r--target-mips/cpu.c1
-rw-r--r--target-mips/cpu.h1
-rw-r--r--target-mips/op_helper.c32
-rw-r--r--target-mips/translate.c1
-rw-r--r--target-moxie/helper.c16
-rw-r--r--target-moxie/translate.c1
-rw-r--r--target-openrisc/mmu_helper.c15
-rw-r--r--target-openrisc/translate.c1
-rw-r--r--target-ppc/excp_helper.c1
-rw-r--r--target-ppc/mem_helper.c5
-rw-r--r--target-ppc/mmu_helper.c17
-rw-r--r--target-ppc/translate.c1
-rw-r--r--target-s390x/fpu_helper.c5
-rw-r--r--target-s390x/helper.c1
-rw-r--r--target-s390x/mem_helper.c16
-rw-r--r--target-s390x/misc_helper.c2
-rw-r--r--target-s390x/translate.c1
-rw-r--r--target-sh4/op_helper.c16
-rw-r--r--target-sh4/translate.c1
-rw-r--r--target-sparc/cpu-qom.h3
-rw-r--r--target-sparc/cpu.c1
-rw-r--r--target-sparc/cpu.h2
-rw-r--r--target-sparc/ldst_helper.c32
-rw-r--r--target-sparc/translate.c1
-rw-r--r--target-unicore32/op_helper.c17
-rw-r--r--target-unicore32/translate.c1
-rw-r--r--target-xtensa/cpu-qom.h2
-rw-r--r--target-xtensa/cpu.c1
-rw-r--r--target-xtensa/cpu.h1
-rw-r--r--target-xtensa/op_helper.c28
-rw-r--r--target-xtensa/translate.c1
-rw-r--r--tcg/aarch64/tcg-target.h2
-rw-r--r--tcg/arm/tcg-target.h2
-rw-r--r--tcg/i386/tcg-target.c3
-rw-r--r--tcg/i386/tcg-target.h2
-rw-r--r--tcg/ia64/tcg-target.h2
-rw-r--r--tcg/mips/tcg-target.h2
-rw-r--r--tcg/optimize.c5
-rw-r--r--tcg/ppc/tcg-target.h2
-rw-r--r--tcg/ppc64/tcg-target.h2
-rw-r--r--tcg/s390/tcg-target.h2
-rw-r--r--tcg/sparc/tcg-target.h2
-rw-r--r--tcg/tcg-op.h2
-rw-r--r--tcg/tcg-opc.h114
-rw-r--r--tcg/tcg.c123
-rw-r--r--tcg/tcg.h13
-rw-r--r--tcg/tci/tcg-target.c76
-rw-r--r--tcg/tci/tcg-target.h2
-rw-r--r--tci.c323
-rw-r--r--tests/Makefile4
-rw-r--r--tests/bios-tables-test.c (renamed from tests/acpi-test.c)126
-rw-r--r--tests/test-qdev-global-props.c4
-rw-r--r--ui/curses.c10
-rw-r--r--ui/input-legacy.c45
-rw-r--r--ui/input.c108
-rw-r--r--ui/vnc.c2
-rw-r--r--user-exec.c1
-rw-r--r--vl.c2
135 files changed, 1999 insertions, 1981 deletions
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 4ba61da896..0e8c26c137 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1004,7 +1004,7 @@ int main(int argc, char **argv)
 
 #if defined(TARGET_I386)
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
-    env->hflags |= HF_PE_MASK;
+    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
         env->cr[4] |= CR4_OSFXSR_MASK;
         env->hflags |= HF_OSFXSR_MASK;
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index ddc74ed0d7..9d90668ddd 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -5,6 +5,7 @@
 #include <string.h>
 
 #include "cpu.h"
+#include "exec/cpu_ldst.h"
 
 #undef DEBUG_REMAP
 #ifdef DEBUG_REMAP
diff --git a/cputlb.c b/cputlb.c
index 7bd3573025..afd3705ff3 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -22,11 +22,13 @@
 #include "exec/exec-all.h"
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/cputlb.h"
 
 #include "exec/memory-internal.h"
 #include "exec/ram_addr.h"
+#include "tcg/tcg.h"
 
 //#define DEBUG_TLB
 //#define DEBUG_TLB_CHECK
@@ -330,21 +332,36 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
     return qemu_ram_addr_from_host_nofail(p);
 }
 
+#define MMUSUFFIX _mmu
+
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef MMUSUFFIX
+
 #define MMUSUFFIX _cmmu
-#undef GETPC
-#define GETPC() ((uintptr_t)0)
+#undef GETPC_ADJ
+#define GETPC_ADJ 0
+#undef GETRA
+#define GETRA() ((uintptr_t)0)
 #define SOFTMMU_CODE_ACCESS
 
 #define SHIFT 0
-#include "exec/softmmu_template.h"
+#include "softmmu_template.h"
 
 #define SHIFT 1
-#include "exec/softmmu_template.h"
+#include "softmmu_template.h"
 
 #define SHIFT 2
-#include "exec/softmmu_template.h"
+#include "softmmu_template.h"
 
 #define SHIFT 3
-#include "exec/softmmu_template.h"
-
-#undef env
+#include "softmmu_template.h"
diff --git a/docs/multiseat.txt b/docs/multiseat.txt
index a6c71dd74f..67151e0849 100644
--- a/docs/multiseat.txt
+++ b/docs/multiseat.txt
@@ -6,16 +6,20 @@ host side
 ---------
 
 First you must compile qemu with a user interface supporting
-multihead/multiseat and input event routing.  Right now this list is
-pretty short: sdl2.
+multihead/multiseat and input event routing.  Right now this
+list includes sdl2 and gtk (both 2+3):
 
   ./configure --enable-sdl --with-sdlabi=2.0
 
+or
+
+  ./configure --enable-gtk
+
 
 Next put together the qemu command line:
 
 qemu	-enable-kvm -usb $memory $disk $whatever \
-	-display sdl \
+	-display [ sdl | gtk ] \
 	-vga std \
 	-device usb-tablet
 
@@ -37,6 +41,20 @@ The "display=video2" sets up the input routing.  Any input coming from
 the window which belongs to the video.2 display adapter will be routed
 to these input devices.
 
+The sdl2 ui will start up with two windows, one for each display
+device.  The gtk ui will start with a single window and each display
+in a separate tab.  You can either simply switch tabs to switch heads,
+or use the "View / Detach tab" menu item to move one of the displays
+to its own window so you can see both display devices side-by-side.
+
+Note on spice: Spice handles multihead just fine.  But it can't do
+multiseat.  For tablet events the event source is sent to the spice
+agent.  But qemu can't figure it, so it can't do input routing.
+Fixing this needs a new or extended input interface between
+libspice-server and qemu.  For keyboard events it is even worse:  The
+event source isn't included in the spice protocol, so the wire
+protocol must be extended to support this.
+
 
 guest side
 ----------
@@ -46,29 +64,37 @@ You need a pretty recent linux guest.  systemd with loginctl.  kernel
 fully updated for the new kernel though, i.e. the live iso doesn't cut
 it.
 
-Now we'll have to configure the guest.  Boot and login.  By default
-all devices belong to seat0.  You can use "loginctl seat-status seat0"
-to list them all (and to get the sysfs paths for cut+paste).  Now
-we'll go assign all pci devices connected the pci bridge in slot 12 to
-a new head:
-
-loginctl attach seat-qemu \
-	 /sys/devices/pci0000:00/0000:00:12.0/0000:01:02.0/drm/card1
-loginctl attach seat-qemu \
-	 /sys/devices/pci0000:00/0000:00:12.0/0000:01:02.0/graphics/fb1
-loginctl attach seat-qemu \
-	 /sys/devices/pci0000:00/0000:00:12.0/0000:01:0f.0/usb2
-
-Use "loginctl seat-status seat-qemu" to check the result.  It isn't
-needed to assign the usb devices to the head individually, assigning a
-usb (root) hub will automatically assign all usb devices connected to
-it too.
-
-BTW: loginctl writes udev rules to /etc/udev/rules.d to make these
-device assignments permanent, so you need to do this only once.
-
-Now simply restart gdm (rebooting will do too), and a login screen
-should show up on the second head.
+Now we'll have to configure the guest.  Boot and login.  "lspci -vt"
+should list the pci bridge with the display adapter and usb controller:
+
+    [root@fedora ~]# lspci -vt
+    -[0000:00]-+-00.0  Intel Corporation 440FX - 82441FX PMC [Natoma]
+               [ ... ]
+               \-12.0-[01]--+-02.0  Device 1234:1111
+                            \-0f.0  NEC Corporation USB 3.0 Host Controller
+
+Good.  Now lets tell the system that the pci bridge and all devices
+below it belong to a separate seat by dropping a file into
+/etc/udev/rules.d:
+
+    [root@fedora ~]# cat /etc/udev/rules.d/70-qemu-autoseat.rules
+    SUBSYSTEMS=="pci", DEVPATH=="*/0000:00:12.0", TAG+="seat", ENV{ID_AUTOSEAT}="1"
+
+Reboot.  System should come up with two seats.  With loginctl you can
+check the configuration:
+
+    [root@fedora ~]# loginctl list-seats
+    SEAT
+    seat0
+    seat-pci-pci-0000_00_12_0
+
+    2 seats listed.
+
+You can use "loginctl seat-status seat-pci-pci-0000_00_12_0" to list
+the devices attached to the seat.
+
+Background info is here:
+  http://www.freedesktop.org/wiki/Software/systemd/multiseat/
 
 Enjoy!
 
diff --git a/docs/virtio-balloon-stats.txt b/docs/virtio-balloon-stats.txt
index f74612f468..edff5f22a8 100644
--- a/docs/virtio-balloon-stats.txt
+++ b/docs/virtio-balloon-stats.txt
@@ -35,7 +35,8 @@ which will return a dictionary containing:
 
   o A key named last-update, which contains the last stats update
     timestamp in seconds. Since this timestamp is generated by the host,
-    a buggy guest can't influence its value
+    a buggy guest can't influence its value. The value is 0 if the guest
+    has not updated the stats (yet).
 
 It's also important to note the following:
 
@@ -49,7 +50,7 @@ It's also important to note the following:
 
  - Polling can be enabled even if the guest doesn't have stats support
    or the balloon driver wasn't loaded in the guest. If this is the case
-   and stats are queried, an error will be returned
+   and stats are queried, last-update will be 0.
 
  - The polling timer is only re-armed when the guest responds to the
    statistics request. This means that if a (buggy) guest doesn't ever
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 15a4983eee..653762af1a 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -34,7 +34,7 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config)
 
     len = strlen(s->tag);
     cfg = g_malloc0(sizeof(struct virtio_9p_config) + len);
-    stw_raw(&cfg->tag_len, len);
+    stw_p(&cfg->tag_len, len);
     /* We don't copy the terminating null to config space */
     memcpy(cfg->tag, s->tag, len);
     memcpy(config, cfg, s->config_size);
diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c
index 82772c657e..4f092d6446 100644
--- a/hw/arm/nseries.c
+++ b/hw/arm/nseries.c
@@ -239,8 +239,9 @@ static void n800_key_event(void *opaque, int keycode)
     int code = s->keymap[keycode & 0x7f];
 
     if (code == -1) {
-        if ((keycode & 0x7f) == RETU_KEYCODE)
+        if ((keycode & 0x7f) == RETU_KEYCODE) {
             retu_key_event(s->retu, !(keycode & 0x80));
+        }
         return;
     }
 
@@ -280,11 +281,14 @@ static void n800_tsc_kbd_setup(struct n800_s *s)
     s->ts.opaque = s->ts.chip->opaque;
     s->ts.txrx = tsc210x_txrx;
 
-    for (i = 0; i < 0x80; i ++)
+    for (i = 0; i < 0x80; i++) {
         s->keymap[i] = -1;
-    for (i = 0; i < 0x10; i ++)
-        if (n800_keys[i] >= 0)
+    }
+    for (i = 0; i < 0x10; i++) {
+        if (n800_keys[i] >= 0) {
             s->keymap[n800_keys[i]] = i;
+        }
+    }
 
     qemu_add_kbd_event_handler(n800_key_event, s);
 
@@ -308,8 +312,9 @@ static void n810_key_event(void *opaque, int keycode)
     int code = s->keymap[keycode & 0x7f];
 
     if (code == -1) {
-        if ((keycode & 0x7f) == RETU_KEYCODE)
+        if ((keycode & 0x7f) == RETU_KEYCODE) {
             retu_key_event(s->retu, !(keycode & 0x80));
+        }
         return;
     }
 
@@ -388,11 +393,14 @@ static void n810_kbd_setup(struct n800_s *s)
     qemu_irq kbd_irq = qdev_get_gpio_in(s->mpu->gpio, N810_KEYBOARD_GPIO);
     int i;
 
-    for (i = 0; i < 0x80; i ++)
+    for (i = 0; i < 0x80; i++) {
         s->keymap[i] = -1;
-    for (i = 0; i < 0x80; i ++)
-        if (n810_keys[i] > 0)
+    }
+    for (i = 0; i < 0x80; i++) {
+        if (n810_keys[i] > 0) {
             s->keymap[n810_keys[i]] = i;
+        }
+    }
 
     qemu_add_kbd_event_handler(n810_key_event, s);
 
@@ -449,17 +457,20 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
     struct mipid_s *s = (struct mipid_s *) opaque;
     uint8_t ret;
 
-    if (len > 9)
+    if (len > 9) {
         hw_error("%s: FIXME: bad SPI word width %i\n", __FUNCTION__, len);
+    }
 
-    if (s->p >= ARRAY_SIZE(s->resp))
+    if (s->p >= ARRAY_SIZE(s->resp)) {
         ret = 0;
-    else
-        ret = s->resp[s->p ++];
-    if (s->pm --> 0)
+    } else {
+        ret = s->resp[s->p++];
+    }
+    if (s->pm-- > 0) {
         s->param[s->pm] = cmd;
-    else
+    } else {
         s->cmd = cmd;
+    }
 
     switch (s->cmd) {
     case 0x00:	/* NOP */
@@ -560,15 +571,17 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
         goto bad_cmd;
 
     case 0x25:	/* WRCNTR */
-        if (s->pm < 0)
+        if (s->pm < 0) {
             s->pm = 1;
+        }
         goto bad_cmd;
 
     case 0x26:	/* GAMSET */
-        if (!s->pm)
+        if (!s->pm) {
             s->gamma = ffs(s->param[0] & 0xf) - 1;
-        else if (s->pm < 0)
+        } else if (s->pm < 0) {
             s->pm = 1;
+        }
         break;
 
     case 0x28:	/* DISPOFF */
@@ -591,10 +604,11 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
         s->te = 0;
         break;
     case 0x35:	/* TEON */
-        if (!s->pm)
+        if (!s->pm) {
             s->te = 1;
-        else if (s->pm < 0)
+        } else if (s->pm < 0) {
             s->pm = 1;
+        }
         break;
 
     case 0x36:	/* MADCTR */
@@ -613,8 +627,9 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
 
     case 0xb0:	/* CLKINT / DISCTL */
     case 0xb1:	/* CLKEXT */
-        if (s->pm < 0)
+        if (s->pm < 0) {
             s->pm = 2;
+        }
         break;
 
     case 0xb4:	/* FRMSEL */
@@ -635,8 +650,9 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
         break;
 
     case 0xc2:	/* IFMOD */
-        if (s->pm < 0)
+        if (s->pm < 0) {
             s->pm = 2;
+        }
         break;
 
     case 0xc6:	/* PWRCTL */
@@ -834,118 +850,119 @@ static void n800_setup_nolo_tags(void *sram_base)
 
     strcpy((void *) (p + 8), "F5");
 
-    stl_raw(p + 10, 0x04f70000);
+    stl_p(p + 10, 0x04f70000);
     strcpy((void *) (p + 9), "RX-34");
 
     /* RAM size in MB? */
-    stl_raw(p + 12, 0x80);
+    stl_p(p + 12, 0x80);
 
     /* Pointer to the list of tags */
-    stl_raw(p + 13, OMAP2_SRAM_BASE + 0x9000);
+    stl_p(p + 13, OMAP2_SRAM_BASE + 0x9000);
 
     /* The NOLO tags start here */
     p = sram_base + 0x9000;
 #define ADD_TAG(tag, len)				\
-    stw_raw((uint16_t *) p + 0, tag);			\
-    stw_raw((uint16_t *) p + 1, len); p ++;		\
-    stl_raw(p ++, OMAP2_SRAM_BASE | (((void *) v - sram_base) & 0xffff));
+    stw_p((uint16_t *) p + 0, tag);			\
+    stw_p((uint16_t *) p + 1, len); p++;		\
+    stl_p(p++, OMAP2_SRAM_BASE | (((void *) v - sram_base) & 0xffff));
 
     /* OMAP STI console? Pin out settings? */
     ADD_TAG(0x6e01, 414);
-    for (i = 0; i < ARRAY_SIZE(n800_pinout); i ++)
-        stl_raw(v ++, n800_pinout[i]);
+    for (i = 0; i < ARRAY_SIZE(n800_pinout); i++) {
+        stl_p(v++, n800_pinout[i]);
+    }
 
     /* Kernel memsize? */
     ADD_TAG(0x6e05, 1);
-    stl_raw(v ++, 2);
+    stl_p(v++, 2);
 
     /* NOLO serial console */
     ADD_TAG(0x6e02, 4);
-    stl_raw(v ++, XLDR_LL_UART);	/* UART number (1 - 3) */
+    stl_p(v++, XLDR_LL_UART);		/* UART number (1 - 3) */
 
 #if 0
     /* CBUS settings (Retu/AVilma) */
     ADD_TAG(0x6e03, 6);
-    stw_raw((uint16_t *) v + 0, 65);	/* CBUS GPIO0 */
-    stw_raw((uint16_t *) v + 1, 66);	/* CBUS GPIO1 */
-    stw_raw((uint16_t *) v + 2, 64);	/* CBUS GPIO2 */
+    stw_p((uint16_t *) v + 0, 65);	/* CBUS GPIO0 */
+    stw_p((uint16_t *) v + 1, 66);	/* CBUS GPIO1 */
+    stw_p((uint16_t *) v + 2, 64);	/* CBUS GPIO2 */
     v += 2;
 #endif
 
     /* Nokia ASIC BB5 (Retu/Tahvo) */
     ADD_TAG(0x6e0a, 4);
-    stw_raw((uint16_t *) v + 0, 111);	/* "Retu" interrupt GPIO */
-    stw_raw((uint16_t *) v + 1, 108);	/* "Tahvo" interrupt GPIO */
-    v ++;
+    stw_p((uint16_t *) v + 0, 111);	/* "Retu" interrupt GPIO */
+    stw_p((uint16_t *) v + 1, 108);	/* "Tahvo" interrupt GPIO */
+    v++;
 
     /* LCD console? */
     ADD_TAG(0x6e04, 4);
-    stw_raw((uint16_t *) v + 0, 30);	/* ??? */
-    stw_raw((uint16_t *) v + 1, 24);	/* ??? */
-    v ++;
+    stw_p((uint16_t *) v + 0, 30);	/* ??? */
+    stw_p((uint16_t *) v + 1, 24);	/* ??? */
+    v++;
 
 #if 0
     /* LCD settings */
     ADD_TAG(0x6e06, 2);
-    stw_raw((uint16_t *) (v ++), 15);	/* ??? */
+    stw_p((uint16_t *) (v++), 15);	/* ??? */
 #endif
 
     /* I^2C (Menelaus) */
     ADD_TAG(0x6e07, 4);
-    stl_raw(v ++, 0x00720000);		/* ??? */
+    stl_p(v++, 0x00720000);		/* ??? */
 
     /* Unknown */
     ADD_TAG(0x6e0b, 6);
-    stw_raw((uint16_t *) v + 0, 94);	/* ??? */
-    stw_raw((uint16_t *) v + 1, 23);	/* ??? */
-    stw_raw((uint16_t *) v + 2, 0);	/* ??? */
+    stw_p((uint16_t *) v + 0, 94);	/* ??? */
+    stw_p((uint16_t *) v + 1, 23);	/* ??? */
+    stw_p((uint16_t *) v + 2, 0);	/* ??? */
     v += 2;
 
     /* OMAP gpio switch info */
     ADD_TAG(0x6e0c, 80);
     strcpy((void *) v, "bat_cover");	v += 3;
-    stw_raw((uint16_t *) v + 0, 110);	/* GPIO num ??? */
-    stw_raw((uint16_t *) v + 1, 1);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 0, 110);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 1, 1);	/* GPIO num ??? */
     v += 2;
     strcpy((void *) v, "cam_act");	v += 3;
-    stw_raw((uint16_t *) v + 0, 95);	/* GPIO num ??? */
-    stw_raw((uint16_t *) v + 1, 32);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 0, 95);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 1, 32);	/* GPIO num ??? */
     v += 2;
     strcpy((void *) v, "cam_turn");	v += 3;
-    stw_raw((uint16_t *) v + 0, 12);	/* GPIO num ??? */
-    stw_raw((uint16_t *) v + 1, 33);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 0, 12);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 1, 33);	/* GPIO num ??? */
     v += 2;
     strcpy((void *) v, "headphone");	v += 3;
-    stw_raw((uint16_t *) v + 0, 107);	/* GPIO num ??? */
-    stw_raw((uint16_t *) v + 1, 17);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 0, 107);	/* GPIO num ??? */
+    stw_p((uint16_t *) v + 1, 17);	/* GPIO num ??? */
     v += 2;
 
     /* Bluetooth */
     ADD_TAG(0x6e0e, 12);
-    stl_raw(v ++, 0x5c623d01);		/* ??? */
-    stl_raw(v ++, 0x00000201);		/* ??? */
-    stl_raw(v ++, 0x00000000);		/* ??? */
+    stl_p(v++, 0x5c623d01);		/* ??? */
+    stl_p(v++, 0x00000201);		/* ??? */
+    stl_p(v++, 0x00000000);		/* ??? */
 
     /* CX3110x WLAN settings */
     ADD_TAG(0x6e0f, 8);
-    stl_raw(v ++, 0x00610025);		/* ??? */
-    stl_raw(v ++, 0xffff0057);		/* ??? */
+    stl_p(v++, 0x00610025);		/* ??? */
+    stl_p(v++, 0xffff0057);		/* ??? */
 
     /* MMC host settings */
     ADD_TAG(0x6e10, 12);
-    stl_raw(v ++, 0xffff000f);		/* ??? */
-    stl_raw(v ++, 0xffffffff);		/* ??? */
-    stl_raw(v ++, 0x00000060);		/* ??? */
+    stl_p(v++, 0xffff000f);		/* ??? */
+    stl_p(v++, 0xffffffff);		/* ??? */
+    stl_p(v++, 0x00000060);		/* ??? */
 
     /* OneNAND chip select */
     ADD_TAG(0x6e11, 10);
-    stl_raw(v ++, 0x00000401);		/* ??? */
-    stl_raw(v ++, 0x0002003a);		/* ??? */
-    stl_raw(v ++, 0x00000002);		/* ??? */
+    stl_p(v++, 0x00000401);		/* ??? */
+    stl_p(v++, 0x0002003a);		/* ??? */
+    stl_p(v++, 0x00000002);		/* ??? */
 
     /* TEA5761 sensor settings */
     ADD_TAG(0x6e12, 2);
-    stl_raw(v ++, 93);			/* GPIO num ??? */
+    stl_p(v++, 93);			/* GPIO num ??? */
 
 #if 0
     /* Unknown tag */
@@ -956,8 +973,8 @@ static void n800_setup_nolo_tags(void *sram_base)
 #endif
 
     /* End of the list */
-    stl_raw(p ++, 0x00000000);
-    stl_raw(p ++, 0x00000000);
+    stl_p(p++, 0x00000000);
+    stl_p(p++, 0x00000000);
 }
 
 /* This task is normally performed by the bootloader.  If we're loading
@@ -1032,8 +1049,9 @@ static void n8x0_boot_init(void *opaque)
     s->mpu->cpu->env.GE = 0x5;
 
     /* If the machine has a slided keyboard, open it */
-    if (s->kbd)
+    if (s->kbd) {
         qemu_irq_raise(qdev_get_gpio_in(s->mpu->gpio, N810_SLIDE_GPIO));
+    }
 }
 
 #define OMAP_TAG_NOKIA_BT	0x4e01
@@ -1119,112 +1137,112 @@ static int n8x0_atag_setup(void *p, int model)
 
     w = p;
 
-    stw_raw(w ++, OMAP_TAG_UART);		/* u16 tag */
-    stw_raw(w ++, 4);				/* u16 len */
-    stw_raw(w ++, (1 << 2) | (1 << 1) | (1 << 0)); /* uint enabled_uarts */
-    w ++;
+    stw_p(w++, OMAP_TAG_UART);			/* u16 tag */
+    stw_p(w++, 4);				/* u16 len */
+    stw_p(w++, (1 << 2) | (1 << 1) | (1 << 0)); /* uint enabled_uarts */
+    w++;
 
 #if 0
-    stw_raw(w ++, OMAP_TAG_SERIAL_CONSOLE);	/* u16 tag */
-    stw_raw(w ++, 4);				/* u16 len */
-    stw_raw(w ++, XLDR_LL_UART + 1);		/* u8 console_uart */
-    stw_raw(w ++, 115200);			/* u32 console_speed */
+    stw_p(w++, OMAP_TAG_SERIAL_CONSOLE);	/* u16 tag */
+    stw_p(w++, 4);				/* u16 len */
+    stw_p(w++, XLDR_LL_UART + 1);		/* u8 console_uart */
+    stw_p(w++, 115200);				/* u32 console_speed */
 #endif
 
-    stw_raw(w ++, OMAP_TAG_LCD);		/* u16 tag */
-    stw_raw(w ++, 36);				/* u16 len */
+    stw_p(w++, OMAP_TAG_LCD);			/* u16 tag */
+    stw_p(w++, 36);				/* u16 len */
     strcpy((void *) w, "QEMU LCD panel");	/* char panel_name[16] */
     w += 8;
     strcpy((void *) w, "blizzard");		/* char ctrl_name[16] */
     w += 8;
-    stw_raw(w ++, N810_BLIZZARD_RESET_GPIO);	/* TODO: n800 s16 nreset_gpio */
-    stw_raw(w ++, 24);				/* u8 data_lines */
+    stw_p(w++, N810_BLIZZARD_RESET_GPIO);	/* TODO: n800 s16 nreset_gpio */
+    stw_p(w++, 24);				/* u8 data_lines */
 
-    stw_raw(w ++, OMAP_TAG_CBUS);		/* u16 tag */
-    stw_raw(w ++, 8);				/* u16 len */
-    stw_raw(w ++, N8X0_CBUS_CLK_GPIO);		/* s16 clk_gpio */
-    stw_raw(w ++, N8X0_CBUS_DAT_GPIO);		/* s16 dat_gpio */
-    stw_raw(w ++, N8X0_CBUS_SEL_GPIO);		/* s16 sel_gpio */
-    w ++;
+    stw_p(w++, OMAP_TAG_CBUS);			/* u16 tag */
+    stw_p(w++, 8);				/* u16 len */
+    stw_p(w++, N8X0_CBUS_CLK_GPIO);		/* s16 clk_gpio */
+    stw_p(w++, N8X0_CBUS_DAT_GPIO);		/* s16 dat_gpio */
+    stw_p(w++, N8X0_CBUS_SEL_GPIO);		/* s16 sel_gpio */
+    w++;
 
-    stw_raw(w ++, OMAP_TAG_EM_ASIC_BB5);	/* u16 tag */
-    stw_raw(w ++, 4);				/* u16 len */
-    stw_raw(w ++, N8X0_RETU_GPIO);		/* s16 retu_irq_gpio */
-    stw_raw(w ++, N8X0_TAHVO_GPIO);		/* s16 tahvo_irq_gpio */
+    stw_p(w++, OMAP_TAG_EM_ASIC_BB5);		/* u16 tag */
+    stw_p(w++, 4);				/* u16 len */
+    stw_p(w++, N8X0_RETU_GPIO);			/* s16 retu_irq_gpio */
+    stw_p(w++, N8X0_TAHVO_GPIO);		/* s16 tahvo_irq_gpio */
 
     gpiosw = (model == 810) ? n810_gpiosw_info : n800_gpiosw_info;
-    for (; gpiosw->name; gpiosw ++) {
-        stw_raw(w ++, OMAP_TAG_GPIO_SWITCH);	/* u16 tag */
-        stw_raw(w ++, 20);			/* u16 len */
+    for (; gpiosw->name; gpiosw++) {
+        stw_p(w++, OMAP_TAG_GPIO_SWITCH);	/* u16 tag */
+        stw_p(w++, 20);				/* u16 len */
         strcpy((void *) w, gpiosw->name);	/* char name[12] */
         w += 6;
-        stw_raw(w ++, gpiosw->line);		/* u16 gpio */
-        stw_raw(w ++, gpiosw->type);
-        stw_raw(w ++, 0);
-        stw_raw(w ++, 0);
+        stw_p(w++, gpiosw->line);		/* u16 gpio */
+        stw_p(w++, gpiosw->type);
+        stw_p(w++, 0);
+        stw_p(w++, 0);
     }
 
-    stw_raw(w ++, OMAP_TAG_NOKIA_BT);		/* u16 tag */
-    stw_raw(w ++, 12);				/* u16 len */
+    stw_p(w++, OMAP_TAG_NOKIA_BT);		/* u16 tag */
+    stw_p(w++, 12);				/* u16 len */
     b = (void *) w;
-    stb_raw(b ++, 0x01);			/* u8 chip_type	(CSR) */
-    stb_raw(b ++, N8X0_BT_WKUP_GPIO);		/* u8 bt_wakeup_gpio */
-    stb_raw(b ++, N8X0_BT_HOST_WKUP_GPIO);	/* u8 host_wakeup_gpio */
-    stb_raw(b ++, N8X0_BT_RESET_GPIO);		/* u8 reset_gpio */
-    stb_raw(b ++, BT_UART + 1);			/* u8 bt_uart */
+    stb_p(b++, 0x01);				/* u8 chip_type	(CSR) */
+    stb_p(b++, N8X0_BT_WKUP_GPIO);		/* u8 bt_wakeup_gpio */
+    stb_p(b++, N8X0_BT_HOST_WKUP_GPIO);		/* u8 host_wakeup_gpio */
+    stb_p(b++, N8X0_BT_RESET_GPIO);		/* u8 reset_gpio */
+    stb_p(b++, BT_UART + 1);			/* u8 bt_uart */
     memcpy(b, &n8x0_bd_addr, 6);		/* u8 bd_addr[6] */
     b += 6;
-    stb_raw(b ++, 0x02);			/* u8 bt_sysclk (38.4) */
+    stb_p(b++, 0x02);				/* u8 bt_sysclk (38.4) */
     w = (void *) b;
 
-    stw_raw(w ++, OMAP_TAG_WLAN_CX3110X);	/* u16 tag */
-    stw_raw(w ++, 8);				/* u16 len */
-    stw_raw(w ++, 0x25);			/* u8 chip_type */
-    stw_raw(w ++, N8X0_WLAN_PWR_GPIO);		/* s16 power_gpio */
-    stw_raw(w ++, N8X0_WLAN_IRQ_GPIO);		/* s16 irq_gpio */
-    stw_raw(w ++, -1);				/* s16 spi_cs_gpio */
+    stw_p(w++, OMAP_TAG_WLAN_CX3110X);		/* u16 tag */
+    stw_p(w++, 8);				/* u16 len */
+    stw_p(w++, 0x25);				/* u8 chip_type */
+    stw_p(w++, N8X0_WLAN_PWR_GPIO);		/* s16 power_gpio */
+    stw_p(w++, N8X0_WLAN_IRQ_GPIO);		/* s16 irq_gpio */
+    stw_p(w++, -1);				/* s16 spi_cs_gpio */
 
-    stw_raw(w ++, OMAP_TAG_MMC);		/* u16 tag */
-    stw_raw(w ++, 16);				/* u16 len */
+    stw_p(w++, OMAP_TAG_MMC);			/* u16 tag */
+    stw_p(w++, 16);				/* u16 len */
     if (model == 810) {
-        stw_raw(w ++, 0x23f);			/* unsigned flags */
-        stw_raw(w ++, -1);			/* s16 power_pin */
-        stw_raw(w ++, -1);			/* s16 switch_pin */
-        stw_raw(w ++, -1);			/* s16 wp_pin */
-        stw_raw(w ++, 0x240);			/* unsigned flags */
-        stw_raw(w ++, 0xc000);			/* s16 power_pin */
-        stw_raw(w ++, 0x0248);			/* s16 switch_pin */
-        stw_raw(w ++, 0xc000);			/* s16 wp_pin */
+        stw_p(w++, 0x23f);			/* unsigned flags */
+        stw_p(w++, -1);				/* s16 power_pin */
+        stw_p(w++, -1);				/* s16 switch_pin */
+        stw_p(w++, -1);				/* s16 wp_pin */
+        stw_p(w++, 0x240);			/* unsigned flags */
+        stw_p(w++, 0xc000);			/* s16 power_pin */
+        stw_p(w++, 0x0248);			/* s16 switch_pin */
+        stw_p(w++, 0xc000);			/* s16 wp_pin */
     } else {
-        stw_raw(w ++, 0xf);			/* unsigned flags */
-        stw_raw(w ++, -1);			/* s16 power_pin */
-        stw_raw(w ++, -1);			/* s16 switch_pin */
-        stw_raw(w ++, -1);			/* s16 wp_pin */
-        stw_raw(w ++, 0);			/* unsigned flags */
-        stw_raw(w ++, 0);			/* s16 power_pin */
-        stw_raw(w ++, 0);			/* s16 switch_pin */
-        stw_raw(w ++, 0);			/* s16 wp_pin */
+        stw_p(w++, 0xf);			/* unsigned flags */
+        stw_p(w++, -1);				/* s16 power_pin */
+        stw_p(w++, -1);				/* s16 switch_pin */
+        stw_p(w++, -1);				/* s16 wp_pin */
+        stw_p(w++, 0);				/* unsigned flags */
+        stw_p(w++, 0);				/* s16 power_pin */
+        stw_p(w++, 0);				/* s16 switch_pin */
+        stw_p(w++, 0);				/* s16 wp_pin */
     }
 
-    stw_raw(w ++, OMAP_TAG_TEA5761);		/* u16 tag */
-    stw_raw(w ++, 4);				/* u16 len */
-    stw_raw(w ++, N8X0_TEA5761_CS_GPIO);	/* u16 enable_gpio */
-    w ++;
+    stw_p(w++, OMAP_TAG_TEA5761);		/* u16 tag */
+    stw_p(w++, 4);				/* u16 len */
+    stw_p(w++, N8X0_TEA5761_CS_GPIO);		/* u16 enable_gpio */
+    w++;
 
     partition = (model == 810) ? n810_part_info : n800_part_info;
-    for (; partition->name; partition ++) {
-        stw_raw(w ++, OMAP_TAG_PARTITION);	/* u16 tag */
-        stw_raw(w ++, 28);			/* u16 len */
+    for (; partition->name; partition++) {
+        stw_p(w++, OMAP_TAG_PARTITION);		/* u16 tag */
+        stw_p(w++, 28);				/* u16 len */
         strcpy((void *) w, partition->name);	/* char name[16] */
         l = (void *) (w + 8);
-        stl_raw(l ++, partition->size);		/* unsigned int size */
-        stl_raw(l ++, partition->offset);	/* unsigned int offset */
-        stl_raw(l ++, partition->mask);		/* unsigned int mask_flags */
+        stl_p(l++, partition->size);		/* unsigned int size */
+        stl_p(l++, partition->offset);		/* unsigned int offset */
+        stl_p(l++, partition->mask);		/* unsigned int mask_flags */
         w = (void *) l;
     }
 
-    stw_raw(w ++, OMAP_TAG_BOOT_REASON);	/* u16 tag */
-    stw_raw(w ++, 12);				/* u16 len */
+    stw_p(w++, OMAP_TAG_BOOT_REASON);		/* u16 tag */
+    stw_p(w++, 12);				/* u16 len */
 #if 0
     strcpy((void *) w, "por");			/* char reason_str[12] */
     strcpy((void *) w, "charger");		/* char reason_str[12] */
@@ -1242,15 +1260,15 @@ static int n8x0_atag_setup(void *p, int model)
     w += 6;
 
     tag = (model == 810) ? "RX-44" : "RX-34";
-    stw_raw(w ++, OMAP_TAG_VERSION_STR);	/* u16 tag */
-    stw_raw(w ++, 24);				/* u16 len */
+    stw_p(w++, OMAP_TAG_VERSION_STR);		/* u16 tag */
+    stw_p(w++, 24);				/* u16 len */
     strcpy((void *) w, "product");		/* char component[12] */
     w += 6;
     strcpy((void *) w, tag);			/* char version[12] */
     w += 6;
 
-    stw_raw(w ++, OMAP_TAG_VERSION_STR);	/* u16 tag */
-    stw_raw(w ++, 24);				/* u16 len */
+    stw_p(w++, OMAP_TAG_VERSION_STR);		/* u16 tag */
+    stw_p(w++, 24);				/* u16 len */
     strcpy((void *) w, "hw-build");		/* char component[12] */
     w += 6;
     strcpy((void *) w, "QEMU ");
@@ -1258,8 +1276,8 @@ static int n8x0_atag_setup(void *p, int model)
     w += 6;
 
     tag = (model == 810) ? "1.1.10-qemu" : "1.1.6-qemu";
-    stw_raw(w ++, OMAP_TAG_VERSION_STR);	/* u16 tag */
-    stw_raw(w ++, 24);				/* u16 len */
+    stw_p(w++, OMAP_TAG_VERSION_STR);		/* u16 tag */
+    stw_p(w++, 24);				/* u16 len */
     strcpy((void *) w, "nolo");			/* char component[12] */
     w += 6;
     strcpy((void *) w, tag);			/* char version[12] */
@@ -1315,9 +1333,9 @@ static void n8x0_init(MachineState *machine,
     n8x0_gpio_setup(s);
     n8x0_nand_setup(s);
     n8x0_i2c_setup(s);
-    if (model == 800)
+    if (model == 800) {
         n800_tsc_kbd_setup(s);
-    else if (model == 810) {
+    } else if (model == 810) {
         n810_tsc_setup(s);
         n810_kbd_setup(s);
     }
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8a568e5edb..b1fc1de0dc 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -487,12 +487,12 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 
     bdrv_get_geometry(s->bs, &capacity);
     memset(&blkcfg, 0, sizeof(blkcfg));
-    stq_raw(&blkcfg.capacity, capacity);
-    stl_raw(&blkcfg.seg_max, 128 - 2);
-    stw_raw(&blkcfg.cylinders, s->conf->cyls);
-    stl_raw(&blkcfg.blk_size, blk_size);
-    stw_raw(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
-    stw_raw(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
+    stq_p(&blkcfg.capacity, capacity);
+    stl_p(&blkcfg.seg_max, 128 - 2);
+    stw_p(&blkcfg.cylinders, s->conf->cyls);
+    stl_p(&blkcfg.blk_size, blk_size);
+    stw_p(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
+    stw_p(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
     blkcfg.heads = s->conf->heads;
     /*
      * We must ensure that the block device capacity is a multiple of
diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c
index 991c99fa6e..acccc9cabd 100644
--- a/hw/char/serial-pci.c
+++ b/hw/char/serial-pci.c
@@ -34,6 +34,7 @@
 typedef struct PCISerialState {
     PCIDevice dev;
     SerialState state;
+    uint8_t prog_if;
 } PCISerialState;
 
 typedef struct PCIMultiSerialState {
@@ -44,6 +45,7 @@ typedef struct PCIMultiSerialState {
     SerialState  state[PCI_SERIAL_MAX_PORTS];
     uint32_t     level[PCI_SERIAL_MAX_PORTS];
     qemu_irq     *irqs;
+    uint8_t      prog_if;
 } PCIMultiSerialState;
 
 static int serial_pci_init(PCIDevice *dev)
@@ -60,6 +62,7 @@ static int serial_pci_init(PCIDevice *dev)
         return -1;
     }
 
+    pci->dev.config[PCI_CLASS_PROG] = pci->prog_if;
     pci->dev.config[PCI_INTERRUPT_PIN] = 0x01;
     s->irq = pci_allocate_irq(&pci->dev);
 
@@ -101,6 +104,7 @@ static int multi_serial_pci_init(PCIDevice *dev)
     assert(pci->ports > 0);
     assert(pci->ports <= PCI_SERIAL_MAX_PORTS);
 
+    pci->dev.config[PCI_CLASS_PROG] = pci->prog_if;
     pci->dev.config[PCI_INTERRUPT_PIN] = 0x01;
     memory_region_init(&pci->iobar, OBJECT(pci), "multiserial", 8 * pci->ports);
     pci_register_bar(&pci->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pci->iobar);
@@ -177,12 +181,14 @@ static const VMStateDescription vmstate_pci_multi_serial = {
 
 static Property serial_pci_properties[] = {
     DEFINE_PROP_CHR("chardev",  PCISerialState, state.chr),
+    DEFINE_PROP_UINT8("prog_if",  PCISerialState, prog_if, 0x02),
     DEFINE_PROP_END_OF_LIST(),
 };
 
 static Property multi_2x_serial_pci_properties[] = {
     DEFINE_PROP_CHR("chardev1",  PCIMultiSerialState, state[0].chr),
     DEFINE_PROP_CHR("chardev2",  PCIMultiSerialState, state[1].chr),
+    DEFINE_PROP_UINT8("prog_if",  PCIMultiSerialState, prog_if, 0x02),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -191,6 +197,7 @@ static Property multi_4x_serial_pci_properties[] = {
     DEFINE_PROP_CHR("chardev2",  PCIMultiSerialState, state[1].chr),
     DEFINE_PROP_CHR("chardev3",  PCIMultiSerialState, state[2].chr),
     DEFINE_PROP_CHR("chardev4",  PCIMultiSerialState, state[3].chr),
+    DEFINE_PROP_UINT8("prog_if",  PCIMultiSerialState, prog_if, 0x02),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 404cf1843d..de433b2e38 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -439,11 +439,27 @@ PropertyInfo qdev_prop_iothread = {
 static int qdev_add_one_global(QemuOpts *opts, void *opaque)
 {
     GlobalProperty *g;
+    ObjectClass *oc;
 
     g = g_malloc0(sizeof(*g));
     g->driver   = qemu_opt_get(opts, "driver");
     g->property = qemu_opt_get(opts, "property");
     g->value    = qemu_opt_get(opts, "value");
+    oc = object_class_by_name(g->driver);
+    if (oc) {
+        DeviceClass *dc = DEVICE_CLASS(oc);
+
+        if (dc->hotpluggable) {
+            /* If hotpluggable then skip not_used checking. */
+            g->not_used = false;
+        } else {
+            /* Maybe a typo. */
+            g->not_used = true;
+        }
+    } else {
+        /* Maybe a typo. */
+        g->not_used = true;
+    }
     qdev_prop_register_global(g);
     return 0;
 }
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index d8cb5408c3..3d12560f43 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -955,6 +955,23 @@ void qdev_prop_register_global_list(GlobalProperty *props)
     }
 }
 
+int qdev_prop_check_global(void)
+{
+    GlobalProperty *prop;
+    int ret = 0;
+
+    QTAILQ_FOREACH(prop, &global_props, next) {
+        if (!prop->not_used) {
+            continue;
+        }
+        ret = 1;
+        error_report("Warning: \"-global %s.%s=%s\" not used",
+                     prop->driver, prop->property, prop->value);
+
+    }
+    return ret;
+}
+
 void qdev_prop_set_globals_for_type(DeviceState *dev, const char *typename,
                                     Error **errp)
 {
@@ -966,6 +983,7 @@ void qdev_prop_set_globals_for_type(DeviceState *dev, const char *typename,
         if (strcmp(typename, prop->driver) != 0) {
             continue;
         }
+        prop->not_used = false;
         object_property_parse(OBJECT(dev), prop->value, prop->property, &err);
         if (err != NULL) {
             error_propagate(errp, err);
diff --git a/hw/display/omap_lcd_template.h b/hw/display/omap_lcd_template.h
index 2fb96f83ae..e5dd447167 100644
--- a/hw/display/omap_lcd_template.h
+++ b/hw/display/omap_lcd_template.h
@@ -50,7 +50,7 @@ static void glue(draw_line2_, DEPTH)(void *opaque,
     uint8_t v, r, g, b;
 
     do {
-        v = ldub_raw((void *) s);
+        v = ldub_p((void *) s);
         r = (pal[v & 3] >> 4) & 0xf0;
         g = pal[v & 3] & 0xf0;
         b = (pal[v & 3] << 4) & 0xf0;
@@ -89,7 +89,7 @@ static void glue(draw_line4_, DEPTH)(void *opaque,
     uint8_t v, r, g, b;
 
     do {
-        v = ldub_raw((void *) s);
+        v = ldub_p((void *) s);
         r = (pal[v & 0xf] >> 4) & 0xf0;
         g = pal[v & 0xf] & 0xf0;
         b = (pal[v & 0xf] << 4) & 0xf0;
@@ -116,7 +116,7 @@ static void glue(draw_line8_, DEPTH)(void *opaque,
     uint8_t v, r, g, b;
 
     do {
-        v = ldub_raw((void *) s);
+        v = ldub_p((void *) s);
         r = (pal[v] >> 4) & 0xf0;
         g = pal[v] & 0xf0;
         b = (pal[v] << 4) & 0xf0;
@@ -136,7 +136,7 @@ static void glue(draw_line12_, DEPTH)(void *opaque,
     uint8_t r, g, b;
 
     do {
-        v = lduw_raw((void *) s);
+        v = lduw_p((void *) s);
         r = (v >> 4) & 0xf0;
         g = v & 0xf0;
         b = (v << 4) & 0xf0;
@@ -159,7 +159,7 @@ static void glue(draw_line16_, DEPTH)(void *opaque,
     uint8_t r, g, b;
 
     do {
-        v = lduw_raw((void *) s);
+        v = lduw_p((void *) s);
         r = (v >> 8) & 0xf8;
         g = (v >> 3) & 0xfc;
         b = (v << 3) & 0xf8;
diff --git a/hw/display/sm501_template.h b/hw/display/sm501_template.h
index d4cea9e150..f33e499be4 100644
--- a/hw/display/sm501_template.h
+++ b/hw/display/sm501_template.h
@@ -47,7 +47,7 @@ static void glue(draw_line8_, PIXEL_NAME)(
 {
     uint8_t v, r, g, b;
     do {
-      	v = ldub_raw(s);
+	v = ldub_p(s);
 	r = (pal[v] >> 16) & 0xff;
 	g = (pal[v] >>  8) & 0xff;
 	b = (pal[v] >>  0) & 0xff;
@@ -64,7 +64,7 @@ static void glue(draw_line16_, PIXEL_NAME)(
     uint8_t r, g, b;
 
     do {
-	rgb565 = lduw_raw(s);
+	rgb565 = lduw_p(s);
 	r = ((rgb565 >> 11) & 0x1f) << 3;
 	g = ((rgb565 >>  5) & 0x3f) << 2;
 	b = ((rgb565 >>  0) & 0x1f) << 3;
@@ -80,7 +80,7 @@ static void glue(draw_line32_, PIXEL_NAME)(
     uint8_t r, g, b;
 
     do {
-	ldub_raw(s);
+	ldub_p(s);
 #if defined(TARGET_WORDS_BIGENDIAN)
         r = s[1];
         g = s[2];
diff --git a/hw/display/vga_template.h b/hw/display/vga_template.h
index 6cfae567b4..90ec9c208f 100644
--- a/hw/display/vga_template.h
+++ b/hw/display/vga_template.h
@@ -361,7 +361,7 @@ static void glue(vga_draw_line15_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
 
     w = width;
     do {
-        v = lduw_raw((void *)s);
+        v = lduw_p((void *)s);
         r = (v >> 7) & 0xf8;
         g = (v >> 2) & 0xf8;
         b = (v << 3) & 0xf8;
@@ -386,7 +386,7 @@ static void glue(vga_draw_line16_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
 
     w = width;
     do {
-        v = lduw_raw((void *)s);
+        v = lduw_p((void *)s);
         r = (v >> 8) & 0xf8;
         g = (v >> 3) & 0xfc;
         b = (v << 3) & 0xf8;
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 892aa025f4..bef2504389 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -14,8 +14,10 @@
  */
 
 #include "qemu-common.h"
+#include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "sysemu/cpus.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -34,6 +36,48 @@ typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
+struct pvclock_vcpu_time_info {
+    uint32_t   version;
+    uint32_t   pad0;
+    uint64_t   tsc_timestamp;
+    uint64_t   system_time;
+    uint32_t   tsc_to_system_mul;
+    int8_t     tsc_shift;
+    uint8_t    flags;
+    uint8_t    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+static uint64_t kvmclock_current_nsec(KVMClockState *s)
+{
+    CPUState *cpu = first_cpu;
+    CPUX86State *env = cpu->env_ptr;
+    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+    uint64_t migration_tsc = env->tsc;
+    struct pvclock_vcpu_time_info time;
+    uint64_t delta;
+    uint64_t nsec_lo;
+    uint64_t nsec_hi;
+    uint64_t nsec;
+
+    if (!(env->system_time_msr & 1ULL)) {
+        /* KVM clock not active */
+        return 0;
+    }
+
+    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
+
+    assert(time.tsc_timestamp <= migration_tsc);
+    delta = migration_tsc - time.tsc_timestamp;
+    if (time.tsc_shift < 0) {
+        delta >>= -time.tsc_shift;
+    } else {
+        delta <<= time.tsc_shift;
+    }
+
+    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
+    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
+    return nsec + time.system_time;
+}
 
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
@@ -45,9 +89,15 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data;
+        uint64_t time_at_migration = kvmclock_current_nsec(s);
 
         s->clock_valid = false;
 
+	/* We can't rely on the migrated clock value, just discard it */
+	if (time_at_migration) {
+	        s->clock = time_at_migration;
+	}
+
         data.clock = s->clock;
         data.flags = 0;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
@@ -75,6 +125,8 @@ static void kvmclock_vm_state_change(void *opaque, int running,
         if (s->clock_valid) {
             return;
         }
+
+        cpu_synchronize_all_states();
         ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
         if (ret < 0) {
             fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
diff --git a/hw/i386/smbios.c b/hw/i386/smbios.c
index 76607181c3..b3bedde8b9 100644
--- a/hw/i386/smbios.c
+++ b/hw/i386/smbios.c
@@ -67,7 +67,7 @@ static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1);
 
 static struct {
     const char *vendor, *version, *date;
-    bool have_major_minor;
+    bool have_major_minor, uefi;
     uint8_t major, minor;
 } type0;
 
@@ -134,6 +134,10 @@ static const QemuOptDesc qemu_smbios_type0_opts[] = {
         .name = "release",
         .type = QEMU_OPT_STRING,
         .help = "revision number",
+    },{
+        .name = "uefi",
+        .type = QEMU_OPT_BOOL,
+        .help = "uefi support",
     },
     { /* end of list */ }
 };
@@ -444,7 +448,7 @@ static bool smbios_skip_table(uint8_t type, bool required_table)
                                                                           \
         t->header.type = tbl_type;                                        \
         t->header.length = sizeof(*t);                                    \
-        t->header.handle = tbl_handle;                                    \
+        t->header.handle = cpu_to_le16(tbl_handle);                       \
     } while (0)
 
 #define SMBIOS_TABLE_SET_STR(tbl_type, field, value)                      \
@@ -491,19 +495,18 @@ static void smbios_build_type_0_table(void)
     SMBIOS_TABLE_SET_STR(0, vendor_str, type0.vendor);
     SMBIOS_TABLE_SET_STR(0, bios_version_str, type0.version);
 
-    t->bios_starting_address_segment = 0xE800; /* hardcoded in SeaBIOS */
+    t->bios_starting_address_segment = cpu_to_le16(0xE800); /* from SeaBIOS */
 
     SMBIOS_TABLE_SET_STR(0, bios_release_date_str, type0.date);
 
     t->bios_rom_size = 0; /* hardcoded in SeaBIOS with FIXME comment */
 
-    /* BIOS characteristics not supported */
-    memset(t->bios_characteristics, 0, 8);
-    t->bios_characteristics[0] = 0x08;
-
-    /* Enable targeted content distribution (needed for SVVP, per SeaBIOS) */
+    t->bios_characteristics = cpu_to_le64(0x08); /* Not supported */
     t->bios_characteristics_extension_bytes[0] = 0;
-    t->bios_characteristics_extension_bytes[1] = 4;
+    t->bios_characteristics_extension_bytes[1] = 0x14; /* TCD/SVVP | VM */
+    if (type0.uefi) {
+        t->bios_characteristics_extension_bytes[1] |= 0x08; /* |= UEFI */
+    }
 
     if (type0.have_major_minor) {
         t->system_bios_major_release = type0.major;
@@ -551,7 +554,7 @@ static void smbios_build_type_2_table(void)
     SMBIOS_TABLE_SET_STR(2, asset_tag_number_str, type2.asset);
     t->feature_flags = 0x01; /* Motherboard */
     SMBIOS_TABLE_SET_STR(2, location_str, type2.location);
-    t->chassis_handle = 0x300; /* Type 3 (System enclosure) */
+    t->chassis_handle = cpu_to_le16(0x300); /* Type 3 (System enclosure) */
     t->board_type = 0x0A; /* Motherboard */
     t->contained_element_count = 0;
 
@@ -571,7 +574,7 @@ static void smbios_build_type_3_table(void)
     t->power_supply_state = 0x03; /* Safe */
     t->thermal_state = 0x03; /* Safe */
     t->security_status = 0x02; /* Unknown */
-    t->oem_defined = 0;
+    t->oem_defined = cpu_to_le32(0);
     t->height = 0;
     t->number_of_power_cords = 0;
     t->contained_element_count = 0;
@@ -589,26 +592,27 @@ static void smbios_build_type_4_table(unsigned instance)
     snprintf(sock_str, sizeof(sock_str), "%s%2x", type4.sock_pfx, instance);
     SMBIOS_TABLE_SET_STR(4, socket_designation_str, sock_str);
     t->processor_type = 0x03; /* CPU */
+    t->processor_family = 0x01; /* Other */
     SMBIOS_TABLE_SET_STR(4, processor_manufacturer_str, type4.manufacturer);
-    t->processor_id[0] = smbios_cpuid_version;
-    t->processor_id[1] = smbios_cpuid_features;
+    t->processor_id[0] = cpu_to_le32(smbios_cpuid_version);
+    t->processor_id[1] = cpu_to_le32(smbios_cpuid_features);
     SMBIOS_TABLE_SET_STR(4, processor_version_str, type4.version);
     t->voltage = 0;
-    t->external_clock = 0; /* Unknown */
-    t->max_speed = 0; /* Unknown */
-    t->current_speed = 0; /* Unknown */
+    t->external_clock = cpu_to_le16(0); /* Unknown */
+    t->max_speed = cpu_to_le16(0); /* Unknown */
+    t->current_speed = cpu_to_le16(0); /* Unknown */
     t->status = 0x41; /* Socket populated, CPU enabled */
     t->processor_upgrade = 0x01; /* Other */
-    t->l1_cache_handle = 0xFFFF; /* N/A */
-    t->l2_cache_handle = 0xFFFF; /* N/A */
-    t->l3_cache_handle = 0xFFFF; /* N/A */
+    t->l1_cache_handle = cpu_to_le16(0xFFFF); /* N/A */
+    t->l2_cache_handle = cpu_to_le16(0xFFFF); /* N/A */
+    t->l3_cache_handle = cpu_to_le16(0xFFFF); /* N/A */
     SMBIOS_TABLE_SET_STR(4, serial_number_str, type4.serial);
     SMBIOS_TABLE_SET_STR(4, asset_tag_number_str, type4.asset);
     SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part);
     t->core_count = t->core_enabled = smp_cores;
     t->thread_count = smp_threads;
-    t->processor_characteristics = 0x02; /* Unknown */
-    t->processor_family = t->processor_family2 = 0x01; /* Other */
+    t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
+    t->processor_family2 = cpu_to_le16(0x01); /* Other */
 
     SMBIOS_BUILD_TABLE_POST;
     smbios_type4_count++;
@@ -631,14 +635,14 @@ static void smbios_build_type_16_table(unsigned dimm_cnt)
     t->error_correction = 0x06; /* Multi-bit ECC (for Microsoft, per SeaBIOS) */
     size_kb = QEMU_ALIGN_UP(ram_size, ONE_KB) / ONE_KB;
     if (size_kb < MAX_T16_STD_SZ) {
-        t->maximum_capacity = size_kb;
-        t->extended_maximum_capacity = 0;
+        t->maximum_capacity = cpu_to_le32(size_kb);
+        t->extended_maximum_capacity = cpu_to_le64(0);
     } else {
-        t->maximum_capacity = MAX_T16_STD_SZ;
-        t->extended_maximum_capacity = ram_size;
+        t->maximum_capacity = cpu_to_le32(MAX_T16_STD_SZ);
+        t->extended_maximum_capacity = cpu_to_le64(ram_size);
     }
-    t->memory_error_information_handle = 0xFFFE; /* Not provided */
-    t->number_of_memory_devices = dimm_cnt;
+    t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */
+    t->number_of_memory_devices = cpu_to_le16(dimm_cnt);
 
     SMBIOS_BUILD_TABLE_POST;
 }
@@ -653,18 +657,18 @@ static void smbios_build_type_17_table(unsigned instance, ram_addr_t size)
 
     SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */
 
-    t->physical_memory_array_handle = 0x1000; /* Type 16 (Phys. Mem. Array) */
-    t->memory_error_information_handle = 0xFFFE; /* Not provided */
-    t->total_width = 0xFFFF; /* Unknown */
-    t->data_width = 0xFFFF; /* Unknown */
+    t->physical_memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */
+    t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */
+    t->total_width = cpu_to_le16(0xFFFF); /* Unknown */
+    t->data_width = cpu_to_le16(0xFFFF); /* Unknown */
     size_mb = QEMU_ALIGN_UP(size, ONE_MB) / ONE_MB;
     if (size_mb < MAX_T17_STD_SZ) {
-        t->size = size_mb;
-        t->extended_size = 0;
+        t->size = cpu_to_le16(size_mb);
+        t->extended_size = cpu_to_le32(0);
     } else {
         assert(size_mb < MAX_T17_EXT_SZ);
-        t->size = MAX_T17_STD_SZ;
-        t->extended_size = size_mb;
+        t->size = cpu_to_le16(MAX_T17_STD_SZ);
+        t->extended_size = cpu_to_le32(size_mb);
     }
     t->form_factor = 0x09; /* DIMM */
     t->device_set = 0; /* Not in a set */
@@ -672,17 +676,17 @@ static void smbios_build_type_17_table(unsigned instance, ram_addr_t size)
     SMBIOS_TABLE_SET_STR(17, device_locator_str, loc_str);
     SMBIOS_TABLE_SET_STR(17, bank_locator_str, type17.bank);
     t->memory_type = 0x07; /* RAM */
-    t->type_detail = 0x02; /* Other */
-    t->speed = 0; /* Unknown */
+    t->type_detail = cpu_to_le16(0x02); /* Other */
+    t->speed = cpu_to_le16(0); /* Unknown */
     SMBIOS_TABLE_SET_STR(17, manufacturer_str, type17.manufacturer);
     SMBIOS_TABLE_SET_STR(17, serial_number_str, type17.serial);
     SMBIOS_TABLE_SET_STR(17, asset_tag_number_str, type17.asset);
     SMBIOS_TABLE_SET_STR(17, part_number_str, type17.part);
     t->attributes = 0; /* Unknown */
-    t->configured_clock_speed = 0; /* Unknown */
-    t->minimum_voltage = 0; /* Unknown */
-    t->maximum_voltage = 0; /* Unknown */
-    t->configured_voltage = 0; /* Unknown */
+    t->configured_clock_speed = cpu_to_le16(0); /* Unknown */
+    t->minimum_voltage = cpu_to_le16(0); /* Unknown */
+    t->maximum_voltage = cpu_to_le16(0); /* Unknown */
+    t->configured_voltage = cpu_to_le16(0); /* Unknown */
 
     SMBIOS_BUILD_TABLE_POST;
 }
@@ -699,15 +703,16 @@ static void smbios_build_type_19_table(unsigned instance,
     start_kb = start / ONE_KB;
     end_kb = end / ONE_KB;
     if (start_kb < UINT32_MAX && end_kb < UINT32_MAX) {
-        t->starting_address = start_kb;
-        t->ending_address = end_kb;
-        t->extended_starting_address = t->extended_ending_address = 0;
+        t->starting_address = cpu_to_le32(start_kb);
+        t->ending_address = cpu_to_le32(end_kb);
+        t->extended_starting_address =
+            t->extended_ending_address = cpu_to_le64(0);
     } else {
-        t->starting_address = t->ending_address = UINT32_MAX;
-        t->extended_starting_address = start;
-        t->extended_ending_address = end;
+        t->starting_address = t->ending_address = cpu_to_le32(UINT32_MAX);
+        t->extended_starting_address = cpu_to_le64(start);
+        t->extended_ending_address = cpu_to_le64(end);
     }
-    t->memory_array_handle = 0x1000; /* Type 16 (Phys. Mem. Array) */
+    t->memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */
     t->partition_width = 1; /* One device per row */
 
     SMBIOS_BUILD_TABLE_POST;
@@ -794,14 +799,14 @@ static void smbios_entry_point_setup(void)
     ep.smbios_bcd_revision = 0x28;
 
     /* set during table construction, but BIOS may override: */
-    ep.structure_table_length = smbios_tables_len;
-    ep.max_structure_size = smbios_table_max;
-    ep.number_of_structures = smbios_table_cnt;
+    ep.structure_table_length = cpu_to_le16(smbios_tables_len);
+    ep.max_structure_size = cpu_to_le16(smbios_table_max);
+    ep.number_of_structures = cpu_to_le16(smbios_table_cnt);
 
     /* BIOS must recalculate: */
     ep.checksum = 0;
     ep.intermediate_checksum = 0;
-    ep.structure_table_address = 0; /* where BIOS has copied smbios_tables */
+    ep.structure_table_address = cpu_to_le32(0);
 }
 
 void smbios_get_tables(uint8_t **tables, size_t *tables_len,
@@ -977,6 +982,7 @@ void smbios_entry_add(QemuOpts *opts)
             save_opt(&type0.vendor, opts, "vendor");
             save_opt(&type0.version, opts, "version");
             save_opt(&type0.date, opts, "date");
+            type0.uefi = qemu_opt_get_bool(opts, "uefi", false);
 
             val = qemu_opt_get(opts, "release");
             if (val) {
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index 5ad7a410a9..f7533ed200 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -178,24 +178,24 @@ static void write_bootloader (CPUMIPSState *env, uint8_t *base, int64_t kernel_a
     /* Small bootloader */
     p = (uint32_t *) base;
 
-    stl_raw(p++, 0x0bf00010);                                      /* j 0x1fc00040 */
-    stl_raw(p++, 0x00000000);                                      /* nop */
+    stl_p(p++, 0x0bf00010);                                      /* j 0x1fc00040 */
+    stl_p(p++, 0x00000000);                                      /* nop */
 
     /* Second part of the bootloader */
     p = (uint32_t *) (base + 0x040);
 
-    stl_raw(p++, 0x3c040000);                                      /* lui a0, 0 */
-    stl_raw(p++, 0x34840002);                                      /* ori a0, a0, 2 */
-    stl_raw(p++, 0x3c050000 | ((ENVP_ADDR >> 16) & 0xffff));       /* lui a1, high(ENVP_ADDR) */
-    stl_raw(p++, 0x34a50000 | (ENVP_ADDR & 0xffff));               /* ori a1, a0, low(ENVP_ADDR) */
-    stl_raw(p++, 0x3c060000 | (((ENVP_ADDR + 8) >> 16) & 0xffff)); /* lui a2, high(ENVP_ADDR + 8) */
-    stl_raw(p++, 0x34c60000 | ((ENVP_ADDR + 8) & 0xffff));         /* ori a2, a2, low(ENVP_ADDR + 8) */
-    stl_raw(p++, 0x3c070000 | (loaderparams.ram_size >> 16));      /* lui a3, high(env->ram_size) */
-    stl_raw(p++, 0x34e70000 | (loaderparams.ram_size & 0xffff));   /* ori a3, a3, low(env->ram_size) */
-    stl_raw(p++, 0x3c1f0000 | ((kernel_addr >> 16) & 0xffff));     /* lui ra, high(kernel_addr) */;
-    stl_raw(p++, 0x37ff0000 | (kernel_addr & 0xffff));             /* ori ra, ra, low(kernel_addr) */
-    stl_raw(p++, 0x03e00008);                                      /* jr ra */
-    stl_raw(p++, 0x00000000);                                      /* nop */
+    stl_p(p++, 0x3c040000);                                      /* lui a0, 0 */
+    stl_p(p++, 0x34840002);                                      /* ori a0, a0, 2 */
+    stl_p(p++, 0x3c050000 | ((ENVP_ADDR >> 16) & 0xffff));       /* lui a1, high(ENVP_ADDR) */
+    stl_p(p++, 0x34a50000 | (ENVP_ADDR & 0xffff));               /* ori a1, a0, low(ENVP_ADDR) */
+    stl_p(p++, 0x3c060000 | (((ENVP_ADDR + 8) >> 16) & 0xffff)); /* lui a2, high(ENVP_ADDR + 8) */
+    stl_p(p++, 0x34c60000 | ((ENVP_ADDR + 8) & 0xffff));         /* ori a2, a2, low(ENVP_ADDR + 8) */
+    stl_p(p++, 0x3c070000 | (loaderparams.ram_size >> 16));      /* lui a3, high(env->ram_size) */
+    stl_p(p++, 0x34e70000 | (loaderparams.ram_size & 0xffff));   /* ori a3, a3, low(env->ram_size) */
+    stl_p(p++, 0x3c1f0000 | ((kernel_addr >> 16) & 0xffff));     /* lui ra, high(kernel_addr) */;
+    stl_p(p++, 0x37ff0000 | (kernel_addr & 0xffff));             /* ori ra, ra, low(kernel_addr) */
+    stl_p(p++, 0x03e00008);                                      /* jr ra */
+    stl_p(p++, 0x00000000);                                      /* nop */
 }
 
 
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 9fe775ea88..f4a7d47129 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -609,136 +609,136 @@ static void write_bootloader (CPUMIPSState *env, uint8_t *base,
 
     /* Small bootloader */
     p = (uint32_t *)base;
-    stl_raw(p++, 0x0bf00160);                                      /* j 0x1fc00580 */
-    stl_raw(p++, 0x00000000);                                      /* nop */
+    stl_p(p++, 0x0bf00160);                                      /* j 0x1fc00580 */
+    stl_p(p++, 0x00000000);                                      /* nop */
 
     /* YAMON service vector */
-    stl_raw(base + 0x500, 0xbfc00580);      /* start: */
-    stl_raw(base + 0x504, 0xbfc0083c);      /* print_count: */
-    stl_raw(base + 0x520, 0xbfc00580);      /* start: */
-    stl_raw(base + 0x52c, 0xbfc00800);      /* flush_cache: */
-    stl_raw(base + 0x534, 0xbfc00808);      /* print: */
-    stl_raw(base + 0x538, 0xbfc00800);      /* reg_cpu_isr: */
-    stl_raw(base + 0x53c, 0xbfc00800);      /* unred_cpu_isr: */
-    stl_raw(base + 0x540, 0xbfc00800);      /* reg_ic_isr: */
-    stl_raw(base + 0x544, 0xbfc00800);      /* unred_ic_isr: */
-    stl_raw(base + 0x548, 0xbfc00800);      /* reg_esr: */
-    stl_raw(base + 0x54c, 0xbfc00800);      /* unreg_esr: */
-    stl_raw(base + 0x550, 0xbfc00800);      /* getchar: */
-    stl_raw(base + 0x554, 0xbfc00800);      /* syscon_read: */
+    stl_p(base + 0x500, 0xbfc00580);      /* start: */
+    stl_p(base + 0x504, 0xbfc0083c);      /* print_count: */
+    stl_p(base + 0x520, 0xbfc00580);      /* start: */
+    stl_p(base + 0x52c, 0xbfc00800);      /* flush_cache: */
+    stl_p(base + 0x534, 0xbfc00808);      /* print: */
+    stl_p(base + 0x538, 0xbfc00800);      /* reg_cpu_isr: */
+    stl_p(base + 0x53c, 0xbfc00800);      /* unred_cpu_isr: */
+    stl_p(base + 0x540, 0xbfc00800);      /* reg_ic_isr: */
+    stl_p(base + 0x544, 0xbfc00800);      /* unred_ic_isr: */
+    stl_p(base + 0x548, 0xbfc00800);      /* reg_esr: */
+    stl_p(base + 0x54c, 0xbfc00800);      /* unreg_esr: */
+    stl_p(base + 0x550, 0xbfc00800);      /* getchar: */
+    stl_p(base + 0x554, 0xbfc00800);      /* syscon_read: */
 
 
     /* Second part of the bootloader */
     p = (uint32_t *) (base + 0x580);
-    stl_raw(p++, 0x24040002);                                      /* addiu a0, zero, 2 */
-    stl_raw(p++, 0x3c1d0000 | (((ENVP_ADDR - 64) >> 16) & 0xffff)); /* lui sp, high(ENVP_ADDR) */
-    stl_raw(p++, 0x37bd0000 | ((ENVP_ADDR - 64) & 0xffff));        /* ori sp, sp, low(ENVP_ADDR) */
-    stl_raw(p++, 0x3c050000 | ((ENVP_ADDR >> 16) & 0xffff));       /* lui a1, high(ENVP_ADDR) */
-    stl_raw(p++, 0x34a50000 | (ENVP_ADDR & 0xffff));               /* ori a1, a1, low(ENVP_ADDR) */
-    stl_raw(p++, 0x3c060000 | (((ENVP_ADDR + 8) >> 16) & 0xffff)); /* lui a2, high(ENVP_ADDR + 8) */
-    stl_raw(p++, 0x34c60000 | ((ENVP_ADDR + 8) & 0xffff));         /* ori a2, a2, low(ENVP_ADDR + 8) */
-    stl_raw(p++, 0x3c070000 | (loaderparams.ram_size >> 16));     /* lui a3, high(ram_size) */
-    stl_raw(p++, 0x34e70000 | (loaderparams.ram_size & 0xffff));  /* ori a3, a3, low(ram_size) */
+    stl_p(p++, 0x24040002);                                      /* addiu a0, zero, 2 */
+    stl_p(p++, 0x3c1d0000 | (((ENVP_ADDR - 64) >> 16) & 0xffff)); /* lui sp, high(ENVP_ADDR) */
+    stl_p(p++, 0x37bd0000 | ((ENVP_ADDR - 64) & 0xffff));        /* ori sp, sp, low(ENVP_ADDR) */
+    stl_p(p++, 0x3c050000 | ((ENVP_ADDR >> 16) & 0xffff));       /* lui a1, high(ENVP_ADDR) */
+    stl_p(p++, 0x34a50000 | (ENVP_ADDR & 0xffff));               /* ori a1, a1, low(ENVP_ADDR) */
+    stl_p(p++, 0x3c060000 | (((ENVP_ADDR + 8) >> 16) & 0xffff)); /* lui a2, high(ENVP_ADDR + 8) */
+    stl_p(p++, 0x34c60000 | ((ENVP_ADDR + 8) & 0xffff));         /* ori a2, a2, low(ENVP_ADDR + 8) */
+    stl_p(p++, 0x3c070000 | (loaderparams.ram_size >> 16));     /* lui a3, high(ram_size) */
+    stl_p(p++, 0x34e70000 | (loaderparams.ram_size & 0xffff));  /* ori a3, a3, low(ram_size) */
 
     /* Load BAR registers as done by YAMON */
-    stl_raw(p++, 0x3c09b400);                                      /* lui t1, 0xb400 */
+    stl_p(p++, 0x3c09b400);                                      /* lui t1, 0xb400 */
 
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c08df00);                                      /* lui t0, 0xdf00 */
+    stl_p(p++, 0x3c08df00);                                      /* lui t0, 0xdf00 */
 #else
-    stl_raw(p++, 0x340800df);                                      /* ori t0, r0, 0x00df */
+    stl_p(p++, 0x340800df);                                      /* ori t0, r0, 0x00df */
 #endif
-    stl_raw(p++, 0xad280068);                                      /* sw t0, 0x0068(t1) */
+    stl_p(p++, 0xad280068);                                      /* sw t0, 0x0068(t1) */
 
-    stl_raw(p++, 0x3c09bbe0);                                      /* lui t1, 0xbbe0 */
+    stl_p(p++, 0x3c09bbe0);                                      /* lui t1, 0xbbe0 */
 
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c08c000);                                      /* lui t0, 0xc000 */
+    stl_p(p++, 0x3c08c000);                                      /* lui t0, 0xc000 */
 #else
-    stl_raw(p++, 0x340800c0);                                      /* ori t0, r0, 0x00c0 */
+    stl_p(p++, 0x340800c0);                                      /* ori t0, r0, 0x00c0 */
 #endif
-    stl_raw(p++, 0xad280048);                                      /* sw t0, 0x0048(t1) */
+    stl_p(p++, 0xad280048);                                      /* sw t0, 0x0048(t1) */
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c084000);                                      /* lui t0, 0x4000 */
+    stl_p(p++, 0x3c084000);                                      /* lui t0, 0x4000 */
 #else
-    stl_raw(p++, 0x34080040);                                      /* ori t0, r0, 0x0040 */
+    stl_p(p++, 0x34080040);                                      /* ori t0, r0, 0x0040 */
 #endif
-    stl_raw(p++, 0xad280050);                                      /* sw t0, 0x0050(t1) */
+    stl_p(p++, 0xad280050);                                      /* sw t0, 0x0050(t1) */
 
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c088000);                                      /* lui t0, 0x8000 */
+    stl_p(p++, 0x3c088000);                                      /* lui t0, 0x8000 */
 #else
-    stl_raw(p++, 0x34080080);                                      /* ori t0, r0, 0x0080 */
+    stl_p(p++, 0x34080080);                                      /* ori t0, r0, 0x0080 */
 #endif
-    stl_raw(p++, 0xad280058);                                      /* sw t0, 0x0058(t1) */
+    stl_p(p++, 0xad280058);                                      /* sw t0, 0x0058(t1) */
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c083f00);                                      /* lui t0, 0x3f00 */
+    stl_p(p++, 0x3c083f00);                                      /* lui t0, 0x3f00 */
 #else
-    stl_raw(p++, 0x3408003f);                                      /* ori t0, r0, 0x003f */
+    stl_p(p++, 0x3408003f);                                      /* ori t0, r0, 0x003f */
 #endif
-    stl_raw(p++, 0xad280060);                                      /* sw t0, 0x0060(t1) */
+    stl_p(p++, 0xad280060);                                      /* sw t0, 0x0060(t1) */
 
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c08c100);                                      /* lui t0, 0xc100 */
+    stl_p(p++, 0x3c08c100);                                      /* lui t0, 0xc100 */
 #else
-    stl_raw(p++, 0x340800c1);                                      /* ori t0, r0, 0x00c1 */
+    stl_p(p++, 0x340800c1);                                      /* ori t0, r0, 0x00c1 */
 #endif
-    stl_raw(p++, 0xad280080);                                      /* sw t0, 0x0080(t1) */
+    stl_p(p++, 0xad280080);                                      /* sw t0, 0x0080(t1) */
 #ifdef TARGET_WORDS_BIGENDIAN
-    stl_raw(p++, 0x3c085e00);                                      /* lui t0, 0x5e00 */
+    stl_p(p++, 0x3c085e00);                                      /* lui t0, 0x5e00 */
 #else
-    stl_raw(p++, 0x3408005e);                                      /* ori t0, r0, 0x005e */
+    stl_p(p++, 0x3408005e);                                      /* ori t0, r0, 0x005e */
 #endif
-    stl_raw(p++, 0xad280088);                                      /* sw t0, 0x0088(t1) */
+    stl_p(p++, 0xad280088);                                      /* sw t0, 0x0088(t1) */
 
     /* Jump to kernel code */
-    stl_raw(p++, 0x3c1f0000 | ((kernel_entry >> 16) & 0xffff));    /* lui ra, high(kernel_entry) */
-    stl_raw(p++, 0x37ff0000 | (kernel_entry & 0xffff));            /* ori ra, ra, low(kernel_entry) */
-    stl_raw(p++, 0x03e00008);                                      /* jr ra */
-    stl_raw(p++, 0x00000000);                                      /* nop */
+    stl_p(p++, 0x3c1f0000 | ((kernel_entry >> 16) & 0xffff));    /* lui ra, high(kernel_entry) */
+    stl_p(p++, 0x37ff0000 | (kernel_entry & 0xffff));            /* ori ra, ra, low(kernel_entry) */
+    stl_p(p++, 0x03e00008);                                      /* jr ra */
+    stl_p(p++, 0x00000000);                                      /* nop */
 
     /* YAMON subroutines */
     p = (uint32_t *) (base + 0x800);
-    stl_raw(p++, 0x03e00008);                                     /* jr ra */
-    stl_raw(p++, 0x24020000);                                     /* li v0,0 */
+    stl_p(p++, 0x03e00008);                                     /* jr ra */
+    stl_p(p++, 0x24020000);                                     /* li v0,0 */
    /* 808 YAMON print */
-    stl_raw(p++, 0x03e06821);                                     /* move t5,ra */
-    stl_raw(p++, 0x00805821);                                     /* move t3,a0 */
-    stl_raw(p++, 0x00a05021);                                     /* move t2,a1 */
-    stl_raw(p++, 0x91440000);                                     /* lbu a0,0(t2) */
-    stl_raw(p++, 0x254a0001);                                     /* addiu t2,t2,1 */
-    stl_raw(p++, 0x10800005);                                     /* beqz a0,834 */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x0ff0021c);                                     /* jal 870 */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x08000205);                                     /* j 814 */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x01a00008);                                     /* jr t5 */
-    stl_raw(p++, 0x01602021);                                     /* move a0,t3 */
+    stl_p(p++, 0x03e06821);                                     /* move t5,ra */
+    stl_p(p++, 0x00805821);                                     /* move t3,a0 */
+    stl_p(p++, 0x00a05021);                                     /* move t2,a1 */
+    stl_p(p++, 0x91440000);                                     /* lbu a0,0(t2) */
+    stl_p(p++, 0x254a0001);                                     /* addiu t2,t2,1 */
+    stl_p(p++, 0x10800005);                                     /* beqz a0,834 */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x0ff0021c);                                     /* jal 870 */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x08000205);                                     /* j 814 */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x01a00008);                                     /* jr t5 */
+    stl_p(p++, 0x01602021);                                     /* move a0,t3 */
     /* 0x83c YAMON print_count */
-    stl_raw(p++, 0x03e06821);                                     /* move t5,ra */
-    stl_raw(p++, 0x00805821);                                     /* move t3,a0 */
-    stl_raw(p++, 0x00a05021);                                     /* move t2,a1 */
-    stl_raw(p++, 0x00c06021);                                     /* move t4,a2 */
-    stl_raw(p++, 0x91440000);                                     /* lbu a0,0(t2) */
-    stl_raw(p++, 0x0ff0021c);                                     /* jal 870 */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x254a0001);                                     /* addiu t2,t2,1 */
-    stl_raw(p++, 0x258cffff);                                     /* addiu t4,t4,-1 */
-    stl_raw(p++, 0x1580fffa);                                     /* bnez t4,84c */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x01a00008);                                     /* jr t5 */
-    stl_raw(p++, 0x01602021);                                     /* move a0,t3 */
+    stl_p(p++, 0x03e06821);                                     /* move t5,ra */
+    stl_p(p++, 0x00805821);                                     /* move t3,a0 */
+    stl_p(p++, 0x00a05021);                                     /* move t2,a1 */
+    stl_p(p++, 0x00c06021);                                     /* move t4,a2 */
+    stl_p(p++, 0x91440000);                                     /* lbu a0,0(t2) */
+    stl_p(p++, 0x0ff0021c);                                     /* jal 870 */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x254a0001);                                     /* addiu t2,t2,1 */
+    stl_p(p++, 0x258cffff);                                     /* addiu t4,t4,-1 */
+    stl_p(p++, 0x1580fffa);                                     /* bnez t4,84c */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x01a00008);                                     /* jr t5 */
+    stl_p(p++, 0x01602021);                                     /* move a0,t3 */
     /* 0x870 */
-    stl_raw(p++, 0x3c08b800);                                     /* lui t0,0xb400 */
-    stl_raw(p++, 0x350803f8);                                     /* ori t0,t0,0x3f8 */
-    stl_raw(p++, 0x91090005);                                     /* lbu t1,5(t0) */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x31290040);                                     /* andi t1,t1,0x40 */
-    stl_raw(p++, 0x1120fffc);                                     /* beqz t1,878 <outch+0x8> */
-    stl_raw(p++, 0x00000000);                                     /* nop */
-    stl_raw(p++, 0x03e00008);                                     /* jr ra */
-    stl_raw(p++, 0xa1040000);                                     /* sb a0,0(t0) */
+    stl_p(p++, 0x3c08b800);                                     /* lui t0,0xb400 */
+    stl_p(p++, 0x350803f8);                                     /* ori t0,t0,0x3f8 */
+    stl_p(p++, 0x91090005);                                     /* lbu t1,5(t0) */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x31290040);                                     /* andi t1,t1,0x40 */
+    stl_p(p++, 0x1120fffc);                                     /* beqz t1,878 <outch+0x8> */
+    stl_p(p++, 0x00000000);                                     /* nop */
+    stl_p(p++, 0x03e00008);                                     /* jr ra */
+    stl_p(p++, 0xa1040000);                                     /* sb a0,0(t0) */
 
 }
 
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 8b8cc4e294..aa48b1c82f 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -47,10 +47,6 @@ static void q35_host_realize(DeviceState *dev, Error **errp)
     sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem);
     sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4);
 
-    if (pcie_host_init(PCIE_HOST_BRIDGE(s)) < 0) {
-        error_setg(errp, "failed to initialize pcie host");
-        return;
-    }
     pci->bus = pci_bus_new(DEVICE(s), "pcie.0",
                            s->mch.pci_address_space, s->mch.address_space_io,
                            0, TYPE_PCIE_BUS);
diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c
index c6e1b573e1..7c88a1d091 100644
--- a/hw/pci/pcie_host.c
+++ b/hw/pci/pcie_host.c
@@ -83,11 +83,11 @@ static const MemoryRegionOps pcie_mmcfg_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-int pcie_host_init(PCIExpressHost *e)
+static void pcie_host_init(Object *obj)
 {
-    e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
+    PCIExpressHost *e = PCIE_HOST_BRIDGE(obj);
 
-    return 0;
+    e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
 }
 
 void pcie_host_mmcfg_unmap(PCIExpressHost *e)
@@ -128,6 +128,7 @@ static const TypeInfo pcie_host_type_info = {
     .parent = TYPE_PCI_HOST_BRIDGE,
     .abstract = true,
     .instance_size = sizeof(PCIExpressHost),
+    .instance_init = pcie_host_init,
 };
 
 static void pcie_host_register_types(void)
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 3983a5b464..668bafa72a 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -164,8 +164,8 @@ static void vhost_scsi_set_config(VirtIODevice *vdev,
     VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
 
-    if ((uint32_t) ldl_raw(&scsiconf->sense_size) != vs->sense_size ||
-        (uint32_t) ldl_raw(&scsiconf->cdb_size) != vs->cdb_size) {
+    if ((uint32_t) ldl_p(&scsiconf->sense_size) != vs->sense_size ||
+        (uint32_t) ldl_p(&scsiconf->cdb_size) != vs->cdb_size) {
         error_report("vhost-scsi does not support changing the sense data and CDB sizes");
         exit(1);
     }
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 14261fb1a7..b39880a9cd 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -425,16 +425,16 @@ static void virtio_scsi_get_config(VirtIODevice *vdev,
     VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
     VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(vdev);
 
-    stl_raw(&scsiconf->num_queues, s->conf.num_queues);
-    stl_raw(&scsiconf->seg_max, 128 - 2);
-    stl_raw(&scsiconf->max_sectors, s->conf.max_sectors);
-    stl_raw(&scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
-    stl_raw(&scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
-    stl_raw(&scsiconf->sense_size, s->sense_size);
-    stl_raw(&scsiconf->cdb_size, s->cdb_size);
-    stw_raw(&scsiconf->max_channel, VIRTIO_SCSI_MAX_CHANNEL);
-    stw_raw(&scsiconf->max_target, VIRTIO_SCSI_MAX_TARGET);
-    stl_raw(&scsiconf->max_lun, VIRTIO_SCSI_MAX_LUN);
+    stl_p(&scsiconf->num_queues, s->conf.num_queues);
+    stl_p(&scsiconf->seg_max, 128 - 2);
+    stl_p(&scsiconf->max_sectors, s->conf.max_sectors);
+    stl_p(&scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
+    stl_p(&scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
+    stl_p(&scsiconf->sense_size, s->sense_size);
+    stl_p(&scsiconf->cdb_size, s->cdb_size);
+    stw_p(&scsiconf->max_channel, VIRTIO_SCSI_MAX_CHANNEL);
+    stw_p(&scsiconf->max_target, VIRTIO_SCSI_MAX_TARGET);
+    stl_p(&scsiconf->max_lun, VIRTIO_SCSI_MAX_LUN);
 }
 
 static void virtio_scsi_set_config(VirtIODevice *vdev,
@@ -443,14 +443,14 @@ static void virtio_scsi_set_config(VirtIODevice *vdev,
     VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
 
-    if ((uint32_t) ldl_raw(&scsiconf->sense_size) >= 65536 ||
-        (uint32_t) ldl_raw(&scsiconf->cdb_size) >= 256) {
+    if ((uint32_t) ldl_p(&scsiconf->sense_size) >= 65536 ||
+        (uint32_t) ldl_p(&scsiconf->cdb_size) >= 256) {
         error_report("bad data written to virtio-scsi configuration space");
         exit(1);
     }
 
-    vs->sense_size = ldl_raw(&scsiconf->sense_size);
-    vs->cdb_size = ldl_raw(&scsiconf->cdb_size);
+    vs->sense_size = ldl_p(&scsiconf->sense_size);
+    vs->cdb_size = ldl_p(&scsiconf->cdb_size);
 }
 
 static uint32_t virtio_scsi_get_features(VirtIODevice *vdev,
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index bf2b588b24..22cd52edee 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -112,11 +112,6 @@ static void balloon_stats_get_all(Object *obj, struct Visitor *v,
     VirtIOBalloon *s = opaque;
     int i;
 
-    if (!s->stats_last_update) {
-        error_setg(errp, "guest hasn't updated any stats yet");
-        return;
-    }
-
     visit_start_struct(v, NULL, "guest-stats", name, 0, &err);
     if (err) {
         goto out;
@@ -378,6 +373,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
+    reset_stats(s);
+
     register_savevm(dev, "virtio-balloon", -1, 1,
                     virtio_balloon_save, virtio_balloon_load, s);
 
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 9cab592dc5..e8363d7248 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -198,127 +198,8 @@ extern unsigned long reserved_va;
 #define RESERVED_VA 0ul
 #endif
 
-/* All direct uses of g2h and h2g need to go away for usermode softmmu.  */
-#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + GUEST_BASE))
-
-#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
-#define h2g_valid(x) 1
-#else
-#define h2g_valid(x) ({ \
-    unsigned long __guest = (unsigned long)(x) - GUEST_BASE; \
-    (__guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS)) && \
-    (!RESERVED_VA || (__guest < RESERVED_VA)); \
-})
 #endif
 
-#define h2g_nocheck(x) ({ \
-    unsigned long __ret = (unsigned long)(x) - GUEST_BASE; \
-    (abi_ulong)__ret; \
-})
-
-#define h2g(x) ({ \
-    /* Check if given address fits target address space */ \
-    assert(h2g_valid(x)); \
-    h2g_nocheck(x); \
-})
-
-#define saddr(x) g2h(x)
-#define laddr(x) g2h(x)
-
-#else /* !CONFIG_USER_ONLY */
-/* NOTE: we use double casts if pointers and target_ulong have
-   different sizes */
-#define saddr(x) (uint8_t *)(intptr_t)(x)
-#define laddr(x) (uint8_t *)(intptr_t)(x)
-#endif
-
-#define ldub_raw(p) ldub_p(laddr((p)))
-#define ldsb_raw(p) ldsb_p(laddr((p)))
-#define lduw_raw(p) lduw_p(laddr((p)))
-#define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
-#define ldq_raw(p) ldq_p(laddr((p)))
-#define ldfl_raw(p) ldfl_p(laddr((p)))
-#define ldfq_raw(p) ldfq_p(laddr((p)))
-#define stb_raw(p, v) stb_p(saddr((p)), v)
-#define stw_raw(p, v) stw_p(saddr((p)), v)
-#define stl_raw(p, v) stl_p(saddr((p)), v)
-#define stq_raw(p, v) stq_p(saddr((p)), v)
-#define stfl_raw(p, v) stfl_p(saddr((p)), v)
-#define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
-
-#if defined(CONFIG_USER_ONLY)
-
-/* if user mode, no other memory access functions */
-#define ldub(p) ldub_raw(p)
-#define ldsb(p) ldsb_raw(p)
-#define lduw(p) lduw_raw(p)
-#define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
-#define ldq(p) ldq_raw(p)
-#define ldfl(p) ldfl_raw(p)
-#define ldfq(p) ldfq_raw(p)
-#define stb(p, v) stb_raw(p, v)
-#define stw(p, v) stw_raw(p, v)
-#define stl(p, v) stl_raw(p, v)
-#define stq(p, v) stq_raw(p, v)
-#define stfl(p, v) stfl_raw(p, v)
-#define stfq(p, v) stfq_raw(p, v)
-
-#define cpu_ldub_code(env1, p) ldub_raw(p)
-#define cpu_ldsb_code(env1, p) ldsb_raw(p)
-#define cpu_lduw_code(env1, p) lduw_raw(p)
-#define cpu_ldsw_code(env1, p) ldsw_raw(p)
-#define cpu_ldl_code(env1, p) ldl_raw(p)
-#define cpu_ldq_code(env1, p) ldq_raw(p)
-
-#define cpu_ldub_data(env, addr) ldub_raw(addr)
-#define cpu_lduw_data(env, addr) lduw_raw(addr)
-#define cpu_ldsw_data(env, addr) ldsw_raw(addr)
-#define cpu_ldl_data(env, addr) ldl_raw(addr)
-#define cpu_ldq_data(env, addr) ldq_raw(addr)
-
-#define cpu_stb_data(env, addr, data) stb_raw(addr, data)
-#define cpu_stw_data(env, addr, data) stw_raw(addr, data)
-#define cpu_stl_data(env, addr, data) stl_raw(addr, data)
-#define cpu_stq_data(env, addr, data) stq_raw(addr, data)
-
-#define cpu_ldub_kernel(env, addr) ldub_raw(addr)
-#define cpu_lduw_kernel(env, addr) lduw_raw(addr)
-#define cpu_ldsw_kernel(env, addr) ldsw_raw(addr)
-#define cpu_ldl_kernel(env, addr) ldl_raw(addr)
-#define cpu_ldq_kernel(env, addr) ldq_raw(addr)
-
-#define cpu_stb_kernel(env, addr, data) stb_raw(addr, data)
-#define cpu_stw_kernel(env, addr, data) stw_raw(addr, data)
-#define cpu_stl_kernel(env, addr, data) stl_raw(addr, data)
-#define cpu_stq_kernel(env, addr, data) stq_raw(addr, data)
-
-#define ldub_kernel(p) ldub_raw(p)
-#define ldsb_kernel(p) ldsb_raw(p)
-#define lduw_kernel(p) lduw_raw(p)
-#define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
-#define ldq_kernel(p) ldq_raw(p)
-#define ldfl_kernel(p) ldfl_raw(p)
-#define ldfq_kernel(p) ldfq_raw(p)
-#define stb_kernel(p, v) stb_raw(p, v)
-#define stw_kernel(p, v) stw_raw(p, v)
-#define stl_kernel(p, v) stl_raw(p, v)
-#define stq_kernel(p, v) stq_raw(p, v)
-#define stfl_kernel(p, v) stfl_raw(p, v)
-#define stfq_kernel(p, vt) stfq_raw(p, v)
-
-#define cpu_ldub_data(env, addr) ldub_raw(addr)
-#define cpu_lduw_data(env, addr) lduw_raw(addr)
-#define cpu_ldl_data(env, addr) ldl_raw(addr)
-
-#define cpu_stb_data(env, addr, data) stb_raw(addr, data)
-#define cpu_stw_data(env, addr, data) stw_raw(addr, data)
-#define cpu_stl_data(env, addr, data) stl_raw(addr, data)
-#endif /* defined(CONFIG_USER_ONLY) */
-
 /* page related stuff */
 
 #define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS)
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
new file mode 100644
index 0000000000..e5550e7175
--- /dev/null
+++ b/include/exec/cpu_ldst.h
@@ -0,0 +1,400 @@
+/*
+ *  Software MMU support
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/*
+ * Generate inline load/store functions for all MMU modes (typically
+ * at least _user and _kernel) as well as _data versions, for all data
+ * sizes.
+ *
+ * Used by target op helpers.
+ *
+ * MMU mode suffixes are defined in target cpu.h.
+ */
+#ifndef CPU_LDST_H
+#define CPU_LDST_H
+
+#if defined(CONFIG_USER_ONLY)
+/* All direct uses of g2h and h2g need to go away for usermode softmmu.  */
+#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + GUEST_BASE))
+
+#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
+#define h2g_valid(x) 1
+#else
+#define h2g_valid(x) ({ \
+    unsigned long __guest = (unsigned long)(x) - GUEST_BASE; \
+    (__guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS)) && \
+    (!RESERVED_VA || (__guest < RESERVED_VA)); \
+})
+#endif
+
+#define h2g_nocheck(x) ({ \
+    unsigned long __ret = (unsigned long)(x) - GUEST_BASE; \
+    (abi_ulong)__ret; \
+})
+
+#define h2g(x) ({ \
+    /* Check if given address fits target address space */ \
+    assert(h2g_valid(x)); \
+    h2g_nocheck(x); \
+})
+
+#define saddr(x) g2h(x)
+#define laddr(x) g2h(x)
+
+#else /* !CONFIG_USER_ONLY */
+/* NOTE: we use double casts if pointers and target_ulong have
+   different sizes */
+#define saddr(x) (uint8_t *)(intptr_t)(x)
+#define laddr(x) (uint8_t *)(intptr_t)(x)
+#endif
+
+#define ldub_raw(p) ldub_p(laddr((p)))
+#define ldsb_raw(p) ldsb_p(laddr((p)))
+#define lduw_raw(p) lduw_p(laddr((p)))
+#define ldsw_raw(p) ldsw_p(laddr((p)))
+#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldq_raw(p) ldq_p(laddr((p)))
+#define ldfl_raw(p) ldfl_p(laddr((p)))
+#define ldfq_raw(p) ldfq_p(laddr((p)))
+#define stb_raw(p, v) stb_p(saddr((p)), v)
+#define stw_raw(p, v) stw_p(saddr((p)), v)
+#define stl_raw(p, v) stl_p(saddr((p)), v)
+#define stq_raw(p, v) stq_p(saddr((p)), v)
+#define stfl_raw(p, v) stfl_p(saddr((p)), v)
+#define stfq_raw(p, v) stfq_p(saddr((p)), v)
+
+
+#if defined(CONFIG_USER_ONLY)
+
+/* if user mode, no other memory access functions */
+#define ldub(p) ldub_raw(p)
+#define ldsb(p) ldsb_raw(p)
+#define lduw(p) lduw_raw(p)
+#define ldsw(p) ldsw_raw(p)
+#define ldl(p) ldl_raw(p)
+#define ldq(p) ldq_raw(p)
+#define ldfl(p) ldfl_raw(p)
+#define ldfq(p) ldfq_raw(p)
+#define stb(p, v) stb_raw(p, v)
+#define stw(p, v) stw_raw(p, v)
+#define stl(p, v) stl_raw(p, v)
+#define stq(p, v) stq_raw(p, v)
+#define stfl(p, v) stfl_raw(p, v)
+#define stfq(p, v) stfq_raw(p, v)
+
+#define cpu_ldub_code(env1, p) ldub_raw(p)
+#define cpu_ldsb_code(env1, p) ldsb_raw(p)
+#define cpu_lduw_code(env1, p) lduw_raw(p)
+#define cpu_ldsw_code(env1, p) ldsw_raw(p)
+#define cpu_ldl_code(env1, p) ldl_raw(p)
+#define cpu_ldq_code(env1, p) ldq_raw(p)
+
+#define cpu_ldub_data(env, addr) ldub_raw(addr)
+#define cpu_lduw_data(env, addr) lduw_raw(addr)
+#define cpu_ldsw_data(env, addr) ldsw_raw(addr)
+#define cpu_ldl_data(env, addr) ldl_raw(addr)
+#define cpu_ldq_data(env, addr) ldq_raw(addr)
+
+#define cpu_stb_data(env, addr, data) stb_raw(addr, data)
+#define cpu_stw_data(env, addr, data) stw_raw(addr, data)
+#define cpu_stl_data(env, addr, data) stl_raw(addr, data)
+#define cpu_stq_data(env, addr, data) stq_raw(addr, data)
+
+#define cpu_ldub_kernel(env, addr) ldub_raw(addr)
+#define cpu_lduw_kernel(env, addr) lduw_raw(addr)
+#define cpu_ldsw_kernel(env, addr) ldsw_raw(addr)
+#define cpu_ldl_kernel(env, addr) ldl_raw(addr)
+#define cpu_ldq_kernel(env, addr) ldq_raw(addr)
+
+#define cpu_stb_kernel(env, addr, data) stb_raw(addr, data)
+#define cpu_stw_kernel(env, addr, data) stw_raw(addr, data)
+#define cpu_stl_kernel(env, addr, data) stl_raw(addr, data)
+#define cpu_stq_kernel(env, addr, data) stq_raw(addr, data)
+
+#define ldub_kernel(p) ldub_raw(p)
+#define ldsb_kernel(p) ldsb_raw(p)
+#define lduw_kernel(p) lduw_raw(p)
+#define ldsw_kernel(p) ldsw_raw(p)
+#define ldl_kernel(p) ldl_raw(p)
+#define ldq_kernel(p) ldq_raw(p)
+#define ldfl_kernel(p) ldfl_raw(p)
+#define ldfq_kernel(p) ldfq_raw(p)
+#define stb_kernel(p, v) stb_raw(p, v)
+#define stw_kernel(p, v) stw_raw(p, v)
+#define stl_kernel(p, v) stl_raw(p, v)
+#define stq_kernel(p, v) stq_raw(p, v)
+#define stfl_kernel(p, v) stfl_raw(p, v)
+#define stfq_kernel(p, vt) stfq_raw(p, v)
+
+#define cpu_ldub_data(env, addr) ldub_raw(addr)
+#define cpu_lduw_data(env, addr) lduw_raw(addr)
+#define cpu_ldl_data(env, addr) ldl_raw(addr)
+
+#define cpu_stb_data(env, addr, data) stb_raw(addr, data)
+#define cpu_stw_data(env, addr, data) stw_raw(addr, data)
+#define cpu_stl_data(env, addr, data) stl_raw(addr, data)
+
+#else
+
+/* XXX: find something cleaner.
+ * Furthermore, this is false for 64 bits targets
+ */
+#define ldul_user       ldl_user
+#define ldul_kernel     ldl_kernel
+#define ldul_hypv       ldl_hypv
+#define ldul_executive  ldl_executive
+#define ldul_supervisor ldl_supervisor
+
+/* The memory helpers for tcg-generated code need tcg_target_long etc.  */
+#include "tcg.h"
+
+uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint16_t helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+
+void helper_stb_mmu(CPUArchState *env, target_ulong addr,
+                    uint8_t val, int mmu_idx);
+void helper_stw_mmu(CPUArchState *env, target_ulong addr,
+                    uint16_t val, int mmu_idx);
+void helper_stl_mmu(CPUArchState *env, target_ulong addr,
+                    uint32_t val, int mmu_idx);
+void helper_stq_mmu(CPUArchState *env, target_ulong addr,
+                    uint64_t val, int mmu_idx);
+
+uint8_t helper_ldb_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint16_t helper_ldw_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint32_t helper_ldl_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
+
+#define CPU_MMU_INDEX 0
+#define MEMSUFFIX MMU_MODE0_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+
+#define CPU_MMU_INDEX 1
+#define MEMSUFFIX MMU_MODE1_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+
+#if (NB_MMU_MODES >= 3)
+
+#define CPU_MMU_INDEX 2
+#define MEMSUFFIX MMU_MODE2_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 3) */
+
+#if (NB_MMU_MODES >= 4)
+
+#define CPU_MMU_INDEX 3
+#define MEMSUFFIX MMU_MODE3_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 4) */
+
+#if (NB_MMU_MODES >= 5)
+
+#define CPU_MMU_INDEX 4
+#define MEMSUFFIX MMU_MODE4_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 5) */
+
+#if (NB_MMU_MODES >= 6)
+
+#define CPU_MMU_INDEX 5
+#define MEMSUFFIX MMU_MODE5_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 6) */
+
+#if (NB_MMU_MODES > 6)
+#error "NB_MMU_MODES > 6 is not supported for now"
+#endif /* (NB_MMU_MODES > 6) */
+
+/* these access are slower, they must be as rare as possible */
+#define CPU_MMU_INDEX (cpu_mmu_index(env))
+#define MEMSUFFIX _data
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+
+#define ldub(p) ldub_data(p)
+#define ldsb(p) ldsb_data(p)
+#define lduw(p) lduw_data(p)
+#define ldsw(p) ldsw_data(p)
+#define ldl(p) ldl_data(p)
+#define ldq(p) ldq_data(p)
+
+#define stb(p, v) stb_data(p, v)
+#define stw(p, v) stw_data(p, v)
+#define stl(p, v) stl_data(p, v)
+#define stq(p, v) stq_data(p, v)
+
+#define CPU_MMU_INDEX (cpu_mmu_index(env))
+#define MEMSUFFIX _code
+#define SOFTMMU_CODE_ACCESS
+
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#undef SOFTMMU_CODE_ACCESS
+
+/**
+ * tlb_vaddr_to_host:
+ * @env: CPUArchState
+ * @addr: guest virtual address to look up
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @mmu_idx: MMU index to use for lookup
+ *
+ * Look up the specified guest virtual index in the TCG softmmu TLB.
+ * If the TLB contains a host virtual address suitable for direct RAM
+ * access, then return it. Otherwise (TLB miss, TLB entry is for an
+ * I/O access, etc) return NULL.
+ *
+ * This is the equivalent of the initial fast-path code used by
+ * TCG backends for guest load and store accesses.
+ */
+static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
+                                      int access_type, int mmu_idx)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
+    target_ulong tlb_addr;
+    uintptr_t haddr;
+
+    switch (access_type) {
+    case 0:
+        tlb_addr = tlbentry->addr_read;
+        break;
+    case 1:
+        tlb_addr = tlbentry->addr_write;
+        break;
+    case 2:
+        tlb_addr = tlbentry->addr_code;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        /* TLB entry is for a different page */
+        return NULL;
+    }
+
+    if (tlb_addr & ~TARGET_PAGE_MASK) {
+        /* IO access */
+        return NULL;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    return (void *)haddr;
+}
+
+#endif /* defined(CONFIG_USER_ONLY) */
+
+#endif /* CPU_LDST_H */
diff --git a/include/exec/softmmu_header.h b/include/exec/cpu_ldst_template.h
index d8d9c81b05..006093ac49 100644
--- a/include/exec/softmmu_header.h
+++ b/include/exec/cpu_ldst_template.h
@@ -8,7 +8,7 @@
  * 32 and 64 bit cases, also generate floating point functions with
  * the same size.
  *
- * Not used directly but included from softmmu_exec.h and exec-all.h.
+ * Not used directly but included from cpu_ldst.h.
  *
  *  Copyright (c) 2003 Fabrice Bellard
  *
@@ -47,35 +47,18 @@
 #error unsupported data size
 #endif
 
-#if ACCESS_TYPE < (NB_MMU_MODES)
-
-#define CPU_MMU_INDEX ACCESS_TYPE
-#define MMUSUFFIX _mmu
-
-#elif ACCESS_TYPE == (NB_MMU_MODES)
-
-#define CPU_MMU_INDEX (cpu_mmu_index(env))
-#define MMUSUFFIX _mmu
-
-#elif ACCESS_TYPE == (NB_MMU_MODES + 1)
-
-#define CPU_MMU_INDEX (cpu_mmu_index(env))
-#define MMUSUFFIX _cmmu
-
-#else
-#error invalid ACCESS_TYPE
-#endif
-
 #if DATA_SIZE == 8
 #define RES_TYPE uint64_t
 #else
 #define RES_TYPE uint32_t
 #endif
 
-#if ACCESS_TYPE == (NB_MMU_MODES + 1)
+#ifdef SOFTMMU_CODE_ACCESS
 #define ADDR_READ addr_code
+#define MMUSUFFIX _cmmu
 #else
 #define ADDR_READ addr_read
+#define MMUSUFFIX _mmu
 #endif
 
 /* generic load/store macros */
@@ -124,7 +107,7 @@ glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 }
 #endif
 
-#if ACCESS_TYPE != (NB_MMU_MODES + 1)
+#ifndef SOFTMMU_CODE_ACCESS
 
 /* generic store macro */
 
@@ -148,9 +131,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
     }
 }
 
-#endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
 
-#if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
 #if DATA_SIZE == 8
 static inline float64 glue(cpu_ldfq, MEMSUFFIX)(CPUArchState *env,
@@ -200,7 +181,7 @@ static inline void glue(cpu_stfl, MEMSUFFIX)(CPUArchState *env,
 }
 #endif /* DATA_SIZE == 4 */
 
-#endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
+#endif /* !SOFTMMU_CODE_ACCESS */
 
 #undef RES_TYPE
 #undef DATA_TYPE
@@ -208,6 +189,5 @@ static inline void glue(cpu_stfl, MEMSUFFIX)(CPUArchState *env,
 #undef SUFFIX
 #undef USUFFIX
 #undef DATA_SIZE
-#undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index c964ca4f0b..3d62d9c464 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -344,29 +344,6 @@ bool io_mem_write(struct MemoryRegion *mr, hwaddr addr,
 void tlb_fill(CPUState *cpu, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr);
 
-uint8_t helper_ldb_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-uint16_t helper_ldw_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-uint32_t helper_ldl_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-
-#define ACCESS_TYPE (NB_MMU_MODES + 1)
-#define MEMSUFFIX _code
-
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-
 #endif
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/include/exec/softmmu_exec.h b/include/exec/softmmu_exec.h
deleted file mode 100644
index 470db20174..0000000000
--- a/include/exec/softmmu_exec.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- *  Software MMU support
- *
- * Generate inline load/store functions for all MMU modes (typically
- * at least _user and _kernel) as well as _data versions, for all data
- * sizes.
- *
- * Used by target op helpers.
- *
- * MMU mode suffixes are defined in target cpu.h.
- */
-
-/* XXX: find something cleaner.
- * Furthermore, this is false for 64 bits targets
- */
-#define ldul_user       ldl_user
-#define ldul_kernel     ldl_kernel
-#define ldul_hypv       ldl_hypv
-#define ldul_executive  ldl_executive
-#define ldul_supervisor ldl_supervisor
-
-/* The memory helpers for tcg-generated code need tcg_target_long etc.  */
-#include "tcg.h"
-
-#define ACCESS_TYPE 0
-#define MEMSUFFIX MMU_MODE0_SUFFIX
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-
-#define ACCESS_TYPE 1
-#define MEMSUFFIX MMU_MODE1_SUFFIX
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-
-#if (NB_MMU_MODES >= 3)
-
-#define ACCESS_TYPE 2
-#define MEMSUFFIX MMU_MODE2_SUFFIX
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-#endif /* (NB_MMU_MODES >= 3) */
-
-#if (NB_MMU_MODES >= 4)
-
-#define ACCESS_TYPE 3
-#define MEMSUFFIX MMU_MODE3_SUFFIX
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-#endif /* (NB_MMU_MODES >= 4) */
-
-#if (NB_MMU_MODES >= 5)
-
-#define ACCESS_TYPE 4
-#define MEMSUFFIX MMU_MODE4_SUFFIX
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-#endif /* (NB_MMU_MODES >= 5) */
-
-#if (NB_MMU_MODES >= 6)
-
-#define ACCESS_TYPE 5
-#define MEMSUFFIX MMU_MODE5_SUFFIX
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-#endif /* (NB_MMU_MODES >= 6) */
-
-#if (NB_MMU_MODES > 6)
-#error "NB_MMU_MODES > 6 is not supported for now"
-#endif /* (NB_MMU_MODES > 6) */
-
-/* these access are slower, they must be as rare as possible */
-#define ACCESS_TYPE (NB_MMU_MODES)
-#define MEMSUFFIX _data
-#define DATA_SIZE 1
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 2
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 4
-#include "exec/softmmu_header.h"
-
-#define DATA_SIZE 8
-#include "exec/softmmu_header.h"
-#undef ACCESS_TYPE
-#undef MEMSUFFIX
-
-#define ldub(p) ldub_data(p)
-#define ldsb(p) ldsb_data(p)
-#define lduw(p) lduw_data(p)
-#define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
-#define ldq(p) ldq_data(p)
-
-#define stb(p, v) stb_data(p, v)
-#define stw(p, v) stw_data(p, v)
-#define stl(p, v) stl_data(p, v)
-#define stq(p, v) stq_data(p, v)
-
-/**
- * tlb_vaddr_to_host:
- * @env: CPUArchState
- * @addr: guest virtual address to look up
- * @access_type: 0 for read, 1 for write, 2 for execute
- * @mmu_idx: MMU index to use for lookup
- *
- * Look up the specified guest virtual index in the TCG softmmu TLB.
- * If the TLB contains a host virtual address suitable for direct RAM
- * access, then return it. Otherwise (TLB miss, TLB entry is for an
- * I/O access, etc) return NULL.
- *
- * This is the equivalent of the initial fast-path code used by
- * TCG backends for guest load and store accesses.
- */
-static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
-                                      int access_type, int mmu_idx)
-{
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
-    target_ulong tlb_addr;
-    uintptr_t haddr;
-
-    switch (access_type) {
-    case 0:
-        tlb_addr = tlbentry->addr_read;
-        break;
-    case 1:
-        tlb_addr = tlbentry->addr_write;
-        break;
-    case 2:
-        tlb_addr = tlbentry->addr_code;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if ((addr & TARGET_PAGE_MASK)
-        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        /* TLB entry is for a different page */
-        return NULL;
-    }
-
-    if (tlb_addr & ~TARGET_PAGE_MASK) {
-        /* IO access */
-        return NULL;
-    }
-
-    haddr = addr + env->tlb_table[mmu_idx][index].addend;
-    return (void *)haddr;
-}
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 31328a8157..fa9d99792a 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -275,6 +275,21 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
             .driver   = "nec-usb-xhci",\
             .property = "superspeed-ports-first",\
             .value    = "off",\
+        },\
+        {\
+            .driver   = "pci-serial",\
+            .property = "prog_if",\
+            .value    = stringify(0),\
+        },\
+        {\
+            .driver   = "pci-serial-2x",\
+            .property = "prof_if",\
+            .value    = stringify(0),\
+        },\
+        {\
+            .driver   = "pci-serial-4x",\
+            .property = "prog_if",\
+            .value    = stringify(0),\
         }
 
 #define PC_COMPAT_1_7 \
diff --git a/include/hw/i386/smbios.h b/include/hw/i386/smbios.h
index 6d854b7f1e..a3f4d88bf0 100644
--- a/include/hw/i386/smbios.h
+++ b/include/hw/i386/smbios.h
@@ -64,7 +64,7 @@ struct smbios_type_0 {
     uint16_t bios_starting_address_segment;
     uint8_t bios_release_date_str;
     uint8_t bios_rom_size;
-    uint8_t bios_characteristics[8];
+    uint64_t bios_characteristics;
     uint8_t bios_characteristics_extension_bytes[2];
     uint8_t system_bios_major_release;
     uint8_t system_bios_minor_release;
@@ -182,10 +182,10 @@ struct smbios_type_17 {
     uint8_t part_number_str;
     uint8_t attributes;
     uint32_t extended_size;
-    uint32_t configured_clock_speed;
-    uint32_t minimum_voltage;
-    uint32_t maximum_voltage;
-    uint32_t configured_voltage;
+    uint16_t configured_clock_speed;
+    uint16_t minimum_voltage;
+    uint16_t maximum_voltage;
+    uint16_t configured_voltage;
 } QEMU_PACKED;
 
 /* SMBIOS type 19 - Memory Array Mapped Address (v2.7) */
diff --git a/include/hw/pci/pcie_host.h b/include/hw/pci/pcie_host.h
index acca45ed58..ff44ef6fca 100644
--- a/include/hw/pci/pcie_host.h
+++ b/include/hw/pci/pcie_host.h
@@ -49,7 +49,6 @@ struct PCIExpressHost {
     MemoryRegion mmio;
 };
 
-int pcie_host_init(PCIExpressHost *e);
 void pcie_host_mmcfg_unmap(PCIExpressHost *e);
 void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, uint32_t size);
 void pcie_host_mmcfg_update(PCIExpressHost *e,
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index ae31575577..9221cfc879 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -239,10 +239,18 @@ struct PropertyInfo {
     ObjectPropertyRelease *release;
 };
 
+/**
+ * GlobalProperty:
+ * @not_used: Track use of a global property.  Defaults to false in all C99
+ * struct initializations.
+ *
+ * This prevents reports of .compat_props when they are not used.
+ */
 typedef struct GlobalProperty {
     const char *driver;
     const char *property;
     const char *value;
+    bool not_used;
     QTAILQ_ENTRY(GlobalProperty) next;
 } GlobalProperty;
 
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index c46e908d71..c962b6bbaa 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -180,6 +180,7 @@ void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value);
 
 void qdev_prop_register_global(GlobalProperty *prop);
 void qdev_prop_register_global_list(GlobalProperty *props);
+int qdev_prop_check_global(void);
 void qdev_prop_set_globals(DeviceState *dev, Error **errp);
 void qdev_prop_set_globals_for_type(DeviceState *dev, const char *typename,
                                     Error **errp);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index df977c88f0..4b352a28fa 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -80,6 +80,8 @@ struct TranslationBlock;
  * @has_work: Callback for checking if there is work to do.
  * @do_interrupt: Callback for interrupt handling.
  * @do_unassigned_access: Callback for unassigned access handling.
+ * @do_unaligned_access: Callback for unaligned access handling, if
+ * the target defines #ALIGNED_ONLY.
  * @memory_rw_debug: Callback for GDB memory access.
  * @dump_state: Callback for dumping state.
  * @dump_statistics: Callback for dumping statistics.
@@ -112,6 +114,8 @@ typedef struct CPUClass {
     bool (*has_work)(CPUState *cpu);
     void (*do_interrupt)(CPUState *cpu);
     CPUUnassignedAccess do_unassigned_access;
+    void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
+                                int is_write, int is_user, uintptr_t retaddr);
     int (*memory_rw_debug)(CPUState *cpu, vaddr addr,
                            uint8_t *buf, int len, bool is_write);
     void (*dump_state)(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
@@ -544,8 +548,7 @@ void cpu_interrupt(CPUState *cpu, int mask);
 
 #endif /* USER_ONLY */
 
-#ifndef CONFIG_USER_ONLY
-
+#ifdef CONFIG_SOFTMMU
 static inline void cpu_unassigned_access(CPUState *cpu, hwaddr addr,
                                          bool is_write, bool is_exec,
                                          int opaque, unsigned size)
@@ -557,6 +560,14 @@ static inline void cpu_unassigned_access(CPUState *cpu, hwaddr addr,
     }
 }
 
+static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
+                                        int is_write, int is_user,
+                                        uintptr_t retaddr)
+{
+    CPUClass *cc = CPU_GET_CLASS(cpu);
+
+    return cc->do_unaligned_access(cpu, addr, is_write, is_user, retaddr);
+}
 #endif
 
 /**
diff --git a/include/ui/input.h b/include/ui/input.h
index aa99b0cac7..5d5ac00663 100644
--- a/include/ui/input.h
+++ b/include/ui/input.h
@@ -39,6 +39,7 @@ InputEvent *qemu_input_event_new_key(KeyValue *key, bool down);
 void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down);
 void qemu_input_event_send_key_number(QemuConsole *src, int num, bool down);
 void qemu_input_event_send_key_qcode(QemuConsole *src, QKeyCode q, bool down);
+void qemu_input_event_send_key_delay(uint32_t delay_ms);
 int qemu_input_key_number_to_qcode(uint8_t nr);
 int qemu_input_key_value_to_number(const KeyValue *value);
 int qemu_input_key_value_to_qcode(const KeyValue *value);
diff --git a/kvm-all.c b/kvm-all.c
index 721a3904a9..4e19eff0ef 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1410,7 +1410,7 @@ int kvm_init(MachineClass *mc)
 
     ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
     if (ret < KVM_API_VERSION) {
-        if (ret > 0) {
+        if (ret >= 0) {
             ret = -EINVAL;
         }
         fprintf(stderr, "kvm version too old\n");
@@ -1461,6 +1461,7 @@ int kvm_init(MachineClass *mc)
     if (mc->kvm_type) {
         type = mc->kvm_type(kvm_type);
     } else if (kvm_type) {
+        ret = -EINVAL;
         fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
         goto err;
     }
@@ -1561,6 +1562,7 @@ int kvm_init(MachineClass *mc)
     return 0;
 
 err:
+    assert(ret < 0);
     if (s->vmfd >= 0) {
         close(s->vmfd);
     }
diff --git a/linux-user/main.c b/linux-user/main.c
index 882186e1a0..3e21024056 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -4052,7 +4052,7 @@ int main(int argc, char **argv, char **envp)
 
 #if defined(TARGET_I386)
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
-    env->hflags |= HF_PE_MASK;
+    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
         env->cr[4] |= CR4_OSFXSR_MASK;
         env->hflags |= HF_OSFXSR_MASK;
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 36d4a738ea..ba3d8ab378 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -5,6 +5,7 @@
 #include <string.h>
 
 #include "cpu.h"
+#include "exec/cpu_ldst.h"
 
 #undef DEBUG_REMAP
 #ifdef DEBUG_REMAP
diff --git a/monitor.c b/monitor.c
index 593679a17a..0565816910 100644
--- a/monitor.c
+++ b/monitor.c
@@ -66,6 +66,7 @@
 #include "trace/simple.h"
 #endif
 #include "exec/memory.h"
+#include "exec/cpu_ldst.h"
 #include "qmp-commands.h"
 #include "hmp.h"
 #include "qemu/thread.h"
diff --git a/qemu-options.hx b/qemu-options.hx
index 4011d46d62..d0714c43a6 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1341,7 +1341,7 @@ ETEXI
 DEF("smbios", HAS_ARG, QEMU_OPTION_smbios,
     "-smbios file=binary\n"
     "                load SMBIOS entry from binary file\n"
-    "-smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d]\n"
+    "-smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d][,uefi=on|off]\n"
     "                specify SMBIOS type 0 fields\n"
     "-smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str]\n"
     "              [,uuid=uuid][,sku=str][,family=str]\n"
@@ -1351,7 +1351,7 @@ STEXI
 @findex -smbios
 Load SMBIOS entry from binary file.
 
-@item -smbios type=0[,vendor=@var{str}][,version=@var{str}][,date=@var{str}][,release=@var{%d.%d}]
+@item -smbios type=0[,vendor=@var{str}][,version=@var{str}][,date=@var{str}][,release=@var{%d.%d}][,uefi=on|off]
 Specify SMBIOS type 0 fields
 
 @item -smbios type=1[,manufacturer=@var{str}][,product=@var{str}] [,version=@var{str}][,serial=@var{str}][,uuid=@var{uuid}][,sku=@var{str}] [,family=@var{str}]
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index 762544b197..d7e97e7488 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -352,8 +352,8 @@ class TracepointProvider(object):
         return ret
 
 class Stats:
-    def __init__(self, provider, fields = None):
-        self.provider = provider
+    def __init__(self, providers, fields = None):
+        self.providers = providers
         self.fields_filter = fields
         self._update()
     def _update(self):
@@ -362,22 +362,25 @@ class Stats:
             if not self.fields_filter:
                 return True
             return re.match(self.fields_filter, key) is not None
-        self.values = dict([(key, None)
-                            for key in provider.fields()
-                            if wanted(key)])
-        self.provider.select(self.values.keys())
+        self.values = dict()
+        for d in providers:
+            provider_fields = [key for key in d.fields() if wanted(key)]
+            for key in provider_fields:
+                self.values[key] = None
+            d.select(provider_fields)
     def set_fields_filter(self, fields_filter):
         self.fields_filter = fields_filter
         self._update()
     def get(self):
-        new = self.provider.read()
-        for key in self.provider.fields():
-            oldval = self.values.get(key, (0, 0))
-            newval = new[key]
-            newdelta = None
-            if oldval is not None:
-                newdelta = newval - oldval[0]
-            self.values[key] = (newval, newdelta)
+        for d in providers:
+            new = d.read()
+            for key in d.fields():
+                oldval = self.values.get(key, (0, 0))
+                newval = new[key]
+                newdelta = None
+                if oldval is not None:
+                    newdelta = newval - oldval[0]
+                self.values[key] = (newval, newdelta)
         return self.values
 
 if not os.access('/sys/kernel/debug', os.F_OK):
@@ -487,6 +490,18 @@ options.add_option('-l', '--log',
                    dest = 'log',
                    help = 'run in logging mode (like vmstat)',
                    )
+options.add_option('-t', '--tracepoints',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'tracepoints',
+                   help = 'retrieve statistics from tracepoints',
+                   )
+options.add_option('-d', '--debugfs',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'debugfs',
+                   help = 'retrieve statistics from debugfs',
+                   )
 options.add_option('-f', '--fields',
                    action = 'store',
                    default = None,
@@ -495,12 +510,19 @@ options.add_option('-f', '--fields',
                    )
 (options, args) = options.parse_args(sys.argv)
 
-try:
-    provider = TracepointProvider()
-except:
-    provider = DebugfsProvider()
+providers = []
+if options.tracepoints:
+    providers.append(TracepointProvider())
+if options.debugfs:
+    providers.append(DebugfsProvider())
+
+if len(providers) == 0:
+    try:
+        providers = [TracepointProvider()]
+    except:
+        providers = [DebugfsProvider()]
 
-stats = Stats(provider, fields = options.fields)
+stats = Stats(providers, fields = options.fields)
 
 if options.log:
     log(stats)
diff --git a/include/exec/softmmu_template.h b/softmmu_template.h
index 73ed7cf921..5a07f991a1 100644
--- a/include/exec/softmmu_template.h
+++ b/softmmu_template.h
@@ -116,6 +116,7 @@
 # define helper_te_st_name  helper_le_st_name
 #endif
 
+#ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               hwaddr physaddr,
                                               target_ulong addr,
@@ -135,6 +136,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
     io_mem_read(mr, physaddr, &val, 1 << SHIFT);
     return val;
 }
+#endif
 
 #ifdef SOFTMMU_CODE_ACCESS
 static __attribute__((unused))
@@ -155,7 +157,8 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
          != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0) {
-            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                                 mmu_idx, retaddr);
         }
 #endif
         tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
@@ -186,7 +189,8 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
         unsigned shift;
     do_unaligned_access:
 #ifdef ALIGNED_ONLY
-        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                             mmu_idx, retaddr);
 #endif
         addr1 = addr & ~(DATA_SIZE - 1);
         addr2 = addr1 + DATA_SIZE;
@@ -204,7 +208,8 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     /* Handle aligned access or unaligned access in the same page.  */
 #ifdef ALIGNED_ONLY
     if ((addr & (DATA_SIZE - 1)) != 0) {
-        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                             mmu_idx, retaddr);
     }
 #endif
 
@@ -237,7 +242,8 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
          != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0) {
-            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                                 mmu_idx, retaddr);
         }
 #endif
         tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
@@ -268,7 +274,8 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
         unsigned shift;
     do_unaligned_access:
 #ifdef ALIGNED_ONLY
-        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                             mmu_idx, retaddr);
 #endif
         addr1 = addr & ~(DATA_SIZE - 1);
         addr2 = addr1 + DATA_SIZE;
@@ -286,7 +293,8 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     /* Handle aligned access or unaligned access in the same page.  */
 #ifdef ALIGNED_ONLY
     if ((addr & (DATA_SIZE - 1)) != 0) {
-        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                             mmu_idx, retaddr);
     }
 #endif
 
@@ -357,7 +365,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0) {
-            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+            cpu_unaligned_access(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
         }
 #endif
         tlb_fill(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
@@ -386,7 +394,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         int i;
     do_unaligned_access:
 #ifdef ALIGNED_ONLY
-        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
 #endif
         /* XXX: not efficient, but simple */
         /* Note: relies on the fact that tlb_fill() does not remove the
@@ -405,7 +413,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     /* Handle aligned access or unaligned access in the same page.  */
 #ifdef ALIGNED_ONLY
     if ((addr & (DATA_SIZE - 1)) != 0) {
-        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
     }
 #endif
 
@@ -433,7 +441,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0) {
-            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+            cpu_unaligned_access(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
         }
 #endif
         tlb_fill(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
@@ -462,7 +470,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         int i;
     do_unaligned_access:
 #ifdef ALIGNED_ONLY
-        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
 #endif
         /* XXX: not efficient, but simple */
         /* Note: relies on the fact that tlb_fill() does not remove the
@@ -481,7 +489,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     /* Handle aligned access or unaligned access in the same page.  */
 #ifdef ALIGNED_ONLY
     if ((addr & (DATA_SIZE - 1)) != 0) {
-        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, 1, mmu_idx, retaddr);
     }
 #endif
 
diff --git a/target-alpha/cpu-qom.h b/target-alpha/cpu-qom.h
index 198f1b13a3..0caa362f5b 100644
--- a/target-alpha/cpu-qom.h
+++ b/target-alpha/cpu-qom.h
@@ -84,5 +84,7 @@ void alpha_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
 hwaddr alpha_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int alpha_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
+                                   int is_write, int is_user, uintptr_t retaddr);
 
 #endif
diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 7ec46b90fc..2491f0a301 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -292,6 +292,7 @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
     cc->handle_mmu_fault = alpha_cpu_handle_mmu_fault;
 #else
     cc->do_unassigned_access = alpha_cpu_unassigned_access;
+    cc->do_unaligned_access = alpha_cpu_do_unaligned_access;
     cc->get_phys_page_debug = alpha_cpu_get_phys_page_debug;
     dc->vmsd = &vmstate_alpha_cpu;
 #endif
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 07d9f63d1f..d9b861f404 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -24,6 +24,7 @@
 #include "qemu-common.h"
 
 #define TARGET_LONG_BITS 64
+#define ALIGNED_ONLY
 
 #define CPUArchState struct CPUAlphaState
 
diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c
index ef6b7058cb..fc4f57a644 100644
--- a/target-alpha/mem_helper.c
+++ b/target-alpha/mem_helper.c
@@ -19,7 +19,7 @@
 
 #include "cpu.h"
 #include "exec/helper-proto.h"
-
+#include "exec/cpu_ldst.h"
 
 /* Softmmu support */
 #ifndef CONFIG_USER_ONLY
@@ -96,11 +96,11 @@ uint64_t helper_stq_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v)
     return ret;
 }
 
-static void do_unaligned_access(CPUAlphaState *env, target_ulong addr,
-                                int is_write, int is_user, uintptr_t retaddr)
+void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                   int is_write, int is_user, uintptr_t retaddr)
 {
-    AlphaCPU *cpu = alpha_env_get_cpu(env);
-    CPUState *cs = CPU(cpu);
+    AlphaCPU *cpu = ALPHA_CPU(cs);
+    CPUAlphaState *env = &cpu->env;
     uint64_t pc;
     uint32_t insn;
 
@@ -131,23 +131,6 @@ void alpha_cpu_unassigned_access(CPUState *cs, hwaddr addr,
     dynamic_excp(env, 0, EXCP_MCHK, 0);
 }
 
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-#define ALIGNED_ONLY
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index e31d56c629..cc81e774df 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -21,6 +21,7 @@
 #include "disas/disas.h"
 #include "qemu/host-utils.h"
 #include "tcg-op.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-arm/arm_ldst.h b/target-arm/arm_ldst.h
new file mode 100644
index 0000000000..b1ece01731
--- /dev/null
+++ b/target-arm/arm_ldst.h
@@ -0,0 +1,48 @@
+/*
+ * ARM load/store instructions for code (armeb-user support)
+ *
+ *  Copyright (c) 2012 CodeSourcery, LLC
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ARM_LDST_H
+#define ARM_LDST_H
+
+#include "exec/cpu_ldst.h"
+#include "qemu/bswap.h"
+
+/* Load an instruction and return it in the standard little-endian order */
+static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
+                                    bool do_swap)
+{
+    uint32_t insn = cpu_ldl_code(env, addr);
+    if (do_swap) {
+        return bswap32(insn);
+    }
+    return insn;
+}
+
+/* Ditto, for a halfword (Thumb) instruction */
+static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
+                                     bool do_swap)
+{
+    uint16_t insn = cpu_lduw_code(env, addr);
+    if (do_swap) {
+        return bswap16(insn);
+    }
+    return insn;
+}
+
+#endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 8d04385261..7d8332e8be 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -1199,26 +1199,4 @@ static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb)
     }
 }
 
-/* Load an instruction and return it in the standard little-endian order */
-static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
-                                    bool do_swap)
-{
-    uint32_t insn = cpu_ldl_code(env, addr);
-    if (do_swap) {
-        return bswap32(insn);
-    }
-    return insn;
-}
-
-/* Ditto, for a halfword (Thumb) instruction */
-static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
-                                     bool do_swap)
-{
-    uint16_t insn = cpu_lduw_code(env, addr);
-    if (do_swap) {
-        return bswap16(insn);
-    }
-    return insn;
-}
-
 #endif
diff --git a/target-arm/helper.c b/target-arm/helper.c
index ec031f5947..95af624126 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -7,11 +7,11 @@
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
 #include "qemu/crc32c.h"
+#include "exec/cpu_ldst.h"
+#include "arm_ldst.h"
 #include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
-#include "exec/softmmu_exec.h"
-
 static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index b28f694d00..9c1ef525a3 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -19,6 +19,7 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "internals.h"
+#include "exec/cpu_ldst.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
@@ -56,22 +57,6 @@ uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
 
 #if !defined(CONFIG_USER_ONLY)
 
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 /* try to fill the TLB and return an exception if error. If retaddr is
  * NULL, it means that the function was called in C code (i.e. not
  * from generated code or from helper.c)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 9f964dfd5d..a9c4633517 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -25,6 +25,7 @@
 #include "cpu.h"
 #include "tcg-op.h"
 #include "qemu/log.h"
+#include "arm_ldst.h"
 #include "translate.h"
 #include "internals.h"
 #include "qemu/host-utils.h"
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 7f6fcd699e..d499caa562 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -30,6 +30,7 @@
 #include "tcg-op.h"
 #include "qemu/log.h"
 #include "qemu/bitops.h"
+#include "arm_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-cris/helper.c b/target-cris/helper.c
index 4092d279ba..e8b8261fe9 100644
--- a/target-cris/helper.c
+++ b/target-cris/helper.c
@@ -21,6 +21,7 @@
 #include "cpu.h"
 #include "mmu.h"
 #include "qemu/host-utils.h"
+#include "exec/cpu_ldst.h"
 
 
 //#define CRIS_HELPER_DEBUG
diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c
index a9bd742d3b..5c0c14d992 100644
--- a/target-cris/op_helper.c
+++ b/target-cris/op_helper.c
@@ -22,6 +22,7 @@
 #include "mmu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
+#include "exec/cpu_ldst.h"
 
 //#define CRIS_OP_HELPER_DEBUG
 
@@ -35,22 +36,6 @@
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 90fe0a24b5..f26c323686 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -28,6 +28,7 @@
 #include "tcg-op.h"
 #include "exec/helper-proto.h"
 #include "mmu.h"
+#include "exec/cpu_ldst.h"
 #include "crisv32-decode.h"
 
 #include "exec/helper-gen.h"
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index e9b3d577b3..0808cfc67d 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -87,6 +87,7 @@ typedef struct X86CPU {
     bool hyperv_time;
     bool check_cpuid;
     bool enforce_cpuid;
+    bool expose_kvm;
 
     /* if true the CPUID code directly forward host cache leaves to the guest */
     bool cache_info_passthrough;
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 042a48d703..dde052cc42 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -552,8 +552,7 @@ struct X86CPUDefinition {
           CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
           CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS)
           /* partly implemented:
-          CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64)
-          CPUID_PSE36 (needed for Solaris) */
+          CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
           /* missing:
           CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
@@ -569,9 +568,7 @@ struct X86CPUDefinition {
           CPUID_EXT_RDRAND */
 #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
-          CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
-          /* missing:
-          CPUID_EXT2_PDPE1GB */
+          CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB)
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
@@ -2792,6 +2789,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
+    DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e9cbdabc03..b5e1b411fb 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -249,6 +249,7 @@
 #define PG_DIRTY_BIT    6
 #define PG_PSE_BIT      7
 #define PG_GLOBAL_BIT   8
+#define PG_PSE_PAT_BIT  12
 #define PG_NX_BIT       63
 
 #define PG_PRESENT_MASK  (1 << PG_PRESENT_BIT)
@@ -260,6 +261,9 @@
 #define PG_DIRTY_MASK    (1 << PG_DIRTY_BIT)
 #define PG_PSE_MASK      (1 << PG_PSE_BIT)
 #define PG_GLOBAL_MASK   (1 << PG_GLOBAL_BIT)
+#define PG_PSE_PAT_MASK  (1 << PG_PSE_PAT_BIT)
+#define PG_ADDRESS_MASK  0x000ffffffffff000LL
+#define PG_HI_RSVD_MASK  (PG_ADDRESS_MASK & ~PHYS_ADDR_MASK)
 #define PG_HI_USER_MASK  0x7ff0000000000000LL
 #define PG_NX_MASK       (1LL << PG_NX_BIT)
 
@@ -986,7 +990,6 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
     /* update the hidden flags */
     {
         if (seg_reg == R_CS) {
-            int cpl = selector & 3;
 #ifdef TARGET_X86_64
             if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
                 /* long mode */
@@ -996,15 +999,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
 #endif
             {
                 /* legacy / compatibility case */
-                if (!(env->cr[0] & CR0_PE_MASK))
-                    cpl = 0;
-                else if (env->eflags & VM_MASK)
-                    cpl = 3;
                 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                     >> (DESC_B_SHIFT - HF_CS32_SHIFT);
                 env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
                     new_hflags;
             }
+        }
+        if (seg_reg == R_SS) {
+            int cpl = (flags >> DESC_DPL_SHIFT) & 3;
 #if HF_CPL_MASK != 3
 #error HF_CPL_MASK is hardcoded
 #endif
@@ -1137,6 +1139,14 @@ uint64_t cpu_get_tsc(CPUX86State *env);
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
 #endif
 
+/* XXX: This value should match the one returned by CPUID
+ * and in exec.c */
+# if defined(TARGET_X86_64)
+# define PHYS_ADDR_MASK 0xffffffffffLL
+# else
+# define PHYS_ADDR_MASK 0xfffffffffLL
+# endif
+
 static inline CPUX86State *cpu_init(const char *cpu_model)
 {
     X86CPU *cpu = cpu_x86_init(cpu_model);
@@ -1153,17 +1163,24 @@ static inline CPUX86State *cpu_init(const char *cpu_model)
 #define cpudef_setup x86_cpudef_setup
 
 /* MMU modes definitions */
-#define MMU_MODE0_SUFFIX _kernel
+#define MMU_MODE0_SUFFIX _ksmap
 #define MMU_MODE1_SUFFIX _user
-#define MMU_MODE2_SUFFIX _ksmap /* Kernel with SMAP override */
-#define MMU_KERNEL_IDX  0
+#define MMU_MODE2_SUFFIX _knosmap /* SMAP disabled or CPL<3 && AC=1 */
+#define MMU_KSMAP_IDX   0
 #define MMU_USER_IDX    1
-#define MMU_KSMAP_IDX   2
-static inline int cpu_mmu_index (CPUX86State *env)
+#define MMU_KNOSMAP_IDX 2
+static inline int cpu_mmu_index(CPUX86State *env)
 {
     return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
-        ((env->hflags & HF_SMAP_MASK) && (env->eflags & AC_MASK))
-        ? MMU_KSMAP_IDX : MMU_KERNEL_IDX;
+        (!(env->hflags & HF_SMAP_MASK) || (env->eflags & AC_MASK))
+        ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX;
+}
+
+static inline int cpu_mmu_index_kernel(CPUX86State *env)
+{
+    return !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP_IDX :
+        ((env->hflags & HF_CPL_MASK) < 3 && (env->eflags & AC_MASK))
+        ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX;
 }
 
 #define CC_DST  (env->cc_dst)
@@ -1234,11 +1251,14 @@ static inline uint32_t cpu_compute_eflags(CPUX86State *env)
     return env->eflags | cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
 }
 
-/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS
+ * after generating a call to a helper that uses this.
+ */
 static inline void cpu_load_eflags(CPUX86State *env, int eflags,
                                    int update_mask)
 {
     CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    CC_OP = CC_OP_EFLAGS;
     env->df = 1 - (2 * ((eflags >> 10) & 1));
     env->eflags = (env->eflags & ~update_mask) |
         (eflags & update_mask) | 0x2;
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index a04e754e61..1b2900d5d2 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -22,10 +22,7 @@
 #include "exec/helper-proto.h"
 #include "qemu/aes.h"
 #include "qemu/host-utils.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
+#include "exec/cpu_ldst.h"
 
 #define FPU_RC_MASK         0xc00
 #define FPU_RC_NEAR         0x000
diff --git a/target-i386/gdbstub.c b/target-i386/gdbstub.c
index d34e5355f7..19fe9adc3f 100644
--- a/target-i386/gdbstub.c
+++ b/target-i386/gdbstub.c
@@ -127,9 +127,11 @@ static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf)
         target_ulong base;
 
         if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+            int dpl = (env->eflags & VM_MASK) ? 3 : 0;
             base = selector << 4;
             limit = 0xffff;
-            flags = 0;
+            flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                    DESC_A_MASK | (dpl << DESC_DPL_SHIFT);
         } else {
             if (!cpu_x86_get_descr_debug(env, selector, &base, &limit,
                                          &flags)) {
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 46d20e4b89..11ca8649b5 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -510,14 +510,6 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 
 #else
 
-/* XXX: This value should match the one returned by CPUID
- * and in exec.c */
-# if defined(TARGET_X86_64)
-# define PHYS_ADDR_MASK 0xfffffff000LL
-# else
-# define PHYS_ADDR_MASK 0xffffff000LL
-# endif
-
 /* return value:
  * -1 = cannot handle fault
  * 0  = nothing more to do
@@ -530,10 +522,12 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
     CPUX86State *env = &cpu->env;
     uint64_t ptep, pte;
     target_ulong pde_addr, pte_addr;
-    int error_code, is_dirty, prot, page_size, is_write, is_user;
+    int error_code = 0;
+    int is_dirty, prot, page_size, is_write, is_user;
     hwaddr paddr;
+    uint64_t rsvd_mask = PG_HI_RSVD_MASK;
     uint32_t page_offset;
-    target_ulong vaddr, virt_addr;
+    target_ulong vaddr;
 
     is_user = mmu_idx == MMU_USER_IDX;
 #if defined(DEBUG_MMU)
@@ -550,12 +544,15 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
             pte = (uint32_t)pte;
         }
 #endif
-        virt_addr = addr & TARGET_PAGE_MASK;
         prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
         page_size = 4096;
         goto do_mapping;
     }
 
+    if (!(env->efer & MSR_EFER_NXE)) {
+        rsvd_mask |= PG_NX_MASK;
+    }
+
     if (env->cr[4] & CR4_PAE_MASK) {
         uint64_t pde, pdpe;
         target_ulong pdpe_addr;
@@ -577,34 +574,37 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
                 env->a20_mask;
             pml4e = ldq_phys(cs->as, pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
-                error_code = 0;
                 goto do_fault;
             }
-            if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) {
-                error_code = PG_ERROR_RSVD_MASK;
-                goto do_fault;
+            if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
+                goto do_fault_rsvd;
             }
             if (!(pml4e & PG_ACCESSED_MASK)) {
                 pml4e |= PG_ACCESSED_MASK;
                 stl_phys_notdirty(cs->as, pml4e_addr, pml4e);
             }
             ptep = pml4e ^ PG_NX_MASK;
-            pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
+            pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
             pdpe = ldq_phys(cs->as, pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK)) {
-                error_code = 0;
                 goto do_fault;
             }
-            if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) {
-                error_code = PG_ERROR_RSVD_MASK;
-                goto do_fault;
+            if (pdpe & rsvd_mask) {
+                goto do_fault_rsvd;
             }
             ptep &= pdpe ^ PG_NX_MASK;
             if (!(pdpe & PG_ACCESSED_MASK)) {
                 pdpe |= PG_ACCESSED_MASK;
                 stl_phys_notdirty(cs->as, pdpe_addr, pdpe);
             }
+            if (pdpe & PG_PSE_MASK) {
+                /* 1 GB page */
+                page_size = 1024 * 1024 * 1024;
+                pte_addr = pdpe_addr;
+                pte = pdpe;
+                goto do_check_protect;
+            }
         } else
 #endif
         {
@@ -613,134 +613,49 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
                 env->a20_mask;
             pdpe = ldq_phys(cs->as, pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK)) {
-                error_code = 0;
                 goto do_fault;
             }
+            rsvd_mask |= PG_HI_USER_MASK | PG_NX_MASK;
+            if (pdpe & rsvd_mask) {
+                goto do_fault_rsvd;
+            }
             ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
         }
 
-        pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
+        pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
         pde = ldq_phys(cs->as, pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
-            error_code = 0;
             goto do_fault;
         }
-        if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) {
-            error_code = PG_ERROR_RSVD_MASK;
-            goto do_fault;
+        if (pde & rsvd_mask) {
+            goto do_fault_rsvd;
         }
         ptep &= pde ^ PG_NX_MASK;
         if (pde & PG_PSE_MASK) {
             /* 2 MB page */
             page_size = 2048 * 1024;
-            ptep ^= PG_NX_MASK;
-            if ((ptep & PG_NX_MASK) && is_write1 == 2) {
-                goto do_fault_protect;
-            }
-            switch (mmu_idx) {
-            case MMU_USER_IDX:
-                if (!(ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if (is_write && !(ptep & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
-
-            case MMU_KERNEL_IDX:
-                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
-                    (ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                /* fall through */
-            case MMU_KSMAP_IDX:
-                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
-                    (ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
-
-            default: /* cannot happen */
-                break;
-            }
-            is_dirty = is_write && !(pde & PG_DIRTY_MASK);
-            if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
-                pde |= PG_ACCESSED_MASK;
-                if (is_dirty)
-                    pde |= PG_DIRTY_MASK;
-                stl_phys_notdirty(cs->as, pde_addr, pde);
-            }
-            /* align to page_size */
-            pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff);
-            virt_addr = addr & ~(page_size - 1);
-        } else {
-            /* 4 KB page */
-            if (!(pde & PG_ACCESSED_MASK)) {
-                pde |= PG_ACCESSED_MASK;
-                stl_phys_notdirty(cs->as, pde_addr, pde);
-            }
-            pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
-                env->a20_mask;
-            pte = ldq_phys(cs->as, pte_addr);
-            if (!(pte & PG_PRESENT_MASK)) {
-                error_code = 0;
-                goto do_fault;
-            }
-            if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) {
-                error_code = PG_ERROR_RSVD_MASK;
-                goto do_fault;
-            }
-            /* combine pde and pte nx, user and rw protections */
-            ptep &= pte ^ PG_NX_MASK;
-            ptep ^= PG_NX_MASK;
-            if ((ptep & PG_NX_MASK) && is_write1 == 2)
-                goto do_fault_protect;
-            switch (mmu_idx) {
-            case MMU_USER_IDX:
-                if (!(ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if (is_write && !(ptep & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
-
-            case MMU_KERNEL_IDX:
-                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
-                    (ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                /* fall through */
-            case MMU_KSMAP_IDX:
-                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
-                    (ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
-
-            default: /* cannot happen */
-                break;
-            }
-            is_dirty = is_write && !(pte & PG_DIRTY_MASK);
-            if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
-                pte |= PG_ACCESSED_MASK;
-                if (is_dirty)
-                    pte |= PG_DIRTY_MASK;
-                stl_phys_notdirty(cs->as, pte_addr, pte);
-            }
-            page_size = 4096;
-            virt_addr = addr & ~0xfff;
-            pte = pte & (PHYS_ADDR_MASK | 0xfff);
+            pte_addr = pde_addr;
+            pte = pde;
+            goto do_check_protect;
+        }
+        /* 4 KB page */
+        if (!(pde & PG_ACCESSED_MASK)) {
+            pde |= PG_ACCESSED_MASK;
+            stl_phys_notdirty(cs->as, pde_addr, pde);
+        }
+        pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
+            env->a20_mask;
+        pte = ldq_phys(cs->as, pte_addr);
+        if (!(pte & PG_PRESENT_MASK)) {
+            goto do_fault;
+        }
+        if (pte & rsvd_mask) {
+            goto do_fault_rsvd;
         }
+        /* combine pde and pte nx, user and rw protections */
+        ptep &= pte ^ PG_NX_MASK;
+        page_size = 4096;
     } else {
         uint32_t pde;
 
@@ -749,114 +664,95 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
             env->a20_mask;
         pde = ldl_phys(cs->as, pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
-            error_code = 0;
             goto do_fault;
         }
+        ptep = pde | PG_NX_MASK;
+
         /* if PSE bit is set, then we use a 4MB page */
         if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
             page_size = 4096 * 1024;
-            switch (mmu_idx) {
-            case MMU_USER_IDX:
-                if (!(pde & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if (is_write && !(pde & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
-
-            case MMU_KERNEL_IDX:
-                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
-                    (pde & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                /* fall through */
-            case MMU_KSMAP_IDX:
-                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
-                    (pde & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(pde & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
+            pte_addr = pde_addr;
+
+            /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
+             * Leave bits 20-13 in place for setting accessed/dirty bits below.
+             */
+            pte = pde | ((pde & 0x1fe000) << (32 - 13));
+            rsvd_mask = 0x200000;
+            goto do_check_protect_pse36;
+        }
 
-            default: /* cannot happen */
-                break;
-            }
-            is_dirty = is_write && !(pde & PG_DIRTY_MASK);
-            if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
-                pde |= PG_ACCESSED_MASK;
-                if (is_dirty)
-                    pde |= PG_DIRTY_MASK;
-                stl_phys_notdirty(cs->as, pde_addr, pde);
-            }
+        if (!(pde & PG_ACCESSED_MASK)) {
+            pde |= PG_ACCESSED_MASK;
+            stl_phys_notdirty(cs->as, pde_addr, pde);
+        }
 
-            pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
-            ptep = pte;
-            virt_addr = addr & ~(page_size - 1);
-        } else {
-            if (!(pde & PG_ACCESSED_MASK)) {
-                pde |= PG_ACCESSED_MASK;
-                stl_phys_notdirty(cs->as, pde_addr, pde);
-            }
+        /* page directory entry */
+        pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
+            env->a20_mask;
+        pte = ldl_phys(cs->as, pte_addr);
+        if (!(pte & PG_PRESENT_MASK)) {
+            goto do_fault;
+        }
+        /* combine pde and pte user and rw protections */
+        ptep &= pte | PG_NX_MASK;
+        page_size = 4096;
+        rsvd_mask = 0;
+    }
 
-            /* page directory entry */
-            pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
-                env->a20_mask;
-            pte = ldl_phys(cs->as, pte_addr);
-            if (!(pte & PG_PRESENT_MASK)) {
-                error_code = 0;
-                goto do_fault;
-            }
-            /* combine pde and pte user and rw protections */
-            ptep = pte & pde;
-            switch (mmu_idx) {
-            case MMU_USER_IDX:
-                if (!(ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if (is_write && !(ptep & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
+do_check_protect:
+    rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
+do_check_protect_pse36:
+    if (pte & rsvd_mask) {
+        goto do_fault_rsvd;
+    }
+    ptep ^= PG_NX_MASK;
+    if ((ptep & PG_NX_MASK) && is_write1 == 2) {
+        goto do_fault_protect;
+    }
+    switch (mmu_idx) {
+    case MMU_USER_IDX:
+        if (!(ptep & PG_USER_MASK)) {
+            goto do_fault_protect;
+        }
+        if (is_write && !(ptep & PG_RW_MASK)) {
+            goto do_fault_protect;
+        }
+        break;
 
-            case MMU_KERNEL_IDX:
-                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
-                    (ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                /* fall through */
-            case MMU_KSMAP_IDX:
-                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
-                    (ptep & PG_USER_MASK)) {
-                    goto do_fault_protect;
-                }
-                if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK)) {
-                    goto do_fault_protect;
-                }
-                break;
+    case MMU_KSMAP_IDX:
+        if (is_write1 != 2 && (ptep & PG_USER_MASK)) {
+            goto do_fault_protect;
+        }
+        /* fall through */
+    case MMU_KNOSMAP_IDX:
+        if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+            (ptep & PG_USER_MASK)) {
+            goto do_fault_protect;
+        }
+        if ((env->cr[0] & CR0_WP_MASK) &&
+            is_write && !(ptep & PG_RW_MASK)) {
+            goto do_fault_protect;
+        }
+        break;
 
-            default: /* cannot happen */
-                break;
-            }
-            is_dirty = is_write && !(pte & PG_DIRTY_MASK);
-            if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
-                pte |= PG_ACCESSED_MASK;
-                if (is_dirty)
-                    pte |= PG_DIRTY_MASK;
-                stl_phys_notdirty(cs->as, pte_addr, pte);
-            }
-            page_size = 4096;
-            virt_addr = addr & ~0xfff;
+    default: /* cannot happen */
+        break;
+    }
+    is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+    if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+        pte |= PG_ACCESSED_MASK;
+        if (is_dirty) {
+            pte |= PG_DIRTY_MASK;
         }
+        stl_phys_notdirty(cs->as, pte_addr, pte);
     }
+
     /* the page can be put in the TLB */
     prot = PAGE_READ;
-    if (!(ptep & PG_NX_MASK))
+    if (!(ptep & PG_NX_MASK) &&
+        !((env->cr[4] & CR4_SMEP_MASK) && (ptep & PG_USER_MASK))) {
         prot |= PAGE_EXEC;
+    }
     if (pte & PG_DIRTY_MASK) {
         /* only set write access if already dirty... otherwise wait
            for dirty access */
@@ -872,16 +768,21 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
  do_mapping:
     pte = pte & env->a20_mask;
 
+    /* align to page_size */
+    pte &= PG_ADDRESS_MASK & ~(page_size - 1);
+
     /* Even if 4MB pages, we map only one 4KB page in the cache to
        avoid filling it too fast */
-    page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
-    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
-    vaddr = virt_addr + page_offset;
+    vaddr = addr & TARGET_PAGE_MASK;
+    page_offset = vaddr & (page_size - 1);
+    paddr = pte + page_offset;
 
     tlb_set_page(cs, vaddr, paddr, prot, mmu_idx, page_size);
     return 0;
+ do_fault_rsvd:
+    error_code |= PG_ERROR_RSVD_MASK;
  do_fault_protect:
-    error_code = PG_ERROR_P_MASK;
+    error_code |= PG_ERROR_P_MASK;
  do_fault:
     error_code |= (is_write << PG_ERROR_W_BIT);
     if (is_user)
@@ -910,7 +811,6 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     CPUX86State *env = &cpu->env;
     target_ulong pde_addr, pte_addr;
     uint64_t pte;
-    hwaddr paddr;
     uint32_t page_offset;
     int page_size;
 
@@ -928,25 +828,24 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 
             /* test virtual address sign extension */
             sext = (int64_t)addr >> 47;
-            if (sext != 0 && sext != -1)
+            if (sext != 0 && sext != -1) {
                 return -1;
-
+            }
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
             pml4e = ldq_phys(cs->as, pml4e_addr);
-            if (!(pml4e & PG_PRESENT_MASK))
+            if (!(pml4e & PG_PRESENT_MASK)) {
                 return -1;
-
-            pdpe_addr = ((pml4e & ~0xfff & ~(PG_NX_MASK | PG_HI_USER_MASK)) +
+            }
+            pdpe_addr = ((pml4e & PG_ADDRESS_MASK) +
                          (((addr >> 30) & 0x1ff) << 3)) & env->a20_mask;
             pdpe = ldq_phys(cs->as, pdpe_addr);
-            if (!(pdpe & PG_PRESENT_MASK))
+            if (!(pdpe & PG_PRESENT_MASK)) {
                 return -1;
-
+            }
             if (pdpe & PG_PSE_MASK) {
                 page_size = 1024 * 1024 * 1024;
-                pte = pdpe & ~( (page_size - 1) & ~0xfff);
-                pte &= ~(PG_NX_MASK | PG_HI_USER_MASK);
+                pte = pdpe;
                 goto out;
             }
 
@@ -960,7 +859,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
                 return -1;
         }
 
-        pde_addr = ((pdpe & ~0xfff & ~(PG_NX_MASK | PG_HI_USER_MASK)) +
+        pde_addr = ((pdpe & PG_ADDRESS_MASK) +
                     (((addr >> 21) & 0x1ff) << 3)) & env->a20_mask;
         pde = ldq_phys(cs->as, pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
@@ -969,17 +868,17 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
         if (pde & PG_PSE_MASK) {
             /* 2 MB page */
             page_size = 2048 * 1024;
-            pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+            pte = pde;
         } else {
             /* 4 KB page */
-            pte_addr = ((pde & ~0xfff & ~(PG_NX_MASK | PG_HI_USER_MASK)) +
+            pte_addr = ((pde & PG_ADDRESS_MASK) +
                         (((addr >> 12) & 0x1ff) << 3)) & env->a20_mask;
             page_size = 4096;
             pte = ldq_phys(cs->as, pte_addr);
         }
-        pte &= ~(PG_NX_MASK | PG_HI_USER_MASK);
-        if (!(pte & PG_PRESENT_MASK))
+        if (!(pte & PG_PRESENT_MASK)) {
             return -1;
+        }
     } else {
         uint32_t pde;
 
@@ -989,14 +888,15 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
         if (!(pde & PG_PRESENT_MASK))
             return -1;
         if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
-            pte = pde & ~0x003ff000; /* align to 4MB */
+            pte = pde | ((pde & 0x1fe000) << (32 - 13));
             page_size = 4096 * 1024;
         } else {
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
             pte = ldl_phys(cs->as, pte_addr);
-            if (!(pte & PG_PRESENT_MASK))
+            if (!(pte & PG_PRESENT_MASK)) {
                 return -1;
+            }
             page_size = 4096;
         }
         pte = pte & env->a20_mask;
@@ -1005,9 +905,9 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 #ifdef TARGET_X86_64
 out:
 #endif
+    pte &= PG_ADDRESS_MASK & ~(page_size - 1);
     page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
-    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
-    return paddr;
+    return pte | page_offset;
 }
 
 void hw_breakpoint_insert(CPUX86State *env, int index)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 0d894ef4aa..4bf0ac9e76 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -528,23 +528,25 @@ int kvm_arch_init_vcpu(CPUState *cs)
         has_msr_hv_hypercall = true;
     }
 
-    memcpy(signature, "KVMKVMKVM\0\0\0", 12);
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_SIGNATURE | kvm_base;
-    c->eax = 0;
-    c->ebx = signature[0];
-    c->ecx = signature[1];
-    c->edx = signature[2];
+    if (cpu->expose_kvm) {
+        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_SIGNATURE | kvm_base;
+        c->eax = KVM_CPUID_FEATURES | kvm_base;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
 
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_FEATURES | kvm_base;
-    c->eax = env->features[FEAT_KVM];
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_FEATURES | kvm_base;
+        c->eax = env->features[FEAT_KVM];
 
-    has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
+        has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
 
-    has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
+        has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
 
-    has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+        has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+    }
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
@@ -1430,7 +1432,7 @@ static int kvm_get_sregs(X86CPU *cpu)
        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
 
-    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
     hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
     hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
                 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 168cab681b..bdff447786 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -312,6 +312,14 @@ static int cpu_post_load(void *opaque, int version_id)
         env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
     }
 
+    /* Older versions of QEMU incorrectly used CS.DPL as the CPL when
+     * running under KVM.  This is wrong for conforming code segments.
+     * Luckily, in our implementation the CPL field of hflags is redundant
+     * and we can get the right value from the SS descriptor privilege level.
+     */
+    env->hflags &= ~HF_CPL_MASK;
+    env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+
     /* XXX: restore FPU round state */
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
     env->fpus = env->fpus_vmstate & ~0x3800;
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 83aa1038d7..1aec8a5f19 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -19,10 +19,7 @@
 
 #include "cpu.h"
 #include "exec/helper-proto.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
+#include "exec/cpu_ldst.h"
 
 /* broken thread support */
 
@@ -110,24 +107,6 @@ void helper_boundl(CPUX86State *env, target_ulong a0, int v)
 }
 
 #if !defined(CONFIG_USER_ONLY)
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
-#endif
-
-#if !defined(CONFIG_USER_ONLY)
 /* try to fill the TLB and return an exception if error. If retaddr is
  * NULL, it means that the function was called in C code (i.e. not
  * from generated code or from helper.c)
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index 9cfa25f9ec..4aaf1e4d95 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -20,52 +20,7 @@
 #include "cpu.h"
 #include "exec/ioport.h"
 #include "exec/helper-proto.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
-
-/* check if Port I/O is allowed in TSS */
-static inline void check_io(CPUX86State *env, int addr, int size)
-{
-    int io_offset, val, mask;
-
-    /* TSS must be a valid 32 bit one */
-    if (!(env->tr.flags & DESC_P_MASK) ||
-        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
-        env->tr.limit < 103) {
-        goto fail;
-    }
-    io_offset = cpu_lduw_kernel(env, env->tr.base + 0x66);
-    io_offset += (addr >> 3);
-    /* Note: the check needs two bytes */
-    if ((io_offset + 1) > env->tr.limit) {
-        goto fail;
-    }
-    val = cpu_lduw_kernel(env, env->tr.base + io_offset);
-    val >>= (addr & 7);
-    mask = (1 << size) - 1;
-    /* all bits must be zero to allow the I/O */
-    if ((val & mask) != 0) {
-    fail:
-        raise_exception_err(env, EXCP0D_GPF, 0);
-    }
-}
-
-void helper_check_iob(CPUX86State *env, uint32_t t0)
-{
-    check_io(env, t0, 1);
-}
-
-void helper_check_iow(CPUX86State *env, uint32_t t0)
-{
-    check_io(env, t0, 2);
-}
-
-void helper_check_iol(CPUX86State *env, uint32_t t0)
-{
-    check_io(env, t0, 4);
-}
+#include "exec/cpu_ldst.h"
 
 void helper_outb(uint32_t port, uint32_t data)
 {
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 258aae806a..2d970d0cb9 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -21,13 +21,10 @@
 #include "cpu.h"
 #include "qemu/log.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 //#define DEBUG_PCALL
 
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
-
 #ifdef DEBUG_PCALL
 # define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
 # define LOG_PCALL_STATE(cpu)                                  \
@@ -37,6 +34,24 @@
 # define LOG_PCALL_STATE(cpu) do { } while (0)
 #endif
 
+#ifndef CONFIG_USER_ONLY
+#define CPU_MMU_INDEX (cpu_mmu_index_kernel(env))
+#define MEMSUFFIX _kernel
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif
+
 /* return non zero if error */
 static inline int load_segment(CPUX86State *env, uint32_t *e1_ptr,
                                uint32_t *e2_ptr, int selector)
@@ -88,8 +103,10 @@ static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1,
 static inline void load_seg_vm(CPUX86State *env, int seg, int selector)
 {
     selector &= 0xffff;
-    cpu_x86_load_seg_cache(env, seg, selector,
-                           (selector << 4), 0xffff, 0);
+
+    cpu_x86_load_seg_cache(env, seg, selector, (selector << 4), 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK | (3 << DESC_DPL_SHIFT));
 }
 
 static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
@@ -133,11 +150,10 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
     }
 }
 
-/* XXX: merge with load_seg() */
-static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
+static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl)
 {
     uint32_t e1, e2;
-    int rpl, dpl, cpl;
+    int rpl, dpl;
 
     if ((selector & 0xfffc) != 0) {
         if (load_segment(env, &e1, &e2, selector) != 0) {
@@ -148,18 +164,13 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
         }
         rpl = selector & 3;
         dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        cpl = env->hflags & HF_CPL_MASK;
         if (seg_reg == R_CS) {
             if (!(e2 & DESC_CS_MASK)) {
                 raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
             }
-            /* XXX: is it correct? */
             if (dpl != rpl) {
                 raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
             }
-            if ((e2 & DESC_C_MASK) && dpl > rpl) {
-                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
-            }
         } else if (seg_reg == R_SS) {
             /* SS must be writable data */
             if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) {
@@ -446,12 +457,13 @@ static void switch_tss(CPUX86State *env, int tss_selector,
 
     /* load the segments */
     if (!(new_eflags & VM_MASK)) {
-        tss_load_seg(env, R_CS, new_segs[R_CS]);
-        tss_load_seg(env, R_SS, new_segs[R_SS]);
-        tss_load_seg(env, R_ES, new_segs[R_ES]);
-        tss_load_seg(env, R_DS, new_segs[R_DS]);
-        tss_load_seg(env, R_FS, new_segs[R_FS]);
-        tss_load_seg(env, R_GS, new_segs[R_GS]);
+        int cpl = new_segs[R_CS] & 3;
+        tss_load_seg(env, R_CS, new_segs[R_CS], cpl);
+        tss_load_seg(env, R_SS, new_segs[R_SS], cpl);
+        tss_load_seg(env, R_ES, new_segs[R_ES], cpl);
+        tss_load_seg(env, R_DS, new_segs[R_DS], cpl);
+        tss_load_seg(env, R_FS, new_segs[R_FS], cpl);
+        tss_load_seg(env, R_GS, new_segs[R_GS], cpl);
     }
 
     /* check that env->eip is in the CS segment limits */
@@ -558,6 +570,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     int has_error_code, new_stack, shift;
     uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
     uint32_t old_eip, sp_mask;
+    int vm86 = env->eflags & VM_MASK;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -673,7 +686,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         ssp = get_seg_base(ss_e1, ss_e2);
     } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
         /* to same privilege */
-        if (env->eflags & VM_MASK) {
+        if (vm86) {
             raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
         }
         new_stack = 0;
@@ -694,14 +707,14 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
 #if 0
     /* XXX: check that enough room is available */
     push_size = 6 + (new_stack << 2) + (has_error_code << 1);
-    if (env->eflags & VM_MASK) {
+    if (vm86) {
         push_size += 8;
     }
     push_size <<= shift;
 #endif
     if (shift == 1) {
         if (new_stack) {
-            if (env->eflags & VM_MASK) {
+            if (vm86) {
                 PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
                 PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
                 PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
@@ -718,7 +731,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         }
     } else {
         if (new_stack) {
-            if (env->eflags & VM_MASK) {
+            if (vm86) {
                 PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
                 PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
                 PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
@@ -742,7 +755,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 
     if (new_stack) {
-        if (env->eflags & VM_MASK) {
+        if (vm86) {
             cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
             cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
             cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
@@ -1600,7 +1613,6 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
             }
             next_eip = env->eip + next_eip_addend;
             switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
-            CC_OP = CC_OP_EFLAGS;
             break;
         case 4: /* 286 call gate */
         case 12: /* 386 call gate */
@@ -1769,7 +1781,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
                 raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
             }
             switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
-            CC_OP = CC_OP_EFLAGS;
             return;
         case 4: /* 286 call gate */
         case 12: /* 386 call gate */
@@ -2464,11 +2475,56 @@ void helper_verw(CPUX86State *env, target_ulong selector1)
 void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector)
 {
     if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+        int dpl = (env->eflags & VM_MASK) ? 3 : 0;
         selector &= 0xffff;
         cpu_x86_load_seg_cache(env, seg_reg, selector,
-                               (selector << 4), 0xffff, 0);
+                               (selector << 4), 0xffff,
+                               DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                               DESC_A_MASK | (dpl << DESC_DPL_SHIFT));
     } else {
         helper_load_seg(env, seg_reg, selector);
     }
 }
 #endif
+
+/* check if Port I/O is allowed in TSS */
+static inline void check_io(CPUX86State *env, int addr, int size)
+{
+    int io_offset, val, mask;
+
+    /* TSS must be a valid 32 bit one */
+    if (!(env->tr.flags & DESC_P_MASK) ||
+        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
+        env->tr.limit < 103) {
+        goto fail;
+    }
+    io_offset = cpu_lduw_kernel(env, env->tr.base + 0x66);
+    io_offset += (addr >> 3);
+    /* Note: the check needs two bytes */
+    if ((io_offset + 1) > env->tr.limit) {
+        goto fail;
+    }
+    val = cpu_lduw_kernel(env, env->tr.base + io_offset);
+    val >>= (addr & 7);
+    mask = (1 << size) - 1;
+    /* all bits must be zero to allow the I/O */
+    if ((val & mask) != 0) {
+    fail:
+        raise_exception_err(env, EXCP0D_GPF, 0);
+    }
+}
+
+void helper_check_iob(CPUX86State *env, uint32_t t0)
+{
+    check_io(env, t0, 1);
+}
+
+void helper_check_iow(CPUX86State *env, uint32_t t0)
+{
+    check_io(env, t0, 2);
+}
+
+void helper_check_iol(CPUX86State *env, uint32_t t0)
+{
+    check_io(env, t0, 4);
+}
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 5d7697c1a7..58051d3bcc 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -168,15 +168,26 @@ void do_smm_enter(X86CPU *cpu)
                                       CR0_PG_MASK));
     cpu_x86_update_cr4(env, 0);
     env->dr[7] = 0x00000400;
-    CC_OP = CC_OP_EFLAGS;
 
     cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
-                           0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+                           0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
 }
 
 void helper_rsm(CPUX86State *env)
@@ -296,7 +307,6 @@ void helper_rsm(CPUX86State *env)
         env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
-    CC_OP = CC_OP_EFLAGS;
     env->hflags &= ~HF_SMM_MASK;
     cpu_smm_update(env);
 
diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
index 852e2baf5d..429d029a3d 100644
--- a/target-i386/svm_helper.c
+++ b/target-i386/svm_helper.c
@@ -20,10 +20,7 @@
 #include "cpu.h"
 #include "exec/cpu-all.h"
 #include "exec/helper-proto.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
+#include "exec/cpu_ldst.h"
 
 /* Secure Virtual Machine helpers */
 
@@ -260,7 +257,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                   env->vm_vmcb + offsetof(struct vmcb,
                                                           save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
                        R_ES);
@@ -702,7 +698,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
                                                            save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
                       VM_MASK));
-    CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
                        R_ES);
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 3aa52eb795..2359787b42 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -27,6 +27,7 @@
 #include "cpu.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c
index 40fbed64c3..308742a74e 100644
--- a/target-lm32/op_helper.c
+++ b/target-lm32/op_helper.c
@@ -6,23 +6,13 @@
 #include "hw/lm32/lm32_pic.h"
 #include "hw/char/lm32_juart.h"
 
-#include "exec/softmmu_exec.h"
+#include "exec/cpu_ldst.h"
 
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/sysemu.h"
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
-#define MMUSUFFIX _mmu
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 void raise_exception(CPULM32State *env, int index)
 {
     CPUState *cs = CPU(lm32_env_get_cpu(env));
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 51eca06591..a51ade9a15 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -22,6 +22,7 @@
 #include "exec/helper-proto.h"
 #include "tcg-op.h"
 
+#include "exec/cpu_ldst.h"
 #include "hw/lm32/lm32_pic.h"
 
 #include "exec/helper-gen.h"
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index f1ac139c51..9dd3e74ab8 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -18,6 +18,7 @@
  */
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 #if defined(CONFIG_USER_ONLY)
 
@@ -34,22 +35,6 @@ void do_interrupt_m68k_hardirq(CPUM68KState *env)
 
 extern int semihosting_enabled;
 
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index fa248d96b5..50df4d3844 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -22,6 +22,7 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu/log.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c
index b24b878919..a4c8f04705 100644
--- a/target-microblaze/op_helper.c
+++ b/target-microblaze/op_helper.c
@@ -22,21 +22,11 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
+#include "exec/cpu_ldst.h"
 
 #define D(x)
 
 #if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-#define SHIFT 3
-#include "exec/softmmu_template.h"
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
  * NULL, it means that the function was called in C code (i.e. not
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 488df2d60d..c422bdc718 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -23,6 +23,7 @@
 #include "tcg-op.h"
 #include "exec/helper-proto.h"
 #include "microblaze-decode.h"
+#include "exec/cpu_ldst.h"
 #include "exec/helper-gen.h"
 
 #define SIM_COMPAT 0
diff --git a/target-mips/cpu-qom.h b/target-mips/cpu-qom.h
index 8877f813f7..2cff15a273 100644
--- a/target-mips/cpu-qom.h
+++ b/target-mips/cpu-qom.h
@@ -80,5 +80,7 @@ void mips_cpu_dump_state(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
 hwaddr mips_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int mips_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int mips_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
+                                  int is_write, int is_user, uintptr_t retaddr);
 
 #endif
diff --git a/target-mips/cpu.c b/target-mips/cpu.c
index ae37ae26c0..dd954fc55a 100644
--- a/target-mips/cpu.c
+++ b/target-mips/cpu.c
@@ -137,6 +137,7 @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
     cc->handle_mmu_fault = mips_cpu_handle_mmu_fault;
 #else
     cc->do_unassigned_access = mips_cpu_unassigned_access;
+    cc->do_unaligned_access = mips_cpu_do_unaligned_access;
     cc->get_phys_page_debug = mips_cpu_get_phys_page_debug;
 #endif
 
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 6c2014eddd..a9b2c7ae38 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -3,6 +3,7 @@
 
 //#define DEBUG_OP
 
+#define ALIGNED_ONLY
 #define TARGET_HAS_ICE 1
 
 #define ELF_MACHINE	EM_MIPS
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 8af931abd9..4704216834 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -19,12 +19,8 @@
 #include <stdlib.h>
 #include "cpu.h"
 #include "qemu/host-utils.h"
-
 #include "exec/helper-proto.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
+#include "exec/cpu_ldst.h"
 
 #ifndef CONFIG_USER_ONLY
 static inline void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global);
@@ -2128,28 +2124,12 @@ void helper_wait(CPUMIPSState *env)
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void QEMU_NORETURN do_unaligned_access(CPUMIPSState *env,
-                                              target_ulong addr, int is_write,
-                                              int is_user, uintptr_t retaddr);
-
-#define MMUSUFFIX _mmu
-#define ALIGNED_ONLY
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
-static void do_unaligned_access(CPUMIPSState *env, target_ulong addr,
-                                int is_write, int is_user, uintptr_t retaddr)
+void mips_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                  int is_write, int is_user, uintptr_t retaddr)
 {
+    MIPSCPU *cpu = MIPS_CPU(cs);
+    CPUMIPSState *env = &cpu->env;
+
     env->CP0_BadVAddr = addr;
     do_raise_exception(env, (is_write == 1) ? EXCP_AdES : EXCP_AdEL, retaddr);
 }
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 13cf29b9d9..76deb7b138 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -24,6 +24,7 @@
 #include "cpu.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-moxie/helper.c b/target-moxie/helper.c
index d1efdedf9d..6c98965b93 100644
--- a/target-moxie/helper.c
+++ b/target-moxie/helper.c
@@ -25,24 +25,10 @@
 #include "cpu.h"
 #include "mmu.h"
 #include "exec/exec-all.h"
-#include "exec/softmmu_exec.h"
+#include "exec/cpu_ldst.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
 
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
diff --git a/target-moxie/translate.c b/target-moxie/translate.c
index 7f0dfb66f2..4541b9bff4 100644
--- a/target-moxie/translate.c
+++ b/target-moxie/translate.c
@@ -32,6 +32,7 @@
 #include "exec/exec-all.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-openrisc/mmu_helper.c b/target-openrisc/mmu_helper.c
index fb457c76af..ee1c6f6118 100644
--- a/target-openrisc/mmu_helper.c
+++ b/target-openrisc/mmu_helper.c
@@ -19,22 +19,9 @@
  */
 
 #include "cpu.h"
+#include "exec/cpu_ldst.h"
 
 #ifndef CONFIG_USER_ONLY
-#include "exec/softmmu_exec.h"
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
 
 void tlb_fill(CPUState *cs, target_ulong addr, int is_write,
               int mmu_idx, uintptr_t retaddr)
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 40084f9a52..b728718b64 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -26,6 +26,7 @@
 #include "qemu/log.h"
 #include "config.h"
 #include "qemu/bitops.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index a0c9fdc84b..7dfc52d159 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -18,6 +18,7 @@
  */
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 #include "helper_regs.h"
 
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index d9c8c36712..02b627e47b 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -21,10 +21,7 @@
 #include "exec/helper-proto.h"
 
 #include "helper_regs.h"
-
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif /* !defined(CONFIG_USER_ONLY) */
+#include "exec/cpu_ldst.h"
 
 //#define DEBUG_OP
 
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index a238bb2731..f029f41965 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -22,6 +22,7 @@
 #include "kvm_ppc.h"
 #include "mmu-hash64.h"
 #include "mmu-hash32.h"
+#include "exec/cpu_ldst.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_BATS
@@ -2903,22 +2904,6 @@ void helper_booke206_tlbflush(CPUPPCState *env, uint32_t type)
 
 /*****************************************************************************/
 
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 6283b2c36c..f08901470b 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -22,6 +22,7 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu/host-utils.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-s390x/fpu_helper.c b/target-s390x/fpu_helper.c
index d879ad63a1..b946ec1d51 100644
--- a/target-s390x/fpu_helper.c
+++ b/target-s390x/fpu_helper.c
@@ -19,12 +19,9 @@
  */
 
 #include "cpu.h"
+#include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
 
-#if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-#endif
-
 /* #define DEBUG_HELPER */
 #ifdef DEBUG_HELPER
 #define HELPER_LOG(x...) qemu_log(x)
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 3d756cae6c..67ab1065aa 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -21,6 +21,7 @@
 #include "cpu.h"
 #include "exec/gdbstub.h"
 #include "qemu/timer.h"
+#include "exec/cpu_ldst.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/sysemu.h"
 #endif
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c
index 5a29841d71..5a55de86a1 100644
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -20,25 +20,11 @@
 
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 /*****************************************************************************/
 /* Softmmu support */
 #if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 44c08f370d..9dae0256fa 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -28,9 +28,9 @@
 #ifdef CONFIG_KVM
 #include <linux/kvm.h>
 #endif
+#include "exec/cpu_ldst.h"
 
 #if !defined(CONFIG_USER_ONLY)
-#include "exec/softmmu_exec.h"
 #include "sysemu/cpus.h"
 #include "sysemu/sysemu.h"
 #include "hw/s390x/ebcdic.h"
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index cf65f01f60..8ca4824d60 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -33,6 +33,7 @@
 #include "tcg-op.h"
 #include "qemu/log.h"
 #include "qemu/host-utils.h"
+#include "exec/cpu_ldst.h"
 
 /* global register indexes */
 static TCGv_ptr cpu_env;
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 39e1e7cbef..74a5c4ea77 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -20,23 +20,9 @@
 #include <stdlib.h>
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 #ifndef CONFIG_USER_ONLY
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
 
 void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 169c87fc1b..8126818142 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -23,6 +23,7 @@
 #include "cpu.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-sparc/cpu-qom.h b/target-sparc/cpu-qom.h
index 8e3e0de277..477c4d5136 100644
--- a/target-sparc/cpu-qom.h
+++ b/target-sparc/cpu-qom.h
@@ -81,5 +81,8 @@ void sparc_cpu_dump_state(CPUState *cpu, FILE *f,
 hwaddr sparc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int sparc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int sparc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cpu,
+                                                 vaddr addr, int is_write,
+                                                 int is_user, uintptr_t retaddr);
 
 #endif
diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index d9f37e9b6a..3a0ee504e5 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -825,6 +825,7 @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
     cc->handle_mmu_fault = sparc_cpu_handle_mmu_fault;
 #else
     cc->do_unassigned_access = sparc_cpu_unassigned_access;
+    cc->do_unaligned_access = sparc_cpu_do_unaligned_access;
     cc->get_phys_page_debug = sparc_cpu_get_phys_page_debug;
 #endif
 
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index f72451d53e..836f87f42f 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -5,6 +5,8 @@
 #include "qemu-common.h"
 #include "qemu/bswap.h"
 
+#define ALIGNED_ONLY
+
 #if !defined(TARGET_SPARC64)
 #define TARGET_LONG_BITS 32
 #define TARGET_DPREGS 16
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index b6b9866b93..03bd9f9706 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -19,6 +19,7 @@
 
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_MXCC
@@ -64,27 +65,6 @@
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 
-#if !defined(CONFIG_USER_ONLY)
-static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env,
-                                              target_ulong addr, int is_write,
-                                              int is_user, uintptr_t retaddr);
-#include "exec/softmmu_exec.h"
-#define MMUSUFFIX _mmu
-#define ALIGNED_ONLY
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-#endif
-
 #if defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
 /* Calculates TSB pointer value for fault page size 8k or 64k */
 static uint64_t ultrasparc_tsb_pointer(uint64_t tsb_register,
@@ -2425,11 +2405,13 @@ void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr,
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
-static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env,
-                                              target_ulong addr, int is_write,
-                                              int is_user, uintptr_t retaddr)
+void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs,
+                                                 vaddr addr, int is_write,
+                                                 int is_user, uintptr_t retaddr)
 {
-    SPARCCPU *cpu = sparc_env_get_cpu(env);
+    SPARCCPU *cpu = SPARC_CPU(cs);
+    CPUSPARCState *env = &cpu->env;
+
 #ifdef DEBUG_UNALIGNED
     printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
            "\n", addr, env->pc);
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 652a181763..1ab07a18a2 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -28,6 +28,7 @@
 #include "disas/disas.h"
 #include "exec/helper-proto.h"
 #include "tcg-op.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-gen.h"
 
diff --git a/target-unicore32/op_helper.c b/target-unicore32/op_helper.c
index 4f96ed350b..0266dbdf7b 100644
--- a/target-unicore32/op_helper.c
+++ b/target-unicore32/op_helper.c
@@ -10,6 +10,7 @@
  */
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
@@ -241,22 +242,6 @@ uint32_t HELPER(ror_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 }
 
 #ifndef CONFIG_USER_ONLY
-#include "exec/softmmu_exec.h"
-
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
 void tlb_fill(CPUState *cs, target_ulong addr, int is_write,
               int mmu_idx, uintptr_t retaddr)
 {
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 3cccafe5ad..5a8c7c89ee 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -18,6 +18,7 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu/log.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/target-xtensa/cpu-qom.h b/target-xtensa/cpu-qom.h
index c6cc2d91f4..f320486a68 100644
--- a/target-xtensa/cpu-qom.h
+++ b/target-xtensa/cpu-qom.h
@@ -89,5 +89,7 @@ void xtensa_cpu_dump_state(CPUState *cpu, FILE *f,
 hwaddr xtensa_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int xtensa_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int xtensa_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
+                                    int is_write, int is_user, uintptr_t retaddr);
 
 #endif
diff --git a/target-xtensa/cpu.c b/target-xtensa/cpu.c
index 6251f1c47e..9d8801b70e 100644
--- a/target-xtensa/cpu.c
+++ b/target-xtensa/cpu.c
@@ -148,6 +148,7 @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
     cc->gdb_read_register = xtensa_cpu_gdb_read_register;
     cc->gdb_write_register = xtensa_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
+    cc->do_unaligned_access = xtensa_cpu_do_unaligned_access;
     cc->get_phys_page_debug = xtensa_cpu_get_phys_page_debug;
 #endif
     dc->vmsd = &vmstate_xtensa_cpu;
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index e210bacdff..d797d2649a 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -28,6 +28,7 @@
 #ifndef CPU_XTENSA_H
 #define CPU_XTENSA_H
 
+#define ALIGNED_ONLY
 #define TARGET_LONG_BITS 32
 #define ELF_MACHINE EM_XTENSA
 
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 01edab4082..dae13866ef 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -28,31 +28,15 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "qemu/host-utils.h"
-#include "exec/softmmu_exec.h"
+#include "exec/cpu_ldst.h"
 #include "exec/address-spaces.h"
+#include "qemu/timer.h"
 
-static void do_unaligned_access(CPUXtensaState *env,
-        target_ulong addr, int is_write, int is_user, uintptr_t retaddr);
-
-#define ALIGNED_ONLY
-#define MMUSUFFIX _mmu
-
-#define SHIFT 0
-#include "exec/softmmu_template.h"
-
-#define SHIFT 1
-#include "exec/softmmu_template.h"
-
-#define SHIFT 2
-#include "exec/softmmu_template.h"
-
-#define SHIFT 3
-#include "exec/softmmu_template.h"
-
-static void do_unaligned_access(CPUXtensaState *env,
-        target_ulong addr, int is_write, int is_user, uintptr_t retaddr)
+void xtensa_cpu_do_unaligned_access(CPUState *cs,
+        vaddr addr, int is_write, int is_user, uintptr_t retaddr)
 {
-    XtensaCPU *cpu = xtensa_env_get_cpu(env);
+    XtensaCPU *cpu = XTENSA_CPU(cs);
+    CPUXtensaState *env = &cpu->env;
 
     if (xtensa_option_enabled(env->config, XTENSA_OPTION_UNALIGNED_EXCEPTION) &&
             !xtensa_option_enabled(env->config, XTENSA_OPTION_HW_ALIGNMENT)) {
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 57e56bd34d..2f22cce845 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -36,6 +36,7 @@
 #include "tcg-op.h"
 #include "qemu/log.h"
 #include "sysemu/sysemu.h"
+#include "exec/cpu_ldst.h"
 
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index a32aea66fa..60c7493ac1 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -99,8 +99,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     __builtin___clear_cache((char *)start, (char *)stop);
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 73f10c4424..1c719e2862 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -86,8 +86,6 @@ extern bool use_idiv_instructions;
 #define TCG_TARGET_HAS_div_i32          use_idiv_instructions
 #define TCG_TARGET_HAS_rem_i32          0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
 
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index d9102335f9..4133dcf1a9 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1407,7 +1407,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
         } else {
             retaddr = TCG_REG_RAX;
             tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
-            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
+            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
+                       TCG_TARGET_CALL_STACK_OFFSET);
         }
     }
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 6c94e5cde0..7a9980e70e 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -130,8 +130,6 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_mulsh_i64        0
 #endif
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
      ((ofs) == 0 && (len) == 16))
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 3a59b50349..d67558988a 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -160,8 +160,6 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i64        0
 #define TCG_TARGET_HAS_trunc_shr_i32    0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
 
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index b5face8b4d..c88a1c9272 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -118,8 +118,6 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_ext16s_i32       use_mips32r2_instructions
 #define TCG_TARGET_HAS_rot_i32          use_mips32r2_instructions
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 77da2f942a..16cebbe16d 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -889,17 +889,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
 
         CASE_OP_32_64(ld8u):
-        case INDEX_op_qemu_ld8u:
             mask = 0xff;
             break;
         CASE_OP_32_64(ld16u):
-        case INDEX_op_qemu_ld16u:
             mask = 0xffff;
             break;
         case INDEX_op_ld32u_i64:
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-#endif
             mask = 0xffffffffu;
             break;
 
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index dd7e557948..05069ae2d8 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -100,8 +100,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_AREG0 TCG_REG_R27
 
 #define tcg_qemu_tb_exec(env, tb_ptr) \
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 29f479a3cf..f2360c869a 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -124,8 +124,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_AREG0 TCG_REG_R27
 
 #define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index ad2c6ddaf4..5acc28ca6b 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -100,8 +100,6 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
 #define TCG_TARGET_deposit_i64_valid  tcg_target_deposit_valid
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 473bfc71fd..089f9761ca 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -140,8 +140,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_AREG0 TCG_REG_I0
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 719533ac39..019dd9b6fb 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2718,7 +2718,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
-#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
 #define tcg_gen_andc_tl tcg_gen_andc_i32
 #define tcg_gen_eqv_tl tcg_gen_eqv_i32
 #define tcg_gen_nand_tl tcg_gen_nand_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 71ba64ac16..042d442c7e 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -185,106 +185,20 @@ DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
 
-#define IMPL_NEW_LDST \
-    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
-     | IMPL(TCG_TARGET_HAS_new_ldst))
-
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
-DEF(qemu_ld_i32, 1, 1, 2, IMPL_NEW_LDST)
-DEF(qemu_st_i32, 0, 2, 2, IMPL_NEW_LDST)
-# if TCG_TARGET_REG_BITS == 64
-DEF(qemu_ld_i64, 1, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-DEF(qemu_st_i64, 0, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-# else
-DEF(qemu_ld_i64, 2, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-DEF(qemu_st_i64, 0, 3, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-# endif
-#else
-DEF(qemu_ld_i32, 1, 2, 2, IMPL_NEW_LDST)
-DEF(qemu_st_i32, 0, 3, 2, IMPL_NEW_LDST)
-DEF(qemu_ld_i64, 2, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-DEF(qemu_st_i64, 0, 4, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-#endif
-
-#undef IMPL_NEW_LDST
-
-#define IMPL_OLD_LDST \
-    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
-     | IMPL(!TCG_TARGET_HAS_new_ldst))
-
-#if TCG_TARGET_REG_BITS == 32
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld8u, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld8s, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld16u, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld16s, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld32, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld64, 2, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#else
-DEF(qemu_ld64, 2, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#endif
-
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_st8, 0, 3, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_st16, 0, 3, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_st32, 0, 3, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st64, 0, 3, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#else
-DEF(qemu_st64, 0, 4, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#endif
-
-#else /* TCG_TARGET_REG_BITS == 32 */
-
-DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld32u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld32s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld64, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-
-DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st64, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-
-#endif /* TCG_TARGET_REG_BITS != 32 */
-
-#undef IMPL_OLD_LDST
-
+#define TLADDR_ARGS    (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
+#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
+
+DEF(qemu_ld_i32, 1, TLADDR_ARGS, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+
+#undef TLADDR_ARGS
+#undef DATA64_ARGS
 #undef IMPL
 #undef IMPL64
 #undef DEF
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 2c5732da17..dae4b7cb00 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -941,97 +941,26 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
     return op;
 }
 
-static const TCGOpcode old_ld_opc[8] = {
-    [MO_UB] = INDEX_op_qemu_ld8u,
-    [MO_SB] = INDEX_op_qemu_ld8s,
-    [MO_UW] = INDEX_op_qemu_ld16u,
-    [MO_SW] = INDEX_op_qemu_ld16s,
-#if TCG_TARGET_REG_BITS == 32
-    [MO_UL] = INDEX_op_qemu_ld32,
-    [MO_SL] = INDEX_op_qemu_ld32,
-#else
-    [MO_UL] = INDEX_op_qemu_ld32u,
-    [MO_SL] = INDEX_op_qemu_ld32s,
-#endif
-    [MO_Q]  = INDEX_op_qemu_ld64,
-};
-
-static const TCGOpcode old_st_opc[4] = {
-    [MO_UB] = INDEX_op_qemu_st8,
-    [MO_UW] = INDEX_op_qemu_st16,
-    [MO_UL] = INDEX_op_qemu_st32,
-    [MO_Q]  = INDEX_op_qemu_st64,
-};
-
 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
     memop = tcg_canonicalize_memop(memop, 0, 0);
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_ld_opc[memop & MO_SSIZE] != 0);
-
-    if (TCG_TARGET_REG_BITS == 32) {
-        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-    } else {
-        TCGv_i64 val64 = tcg_temp_new_i64();
-
-        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
-        tcg_add_param_i64(val64);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-
-        tcg_gen_trunc_i64_i32(val, val64);
-        tcg_temp_free_i64(val64);
-    }
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
+    tcg_add_param_i32(val);
+    tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
+    *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
     memop = tcg_canonicalize_memop(memop, 0, 1);
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_st_opc[memop & MO_SIZE] != 0);
-
-    if (TCG_TARGET_REG_BITS == 32) {
-        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-    } else {
-        TCGv_i64 val64 = tcg_temp_new_i64();
-
-        tcg_gen_extu_i32_i64(val64, val);
-
-        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
-        tcg_add_param_i64(val64);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-
-        tcg_temp_free_i64(val64);
-    }
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
+    tcg_add_param_i32(val);
+    tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
+    *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
@@ -1050,22 +979,10 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
     }
 #endif
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
-        tcg_add_param_i64(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_ld_opc[memop & MO_SSIZE] != 0);
-
-    *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
     tcg_add_param_i64(val);
     tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
     *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
@@ -1080,22 +997,10 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
     }
 #endif
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
-        tcg_add_param_i64(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_st_opc[memop & MO_SIZE] != 0);
-
-    *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
     tcg_add_param_i64(val);
     tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
     *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 2efa333166..997a70433b 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -909,19 +909,6 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 # define helper_ret_stq_mmu   helper_le_stq_mmu
 #endif
 
-uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-uint16_t helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-
-void helper_stb_mmu(CPUArchState *env, target_ulong addr,
-                    uint8_t val, int mmu_idx);
-void helper_stw_mmu(CPUArchState *env, target_ulong addr,
-                    uint16_t val, int mmu_idx);
-void helper_stl_mmu(CPUArchState *env, target_ulong addr,
-                    uint32_t val, int mmu_idx);
-void helper_stq_mmu(CPUArchState *env, target_ulong addr,
-                    uint64_t val, int mmu_idx);
 #endif /* CONFIG_SOFTMMU */
 
 #endif /* TCG_H */
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 375e590d2b..03a7b46958 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -227,21 +227,11 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 #endif
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-    { INDEX_op_qemu_ld8u, { R, L } },
-    { INDEX_op_qemu_ld8s, { R, L } },
-    { INDEX_op_qemu_ld16u, { R, L } },
-    { INDEX_op_qemu_ld16s, { R, L } },
-    { INDEX_op_qemu_ld32, { R, L } },
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld32u, { R, L } },
-    { INDEX_op_qemu_ld32s, { R, L } },
-#endif
-    { INDEX_op_qemu_ld64, { R64, L } },
+    { INDEX_op_qemu_ld_i32, { R, L } },
+    { INDEX_op_qemu_ld_i64, { R64, L } },
 
-    { INDEX_op_qemu_st8, { R, S } },
-    { INDEX_op_qemu_st16, { R, S } },
-    { INDEX_op_qemu_st32, { R, S } },
-    { INDEX_op_qemu_st64, { R64, S } },
+    { INDEX_op_qemu_st_i32, { R, S } },
+    { INDEX_op_qemu_st_i64, { R64, S } },
 
 #if TCG_TARGET_HAS_ext8s_i32
     { INDEX_op_ext8s_i32, { R, R } },
@@ -767,58 +757,52 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out8(s, args[2]);           /* condition */
         tci_out_label(s, args[3]);
         break;
-    case INDEX_op_qemu_ld8u:
-    case INDEX_op_qemu_ld8s:
-    case INDEX_op_qemu_ld16u:
-    case INDEX_op_qemu_ld16s:
-    case INDEX_op_qemu_ld32:
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_qemu_ld32s:
-    case INDEX_op_qemu_ld32u:
-#endif
+    case INDEX_op_qemu_ld_i32:
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_r(s, *args++);
-#if TCG_TARGET_REG_BITS == 32
+    case INDEX_op_qemu_ld_i64:
         tcg_out_r(s, *args++);
-#endif
-        tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_r(s, *args++);
+        }
         tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_qemu_st8:
-    case INDEX_op_qemu_st16:
-    case INDEX_op_qemu_st32:
+    case INDEX_op_qemu_st_i32:
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_qemu_st64:
-        tcg_out_r(s, *args++);
-#if TCG_TARGET_REG_BITS == 32
-        tcg_out_r(s, *args++);
-#endif
+    case INDEX_op_qemu_st_i64:
         tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_r(s, *args++);
+        }
         tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 0be5acdb81..bd1e97468c 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -118,8 +118,6 @@
 #define TCG_TARGET_HAS_mulu2_i32        1
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-#define TCG_TARGET_HAS_new_ldst         0
-
 /* Number of registers available.
    For 32 bit hosts, we need more than 8 registers (call arguments). */
 /* #define TCG_TARGET_NB_REGS 8 */
diff --git a/tci.c b/tci.c
index 6523ab82f4..4711ee4817 100644
--- a/tci.c
+++ b/tci.c
@@ -26,6 +26,7 @@
 
 #include "qemu-common.h"
 #include "exec/exec-all.h"           /* MAX_OPC_PARAM_IARGS */
+#include "exec/cpu_ldst.h"
 #include "tcg-op.h"
 
 /* Marker for missing code. */
@@ -116,16 +117,6 @@ static void tci_write_reg(TCGReg index, tcg_target_ulong value)
     tci_reg[index] = value;
 }
 
-static void tci_write_reg8s(TCGReg index, int8_t value)
-{
-    tci_write_reg(index, value);
-}
-
-static void tci_write_reg16s(TCGReg index, int16_t value)
-{
-    tci_write_reg(index, value);
-}
-
 #if TCG_TARGET_REG_BITS == 64
 static void tci_write_reg32s(TCGReg index, int32_t value)
 {
@@ -138,11 +129,6 @@ static void tci_write_reg8(TCGReg index, uint8_t value)
     tci_write_reg(index, value);
 }
 
-static void tci_write_reg16(TCGReg index, uint16_t value)
-{
-    tci_write_reg(index, value);
-}
-
 static void tci_write_reg32(TCGReg index, uint32_t value)
 {
     tci_write_reg(index, value);
@@ -433,6 +419,53 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
     return result;
 }
 
+#ifdef CONFIG_SOFTMMU
+# define mmuidx          tci_read_i(&tb_ptr)
+# define qemu_ld_ub \
+    helper_ret_ldub_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_leuw \
+    helper_le_lduw_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_leul \
+    helper_le_ldul_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_leq \
+    helper_le_ldq_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_beuw \
+    helper_be_lduw_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_beul \
+    helper_be_ldul_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_beq \
+    helper_be_ldq_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_b(X) \
+    helper_ret_stb_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_lew(X) \
+    helper_le_stw_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_lel(X) \
+    helper_le_stl_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_leq(X) \
+    helper_le_stq_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_bew(X) \
+    helper_be_stw_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_bel(X) \
+    helper_be_stl_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_beq(X) \
+    helper_be_stq_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+#else
+# define qemu_ld_ub      ldub_p(g2h(taddr))
+# define qemu_ld_leuw    lduw_le_p(g2h(taddr))
+# define qemu_ld_leul    (uint32_t)ldl_le_p(g2h(taddr))
+# define qemu_ld_leq     ldq_le_p(g2h(taddr))
+# define qemu_ld_beuw    lduw_be_p(g2h(taddr))
+# define qemu_ld_beul    (uint32_t)ldl_be_p(g2h(taddr))
+# define qemu_ld_beq     ldq_be_p(g2h(taddr))
+# define qemu_st_b(X)    stb_p(g2h(taddr), X)
+# define qemu_st_lew(X)  stw_le_p(g2h(taddr), X)
+# define qemu_st_lel(X)  stl_le_p(g2h(taddr), X)
+# define qemu_st_leq(X)  stq_le_p(g2h(taddr), X)
+# define qemu_st_bew(X)  stw_be_p(g2h(taddr), X)
+# define qemu_st_bel(X)  stl_be_p(g2h(taddr), X)
+# define qemu_st_beq(X)  stq_be_p(g2h(taddr), X)
+#endif
+
 /* Interpret pseudo code in tb. */
 uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 {
@@ -456,9 +489,6 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
         tcg_target_ulong label;
         TCGCond condition;
         target_ulong taddr;
-#ifndef CONFIG_SOFTMMU
-        tcg_target_ulong host_addr;
-#endif
         uint8_t tmp8;
         uint16_t tmp16;
         uint32_t tmp32;
@@ -466,6 +496,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 #if TCG_TARGET_REG_BITS == 32
         uint64_t v64;
 #endif
+        TCGMemOp memop;
 
 #if defined(GETPC)
         tci_tb_ptr = (uintptr_t)tb_ptr;
@@ -1086,145 +1117,145 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             assert(tb_ptr == old_code_ptr + op_size);
             tb_ptr += (int32_t)t0;
             continue;
-        case INDEX_op_qemu_ld8u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld8s:
+        case INDEX_op_qemu_ld_i32:
             t0 = *tb_ptr++;
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8s(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld16u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16(t0, tmp16);
-            break;
-        case INDEX_op_qemu_ld16s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16s(t0, tmp16);
-            break;
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
-            break;
-        case INDEX_op_qemu_ld32s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32s(t0, tmp32);
-            break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
-        case INDEX_op_qemu_ld32:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                tmp32 = qemu_ld_ub;
+                break;
+            case MO_SB:
+                tmp32 = (int8_t)qemu_ld_ub;
+                break;
+            case MO_LEUW:
+                tmp32 = qemu_ld_leuw;
+                break;
+            case MO_LESW:
+                tmp32 = (int16_t)qemu_ld_leuw;
+                break;
+            case MO_LEUL:
+                tmp32 = qemu_ld_leul;
+                break;
+            case MO_BEUW:
+                tmp32 = qemu_ld_beuw;
+                break;
+            case MO_BESW:
+                tmp32 = (int16_t)qemu_ld_beuw;
+                break;
+            case MO_BEUL:
+                tmp32 = qemu_ld_beul;
+                break;
+            default:
+                tcg_abort();
+            }
+            tci_write_reg(t0, tmp32);
             break;
-        case INDEX_op_qemu_ld64:
+        case INDEX_op_qemu_ld_i64:
             t0 = *tb_ptr++;
-#if TCG_TARGET_REG_BITS == 32
-            t1 = *tb_ptr++;
-#endif
+            if (TCG_TARGET_REG_BITS == 32) {
+                t1 = *tb_ptr++;
+            }
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp64 = helper_ldq_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp64 = tswap64(*(uint64_t *)(host_addr + GUEST_BASE));
-#endif
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                tmp64 = qemu_ld_ub;
+                break;
+            case MO_SB:
+                tmp64 = (int8_t)qemu_ld_ub;
+                break;
+            case MO_LEUW:
+                tmp64 = qemu_ld_leuw;
+                break;
+            case MO_LESW:
+                tmp64 = (int16_t)qemu_ld_leuw;
+                break;
+            case MO_LEUL:
+                tmp64 = qemu_ld_leul;
+                break;
+            case MO_LESL:
+                tmp64 = (int32_t)qemu_ld_leul;
+                break;
+            case MO_LEQ:
+                tmp64 = qemu_ld_leq;
+                break;
+            case MO_BEUW:
+                tmp64 = qemu_ld_beuw;
+                break;
+            case MO_BESW:
+                tmp64 = (int16_t)qemu_ld_beuw;
+                break;
+            case MO_BEUL:
+                tmp64 = qemu_ld_beul;
+                break;
+            case MO_BESL:
+                tmp64 = (int32_t)qemu_ld_beul;
+                break;
+            case MO_BEQ:
+                tmp64 = qemu_ld_beq;
+                break;
+            default:
+                tcg_abort();
+            }
             tci_write_reg(t0, tmp64);
-#if TCG_TARGET_REG_BITS == 32
-            tci_write_reg(t1, tmp64 >> 32);
-#endif
-            break;
-        case INDEX_op_qemu_st8:
-            t0 = tci_read_r8(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stb_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint8_t *)(host_addr + GUEST_BASE) = t0;
-#endif
-            break;
-        case INDEX_op_qemu_st16:
-            t0 = tci_read_r16(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stw_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint16_t *)(host_addr + GUEST_BASE) = tswap16(t0);
-#endif
+            if (TCG_TARGET_REG_BITS == 32) {
+                tci_write_reg(t1, tmp64 >> 32);
+            }
             break;
-        case INDEX_op_qemu_st32:
-            t0 = tci_read_r32(&tb_ptr);
+        case INDEX_op_qemu_st_i32:
+            t0 = tci_read_r(&tb_ptr);
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stl_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint32_t *)(host_addr + GUEST_BASE) = tswap32(t0);
-#endif
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                qemu_st_b(t0);
+                break;
+            case MO_LEUW:
+                qemu_st_lew(t0);
+                break;
+            case MO_LEUL:
+                qemu_st_lel(t0);
+                break;
+            case MO_BEUW:
+                qemu_st_bew(t0);
+                break;
+            case MO_BEUL:
+                qemu_st_bel(t0);
+                break;
+            default:
+                tcg_abort();
+            }
             break;
-        case INDEX_op_qemu_st64:
+        case INDEX_op_qemu_st_i64:
             tmp64 = tci_read_r64(&tb_ptr);
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stq_mmu(env, taddr, tmp64, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint64_t *)(host_addr + GUEST_BASE) = tswap64(tmp64);
-#endif
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                qemu_st_b(tmp64);
+                break;
+            case MO_LEUW:
+                qemu_st_lew(tmp64);
+                break;
+            case MO_LEUL:
+                qemu_st_lel(tmp64);
+                break;
+            case MO_LEQ:
+                qemu_st_leq(tmp64);
+                break;
+            case MO_BEUW:
+                qemu_st_bew(tmp64);
+                break;
+            case MO_BEUL:
+                qemu_st_bel(tmp64);
+                break;
+            case MO_BEQ:
+                qemu_st_beq(tmp64);
+                break;
+            default:
+                tcg_abort();
+            }
             break;
         default:
             TODO();
diff --git a/tests/Makefile b/tests/Makefile
index 6b294a7dbc..361bb7b6e3 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -133,7 +133,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF)
 check-qtest-i386-y += tests/hd-geo-test$(EXESUF)
 gcov-files-i386-y += hw/block/hd-geometry.c
 check-qtest-i386-y += tests/boot-order-test$(EXESUF)
-check-qtest-i386-y += tests/acpi-test$(EXESUF)
+check-qtest-i386-y += tests/bios-tables-test$(EXESUF)
 check-qtest-i386-y += tests/rtc-test$(EXESUF)
 check-qtest-i386-y += tests/i440fx-test$(EXESUF)
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
@@ -290,7 +290,7 @@ tests/fdc-test$(EXESUF): tests/fdc-test.o
 tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
 tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
-tests/acpi-test$(EXESUF): tests/acpi-test.o $(libqos-obj-y)
+tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o $(libqos-obj-y)
 tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
diff --git a/tests/acpi-test.c b/tests/bios-tables-test.c
index 76fbccfa4b..62771f7608 100644
--- a/tests/acpi-test.c
+++ b/tests/bios-tables-test.c
@@ -18,6 +18,8 @@
 #include "libqtest.h"
 #include "qemu/compiler.h"
 #include "hw/i386/acpi-defs.h"
+#include "hw/i386/smbios.h"
+#include "qemu/bitmap.h"
 
 #define MACHINE_PC "pc"
 #define MACHINE_Q35 "q35"
@@ -46,6 +48,8 @@ typedef struct {
     uint32_t *rsdt_tables_addr;
     int rsdt_tables_nr;
     GArray *tables;
+    uint32_t smbios_ep_addr;
+    struct smbios_entry_point smbios_ep_table;
 } test_data;
 
 #define LOW(x) ((x) & 0xff)
@@ -581,6 +585,124 @@ static void test_acpi_asl(test_data *data)
     free_test_data(&exp_data);
 }
 
+static void test_smbios_ep_address(test_data *data)
+{
+    uint32_t off;
+
+    /* find smbios entry point structure */
+    for (off = 0xf0000; off < 0x100000; off += 0x10) {
+        uint8_t sig[] = "_SM_";
+        int i;
+
+        for (i = 0; i < sizeof sig - 1; ++i) {
+            sig[i] = readb(off + i);
+        }
+
+        if (!memcmp(sig, "_SM_", sizeof sig)) {
+            break;
+        }
+    }
+
+    g_assert_cmphex(off, <, 0x100000);
+    data->smbios_ep_addr = off;
+}
+
+static void test_smbios_ep_table(test_data *data)
+{
+    struct smbios_entry_point *ep_table = &data->smbios_ep_table;
+    uint32_t addr = data->smbios_ep_addr;
+
+    ACPI_READ_ARRAY(ep_table->anchor_string, addr);
+    g_assert(!memcmp(ep_table->anchor_string, "_SM_", 4));
+    ACPI_READ_FIELD(ep_table->checksum, addr);
+    ACPI_READ_FIELD(ep_table->length, addr);
+    ACPI_READ_FIELD(ep_table->smbios_major_version, addr);
+    ACPI_READ_FIELD(ep_table->smbios_minor_version, addr);
+    ACPI_READ_FIELD(ep_table->max_structure_size, addr);
+    ACPI_READ_FIELD(ep_table->entry_point_revision, addr);
+    ACPI_READ_ARRAY(ep_table->formatted_area, addr);
+    ACPI_READ_ARRAY(ep_table->intermediate_anchor_string, addr);
+    g_assert(!memcmp(ep_table->intermediate_anchor_string, "_DMI_", 5));
+    ACPI_READ_FIELD(ep_table->intermediate_checksum, addr);
+    ACPI_READ_FIELD(ep_table->structure_table_length, addr);
+    g_assert_cmpuint(ep_table->structure_table_length, >, 0);
+    ACPI_READ_FIELD(ep_table->structure_table_address, addr);
+    ACPI_READ_FIELD(ep_table->number_of_structures, addr);
+    g_assert_cmpuint(ep_table->number_of_structures, >, 0);
+    ACPI_READ_FIELD(ep_table->smbios_bcd_revision, addr);
+    g_assert(!acpi_checksum((uint8_t *)ep_table, sizeof *ep_table));
+    g_assert(!acpi_checksum((uint8_t *)ep_table + 0x10,
+                            sizeof *ep_table - 0x10));
+}
+
+static inline bool smbios_single_instance(uint8_t type)
+{
+    switch (type) {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+    case 16:
+    case 32:
+    case 127:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static void test_smbios_structs(test_data *data)
+{
+    DECLARE_BITMAP(struct_bitmap, SMBIOS_MAX_TYPE+1) = { 0 };
+    struct smbios_entry_point *ep_table = &data->smbios_ep_table;
+    uint32_t addr = ep_table->structure_table_address;
+    int i, len, max_len = 0;
+    uint8_t type, prv, crt;
+    uint8_t required_struct_types[] = {0, 1, 3, 4, 16, 17, 19, 32, 127};
+
+    /* walk the smbios tables */
+    for (i = 0; i < ep_table->number_of_structures; i++) {
+
+        /* grab type and formatted area length from struct header */
+        type = readb(addr);
+        g_assert_cmpuint(type, <=, SMBIOS_MAX_TYPE);
+        len = readb(addr + 1);
+
+        /* single-instance structs must not have been encountered before */
+        if (smbios_single_instance(type)) {
+            g_assert(!test_bit(type, struct_bitmap));
+        }
+        set_bit(type, struct_bitmap);
+
+        /* seek to end of unformatted string area of this struct ("\0\0") */
+        prv = crt = 1;
+        while (prv || crt) {
+            prv = crt;
+            crt = readb(addr + len);
+            len++;
+        }
+
+        /* keep track of max. struct size */
+        if (max_len < len) {
+            max_len = len;
+            g_assert_cmpuint(max_len, <=, ep_table->max_structure_size);
+        }
+
+        /* start of next structure */
+        addr += len;
+    }
+
+    /* total table length and max struct size must match entry point values */
+    g_assert_cmpuint(ep_table->structure_table_length, ==,
+                     addr - ep_table->structure_table_address);
+    g_assert_cmpuint(ep_table->max_structure_size, ==, max_len);
+
+    /* required struct types must all be present */
+    for (i = 0; i < ARRAY_SIZE(required_struct_types); i++) {
+        g_assert(test_bit(required_struct_types[i], struct_bitmap));
+    }
+}
+
 static void test_acpi_one(const char *params, test_data *data)
 {
     char *args;
@@ -633,6 +755,10 @@ static void test_acpi_one(const char *params, test_data *data)
         }
     }
 
+    test_smbios_ep_address(data);
+    test_smbios_ep_table(data);
+    test_smbios_structs(data);
+
     qtest_quit(global_qtest);
     g_free(args);
 }
diff --git a/tests/test-qdev-global-props.c b/tests/test-qdev-global-props.c
index e4ad173d72..2bef04c76f 100644
--- a/tests/test-qdev-global-props.c
+++ b/tests/test-qdev-global-props.c
@@ -150,8 +150,10 @@ static void test_dynamic_globalprop(void)
     static GlobalProperty props[] = {
         { TYPE_DYNAMIC_PROPS, "prop1", "101" },
         { TYPE_DYNAMIC_PROPS, "prop2", "102" },
+        { TYPE_DYNAMIC_PROPS"-bad", "prop3", "103", true },
         {}
     };
+    int all_used;
 
     qdev_prop_register_global_list(props);
 
@@ -160,6 +162,8 @@ static void test_dynamic_globalprop(void)
 
     g_assert_cmpuint(mt->prop1, ==, 101);
     g_assert_cmpuint(mt->prop2, ==, 102);
+    all_used = qdev_prop_check_global();
+    g_assert_cmpuint(all_used, ==, 1);
 }
 
 int main(int argc, char **argv)
diff --git a/ui/curses.c b/ui/curses.c
index de85f7610f..8edb038bb3 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -277,31 +277,41 @@ static void curses_refresh(DisplayChangeListener *dcl)
              * events, we need to emit both for each key received */
             if (keycode & SHIFT) {
                 qemu_input_event_send_key_number(NULL, SHIFT_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & CNTRL) {
                 qemu_input_event_send_key_number(NULL, CNTRL_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & ALT) {
                 qemu_input_event_send_key_number(NULL, ALT_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & ALTGR) {
                 qemu_input_event_send_key_number(NULL, GREY | ALT_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
 
             qemu_input_event_send_key_number(NULL, keycode & KEY_MASK, true);
+            qemu_input_event_send_key_delay(0);
             qemu_input_event_send_key_number(NULL, keycode & KEY_MASK, false);
+            qemu_input_event_send_key_delay(0);
 
             if (keycode & ALTGR) {
                 qemu_input_event_send_key_number(NULL, GREY | ALT_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & ALT) {
                 qemu_input_event_send_key_number(NULL, ALT_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & CNTRL) {
                 qemu_input_event_send_key_number(NULL, CNTRL_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & SHIFT) {
                 qemu_input_event_send_key_number(NULL, SHIFT_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
         } else {
             keysym = curses2qemu[chr];
diff --git a/ui/input-legacy.c b/ui/input-legacy.c
index 2a538607a2..3025f50f49 100644
--- a/ui/input-legacy.c
+++ b/ui/input-legacy.c
@@ -74,27 +74,6 @@ int index_from_key(const char *key)
     return i;
 }
 
-static KeyValue **keyvalues;
-static int keyvalues_size;
-static QEMUTimer *key_timer;
-
-static void free_keyvalues(void)
-{
-    g_free(keyvalues);
-    keyvalues = NULL;
-    keyvalues_size = 0;
-}
-
-static void release_keys(void *opaque)
-{
-    while (keyvalues_size > 0) {
-        qemu_input_event_send_key(NULL, keyvalues[--keyvalues_size],
-                                  false);
-    }
-
-    free_keyvalues();
-}
-
 static KeyValue *copy_key_value(KeyValue *src)
 {
     KeyValue *dst = g_new(KeyValue, 1);
@@ -107,30 +86,18 @@ void qmp_send_key(KeyValueList *keys, bool has_hold_time, int64_t hold_time,
 {
     KeyValueList *p;
 
-    if (!key_timer) {
-        key_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, release_keys, NULL);
-    }
-
-    if (keyvalues != NULL) {
-        timer_del(key_timer);
-        release_keys(NULL);
-    }
-
     if (!has_hold_time) {
-        hold_time = 100;
+        hold_time = 0; /* use default */
     }
 
     for (p = keys; p != NULL; p = p->next) {
         qemu_input_event_send_key(NULL, copy_key_value(p->value), true);
-
-        keyvalues = g_realloc(keyvalues, sizeof(KeyValue *) *
-                              (keyvalues_size + 1));
-        keyvalues[keyvalues_size++] = copy_key_value(p->value);
+        qemu_input_event_send_key_delay(hold_time);
+    }
+    for (p = keys; p != NULL; p = p->next) {
+        qemu_input_event_send_key(NULL, copy_key_value(p->value), false);
+        qemu_input_event_send_key_delay(hold_time);
     }
-
-    /* delayed key up events */
-    timer_mod(key_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-              muldiv64(get_ticks_per_sec(), hold_time, 1000));
 }
 
 static void legacy_kbd_event(DeviceState *dev, QemuConsole *src,
diff --git a/ui/input.c b/ui/input.c
index 14c9434f5e..89d9db78c0 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -14,11 +14,31 @@ struct QemuInputHandlerState {
     QemuConsole       *con;
     QTAILQ_ENTRY(QemuInputHandlerState) node;
 };
+
+typedef struct QemuInputEventQueue QemuInputEventQueue;
+struct QemuInputEventQueue {
+    enum {
+        QEMU_INPUT_QUEUE_DELAY = 1,
+        QEMU_INPUT_QUEUE_EVENT,
+        QEMU_INPUT_QUEUE_SYNC,
+    } type;
+    QEMUTimer *timer;
+    uint32_t delay_ms;
+    QemuConsole *src;
+    InputEvent *evt;
+    QTAILQ_ENTRY(QemuInputEventQueue) node;
+};
+
 static QTAILQ_HEAD(, QemuInputHandlerState) handlers =
     QTAILQ_HEAD_INITIALIZER(handlers);
 static NotifierList mouse_mode_notifiers =
     NOTIFIER_LIST_INITIALIZER(mouse_mode_notifiers);
 
+static QTAILQ_HEAD(QemuInputEventQueueHead, QemuInputEventQueue) kbd_queue =
+    QTAILQ_HEAD_INITIALIZER(kbd_queue);
+static QEMUTimer *kbd_timer;
+static uint32_t kbd_default_delay_ms = 10;
+
 QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev,
                                                    QemuInputHandler *handler)
 {
@@ -171,6 +191,73 @@ static void qemu_input_event_trace(QemuConsole *src, InputEvent *evt)
     }
 }
 
+static void qemu_input_queue_process(void *opaque)
+{
+    struct QemuInputEventQueueHead *queue = opaque;
+    QemuInputEventQueue *item;
+
+    g_assert(!QTAILQ_EMPTY(queue));
+    item = QTAILQ_FIRST(queue);
+    g_assert(item->type == QEMU_INPUT_QUEUE_DELAY);
+    QTAILQ_REMOVE(queue, item, node);
+    g_free(item);
+
+    while (!QTAILQ_EMPTY(queue)) {
+        item = QTAILQ_FIRST(queue);
+        switch (item->type) {
+        case QEMU_INPUT_QUEUE_DELAY:
+            timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)
+                      + item->delay_ms);
+            return;
+        case QEMU_INPUT_QUEUE_EVENT:
+            qemu_input_event_send(item->src, item->evt);
+            qapi_free_InputEvent(item->evt);
+            break;
+        case QEMU_INPUT_QUEUE_SYNC:
+            qemu_input_event_sync();
+            break;
+        }
+        QTAILQ_REMOVE(queue, item, node);
+        g_free(item);
+    }
+}
+
+static void qemu_input_queue_delay(struct QemuInputEventQueueHead *queue,
+                                   QEMUTimer *timer, uint32_t delay_ms)
+{
+    QemuInputEventQueue *item = g_new0(QemuInputEventQueue, 1);
+    bool start_timer = QTAILQ_EMPTY(queue);
+
+    item->type = QEMU_INPUT_QUEUE_DELAY;
+    item->delay_ms = delay_ms;
+    item->timer = timer;
+    QTAILQ_INSERT_TAIL(queue, item, node);
+
+    if (start_timer) {
+        timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)
+                  + item->delay_ms);
+    }
+}
+
+static void qemu_input_queue_event(struct QemuInputEventQueueHead *queue,
+                                   QemuConsole *src, InputEvent *evt)
+{
+    QemuInputEventQueue *item = g_new0(QemuInputEventQueue, 1);
+
+    item->type = QEMU_INPUT_QUEUE_EVENT;
+    item->src = src;
+    item->evt = evt;
+    QTAILQ_INSERT_TAIL(queue, item, node);
+}
+
+static void qemu_input_queue_sync(struct QemuInputEventQueueHead *queue)
+{
+    QemuInputEventQueue *item = g_new0(QemuInputEventQueue, 1);
+
+    item->type = QEMU_INPUT_QUEUE_SYNC;
+    QTAILQ_INSERT_TAIL(queue, item, node);
+}
+
 void qemu_input_event_send(QemuConsole *src, InputEvent *evt)
 {
     QemuInputHandlerState *s;
@@ -230,9 +317,14 @@ void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down)
 {
     InputEvent *evt;
     evt = qemu_input_event_new_key(key, down);
-    qemu_input_event_send(src, evt);
-    qemu_input_event_sync();
-    qapi_free_InputEvent(evt);
+    if (QTAILQ_EMPTY(&kbd_queue)) {
+        qemu_input_event_send(src, evt);
+        qemu_input_event_sync();
+        qapi_free_InputEvent(evt);
+    } else {
+        qemu_input_queue_event(&kbd_queue, src, evt);
+        qemu_input_queue_sync(&kbd_queue);
+    }
 }
 
 void qemu_input_event_send_key_number(QemuConsole *src, int num, bool down)
@@ -251,6 +343,16 @@ void qemu_input_event_send_key_qcode(QemuConsole *src, QKeyCode q, bool down)
     qemu_input_event_send_key(src, key, down);
 }
 
+void qemu_input_event_send_key_delay(uint32_t delay_ms)
+{
+    if (!kbd_timer) {
+        kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process,
+                                 &kbd_queue);
+    }
+    qemu_input_queue_delay(&kbd_queue, kbd_timer,
+                           delay_ms ? delay_ms : kbd_default_delay_ms);
+}
+
 InputEvent *qemu_input_event_new_btn(InputButton btn, bool down)
 {
     InputEvent *evt = g_new0(InputEvent, 1);
diff --git a/ui/vnc.c b/ui/vnc.c
index 61b1f933bf..1684206184 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1553,7 +1553,9 @@ static void press_key(VncState *vs, int keysym)
 {
     int keycode = keysym2scancode(vs->vd->kbd_layout, keysym) & SCANCODE_KEYMASK;
     qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, true);
+    qemu_input_event_send_key_delay(0);
     qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, false);
+    qemu_input_event_send_key_delay(0);
 }
 
 static int current_led_state(VncState *vs)
diff --git a/user-exec.c b/user-exec.c
index 8ed6fec814..1ff8673acb 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -21,6 +21,7 @@
 #include "disas/disas.h"
 #include "tcg.h"
 #include "qemu/bitops.h"
+#include "exec/cpu_ldst.h"
 
 #undef EAX
 #undef ECX
diff --git a/vl.c b/vl.c
index 0c15608bcb..a3def97443 100644
--- a/vl.c
+++ b/vl.c
@@ -4541,6 +4541,8 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
+    qdev_prop_check_global();
+
     if (incoming) {
         Error *local_err = NULL;
         qemu_start_incoming_migration(incoming, &local_err);