-rw-r--r--  bsd-user/main.c           |    2
-rw-r--r--  docs/multiseat.txt        |   78
-rw-r--r--  hw/i386/kvm/clock.c       |   52
-rw-r--r--  include/ui/input.h        |    1
-rw-r--r--  kvm-all.c                 |    4
-rw-r--r--  linux-user/main.c         |    2
-rwxr-xr-x  scripts/kvm/kvm_stat      |   60
-rw-r--r--  target-i386/cpu-qom.h     |    1
-rw-r--r--  target-i386/cpu.c         |    1
-rw-r--r--  target-i386/cpu.h         |   13
-rw-r--r--  target-i386/gdbstub.c     |    4
-rw-r--r--  target-i386/kvm.c         |   30
-rw-r--r--  target-i386/machine.c     |    8
-rw-r--r--  target-i386/seg_helper.c  |   47
-rw-r--r--  target-i386/smm_helper.c  |   26
-rw-r--r--  target-i386/svm_helper.c  |    2
-rw-r--r--  tcg/aarch64/tcg-target.h  |    2
-rw-r--r--  tcg/arm/tcg-target.h      |    2
-rw-r--r--  tcg/i386/tcg-target.c     |    3
-rw-r--r--  tcg/i386/tcg-target.h     |    2
-rw-r--r--  tcg/ia64/tcg-target.h     |    2
-rw-r--r--  tcg/mips/tcg-target.h     |    2
-rw-r--r--  tcg/optimize.c            |    5
-rw-r--r--  tcg/ppc/tcg-target.h      |    2
-rw-r--r--  tcg/ppc64/tcg-target.h    |    2
-rw-r--r--  tcg/s390/tcg-target.h     |    2
-rw-r--r--  tcg/sparc/tcg-target.h    |    2
-rw-r--r--  tcg/tcg-op.h              |    2
-rw-r--r--  tcg/tcg-opc.h             |  114
-rw-r--r--  tcg/tcg.c                 |  123
-rw-r--r--  tcg/tci/tcg-target.c      |   76
-rw-r--r--  tcg/tci/tcg-target.h      |    2
-rw-r--r--  tci.c                     |  322
-rw-r--r--  ui/curses.c               |   10
-rw-r--r--  ui/input-legacy.c         |   45
-rw-r--r--  ui/input.c                |  108
-rw-r--r--  ui/vnc.c                  |    2
37 files changed, 588 insertions, 573 deletions
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 4ba61da896..0e8c26c137 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1004,7 +1004,7 @@ int main(int argc, char **argv)
 
 #if defined(TARGET_I386)
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
-    env->hflags |= HF_PE_MASK;
+    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
         env->cr[4] |= CR4_OSFXSR_MASK;
         env->hflags |= HF_OSFXSR_MASK;
diff --git a/docs/multiseat.txt b/docs/multiseat.txt
index a6c71dd74f..67151e0849 100644
--- a/docs/multiseat.txt
+++ b/docs/multiseat.txt
@@ -6,16 +6,20 @@ host side
 ---------
 
 First you must compile qemu with a user interface supporting
-multihead/multiseat and input event routing.  Right now this list is
-pretty short: sdl2.
+multihead/multiseat and input event routing.  Right now this
+list includes sdl2 and gtk (both 2+3):
 
   ./configure --enable-sdl --with-sdlabi=2.0
 
+or
+
+  ./configure --enable-gtk
+
 
 Next put together the qemu command line:
 
 qemu	-enable-kvm -usb $memory $disk $whatever \
-	-display sdl \
+	-display [ sdl | gtk ] \
 	-vga std \
 	-device usb-tablet
 
@@ -37,6 +41,20 @@ The "display=video2" sets up the input routing.  Any input coming from
 the window which belongs to the video.2 display adapter will be routed
 to these input devices.
 
+The sdl2 ui will start up with two windows, one for each display
+device.  The gtk ui will start with a single window and each display
+in a separate tab.  You can either simply switch tabs to switch heads,
+or use the "View / Detach tab" menu item to move one of the displays
+to its own window so you can see both display devices side-by-side.
+
+Note on spice: Spice handles multihead just fine.  But it can't do
+multiseat.  For tablet events the event source is sent to the spice
+agent.  But qemu can't figure it out, so it can't do input routing.
+Fixing this needs a new or extended input interface between
+libspice-server and qemu.  For keyboard events it is even worse:  The
+event source isn't included in the spice protocol, so the wire
+protocol must be extended to support this.
+
 
 guest side
 ----------
@@ -46,29 +64,37 @@ You need a pretty recent linux guest.  systemd with loginctl.  kernel
 fully updated for the new kernel though, i.e. the live iso doesn't cut
 it.
 
-Now we'll have to configure the guest.  Boot and login.  By default
-all devices belong to seat0.  You can use "loginctl seat-status seat0"
-to list them all (and to get the sysfs paths for cut+paste).  Now
-we'll go assign all pci devices connected the pci bridge in slot 12 to
-a new head:
-
-loginctl attach seat-qemu \
-	 /sys/devices/pci0000:00/0000:00:12.0/0000:01:02.0/drm/card1
-loginctl attach seat-qemu \
-	 /sys/devices/pci0000:00/0000:00:12.0/0000:01:02.0/graphics/fb1
-loginctl attach seat-qemu \
-	 /sys/devices/pci0000:00/0000:00:12.0/0000:01:0f.0/usb2
-
-Use "loginctl seat-status seat-qemu" to check the result.  It isn't
-needed to assign the usb devices to the head individually, assigning a
-usb (root) hub will automatically assign all usb devices connected to
-it too.
-
-BTW: loginctl writes udev rules to /etc/udev/rules.d to make these
-device assignments permanent, so you need to do this only once.
-
-Now simply restart gdm (rebooting will do too), and a login screen
-should show up on the second head.
+Now we'll have to configure the guest.  Boot and login.  "lspci -vt"
+should list the pci bridge with the display adapter and usb controller:
+
+    [root@fedora ~]# lspci -vt
+    -[0000:00]-+-00.0  Intel Corporation 440FX - 82441FX PMC [Natoma]
+               [ ... ]
+               \-12.0-[01]--+-02.0  Device 1234:1111
+                            \-0f.0  NEC Corporation USB 3.0 Host Controller
+
+Good.  Now let's tell the system that the pci bridge and all devices
+below it belong to a separate seat by dropping a file into
+/etc/udev/rules.d:
+
+    [root@fedora ~]# cat /etc/udev/rules.d/70-qemu-autoseat.rules
+    SUBSYSTEMS=="pci", DEVPATH=="*/0000:00:12.0", TAG+="seat", ENV{ID_AUTOSEAT}="1"
+
+Reboot.  System should come up with two seats.  With loginctl you can
+check the configuration:
+
+    [root@fedora ~]# loginctl list-seats
+    SEAT
+    seat0
+    seat-pci-pci-0000_00_12_0
+
+    2 seats listed.
+
+You can use "loginctl seat-status seat-pci-pci-0000_00_12_0" to list
+the devices attached to the seat.
+
+Background info is here:
+  http://www.freedesktop.org/wiki/Software/systemd/multiseat/
 
 Enjoy!
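
For reference, here is a sketch of how the host-side command line shown
earlier is typically extended with a second head.  The device ids, bus
names and PCI addresses below are illustrative assumptions chosen to match
the lspci output in the guest-side section; they are not taken verbatim
from this patch:

    qemu -enable-kvm -usb $memory $disk $whatever \
         -display [ sdl | gtk ] \
         -vga std \
         -device usb-tablet \
         -device pci-bridge,addr=12.0,chassis_nr=2,id=head.2 \
         -device secondary-vga,bus=head.2,addr=02.0,id=video.2 \
         -device nec-usb-xhci,bus=head.2,addr=0f.0,id=usb.2 \
         -device usb-kbd,bus=usb.2.0,port=1,display=video.2 \
         -device usb-tablet,bus=usb.2.0,port=2,display=video.2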
 
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 892aa025f4..bef2504389 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -14,8 +14,10 @@
  */
 
 #include "qemu-common.h"
+#include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "sysemu/cpus.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -34,6 +36,48 @@ typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
+struct pvclock_vcpu_time_info {
+    uint32_t   version;
+    uint32_t   pad0;
+    uint64_t   tsc_timestamp;
+    uint64_t   system_time;
+    uint32_t   tsc_to_system_mul;
+    int8_t     tsc_shift;
+    uint8_t    flags;
+    uint8_t    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+static uint64_t kvmclock_current_nsec(KVMClockState *s)
+{
+    CPUState *cpu = first_cpu;
+    CPUX86State *env = cpu->env_ptr;
+    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+    uint64_t migration_tsc = env->tsc;
+    struct pvclock_vcpu_time_info time;
+    uint64_t delta;
+    uint64_t nsec_lo;
+    uint64_t nsec_hi;
+    uint64_t nsec;
+
+    if (!(env->system_time_msr & 1ULL)) {
+        /* KVM clock not active */
+        return 0;
+    }
+
+    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
+
+    assert(time.tsc_timestamp <= migration_tsc);
+    delta = migration_tsc - time.tsc_timestamp;
+    if (time.tsc_shift < 0) {
+        delta >>= -time.tsc_shift;
+    } else {
+        delta <<= time.tsc_shift;
+    }
+
+    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
+    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
+    return nsec + time.system_time;
+}
 
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
@@ -45,9 +89,15 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data;
+        uint64_t time_at_migration = kvmclock_current_nsec(s);
 
         s->clock_valid = false;
 
+        /* Prefer the recomputed value over the unreliable migrated clock. */
+        if (time_at_migration) {
+            s->clock = time_at_migration;
+        }
+
         data.clock = s->clock;
         data.flags = 0;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
@@ -75,6 +125,8 @@ static void kvmclock_vm_state_change(void *opaque, int running,
         if (s->clock_valid) {
             return;
         }
+
+        cpu_synchronize_all_states();
         ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
         if (ret < 0) {
             fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
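As a side note, the nanosecond value computed by kvmclock_current_nsec()
above follows the standard pvclock scaling rule.  A minimal standalone
sketch of the same arithmetic, using the unsigned __int128 GCC/Clang
extension instead of QEMU's mulu64() helper (the function name here is
made up for illustration):

    #include <stdint.h>

    /* ns = system_time + ((tsc_delta shifted by tsc_shift) * tsc_to_system_mul) >> 32 */
    static uint64_t pvclock_delta_to_ns(uint64_t tsc_delta, int8_t tsc_shift,
                                        uint32_t tsc_to_system_mul,
                                        uint64_t system_time)
    {
        if (tsc_shift < 0) {
            tsc_delta >>= -tsc_shift;   /* negative shift scales down */
        } else {
            tsc_delta <<= tsc_shift;
        }
        return system_time +
               (uint64_t)(((unsigned __int128)tsc_delta * tsc_to_system_mul) >> 32);
    }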
diff --git a/include/ui/input.h b/include/ui/input.h
index aa99b0cac7..5d5ac00663 100644
--- a/include/ui/input.h
+++ b/include/ui/input.h
@@ -39,6 +39,7 @@ InputEvent *qemu_input_event_new_key(KeyValue *key, bool down);
 void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down);
 void qemu_input_event_send_key_number(QemuConsole *src, int num, bool down);
 void qemu_input_event_send_key_qcode(QemuConsole *src, QKeyCode q, bool down);
+void qemu_input_event_send_key_delay(uint32_t delay_ms);
 int qemu_input_key_number_to_qcode(uint8_t nr);
 int qemu_input_key_value_to_number(const KeyValue *value);
 int qemu_input_key_value_to_qcode(const KeyValue *value);
diff --git a/kvm-all.c b/kvm-all.c
index 721a3904a9..4e19eff0ef 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1410,7 +1410,7 @@ int kvm_init(MachineClass *mc)
 
     ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
     if (ret < KVM_API_VERSION) {
-        if (ret > 0) {
+        if (ret >= 0) {
             ret = -EINVAL;
         }
         fprintf(stderr, "kvm version too old\n");
@@ -1461,6 +1461,7 @@ int kvm_init(MachineClass *mc)
     if (mc->kvm_type) {
         type = mc->kvm_type(kvm_type);
     } else if (kvm_type) {
+        ret = -EINVAL;
         fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
         goto err;
     }
@@ -1561,6 +1562,7 @@ int kvm_init(MachineClass *mc)
     return 0;
 
 err:
+    assert(ret < 0);
     if (s->vmfd >= 0) {
         close(s->vmfd);
     }
diff --git a/linux-user/main.c b/linux-user/main.c
index 882186e1a0..3e21024056 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -4052,7 +4052,7 @@ int main(int argc, char **argv, char **envp)
 
 #if defined(TARGET_I386)
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
-    env->hflags |= HF_PE_MASK;
+    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
         env->cr[4] |= CR4_OSFXSR_MASK;
         env->hflags |= HF_OSFXSR_MASK;
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index 762544b197..d7e97e7488 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -352,8 +352,8 @@ class TracepointProvider(object):
         return ret
 
 class Stats:
-    def __init__(self, provider, fields = None):
-        self.provider = provider
+    def __init__(self, providers, fields = None):
+        self.providers = providers
         self.fields_filter = fields
         self._update()
     def _update(self):
@@ -362,22 +362,25 @@ class Stats:
             if not self.fields_filter:
                 return True
             return re.match(self.fields_filter, key) is not None
-        self.values = dict([(key, None)
-                            for key in provider.fields()
-                            if wanted(key)])
-        self.provider.select(self.values.keys())
+        self.values = dict()
+        for d in providers:
+            provider_fields = [key for key in d.fields() if wanted(key)]
+            for key in provider_fields:
+                self.values[key] = None
+            d.select(provider_fields)
     def set_fields_filter(self, fields_filter):
         self.fields_filter = fields_filter
         self._update()
     def get(self):
-        new = self.provider.read()
-        for key in self.provider.fields():
-            oldval = self.values.get(key, (0, 0))
-            newval = new[key]
-            newdelta = None
-            if oldval is not None:
-                newdelta = newval - oldval[0]
-            self.values[key] = (newval, newdelta)
+        for d in providers:
+            new = d.read()
+            for key in d.fields():
+                oldval = self.values.get(key, (0, 0))
+                newval = new[key]
+                newdelta = None
+                if oldval is not None:
+                    newdelta = newval - oldval[0]
+                self.values[key] = (newval, newdelta)
         return self.values
 
 if not os.access('/sys/kernel/debug', os.F_OK):
@@ -487,6 +490,18 @@ options.add_option('-l', '--log',
                    dest = 'log',
                    help = 'run in logging mode (like vmstat)',
                    )
+options.add_option('-t', '--tracepoints',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'tracepoints',
+                   help = 'retrieve statistics from tracepoints',
+                   )
+options.add_option('-d', '--debugfs',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'debugfs',
+                   help = 'retrieve statistics from debugfs',
+                   )
 options.add_option('-f', '--fields',
                    action = 'store',
                    default = None,
@@ -495,12 +510,19 @@ options.add_option('-f', '--fields',
                    )
 (options, args) = options.parse_args(sys.argv)
 
-try:
-    provider = TracepointProvider()
-except:
-    provider = DebugfsProvider()
+providers = []
+if options.tracepoints:
+    providers.append(TracepointProvider())
+if options.debugfs:
+    providers.append(DebugfsProvider())
+
+if len(providers) == 0:
+    try:
+        providers = [TracepointProvider()]
+    except:
+        providers = [DebugfsProvider()]
 
-stats = Stats(provider, fields = options.fields)
+stats = Stats(providers, fields = options.fields)
 
 if options.log:
     log(stats)
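A quick usage sketch for the new provider switches (the option names come
from the hunk above; the filter regex is only an illustration):

    # collect stats from both tracepoints and debugfs counters
    scripts/kvm/kvm_stat -t -d

    # logging mode, tracepoints only, restricted to exit events
    scripts/kvm/kvm_stat -t -l -f '^kvm_exit'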
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index e9b3d577b3..0808cfc67d 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -87,6 +87,7 @@ typedef struct X86CPU {
     bool hyperv_time;
     bool check_cpuid;
     bool enforce_cpuid;
+    bool expose_kvm;
 
     /* if true the CPUID code directly forward host cache leaves to the guest */
     bool cache_info_passthrough;
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index c8ef936354..dde052cc42 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2789,6 +2789,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
+    DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
     DEFINE_PROP_END_OF_LIST()
 };
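With the new property, the KVM CPUID signature can be hidden from the
guest on the command line; a sketch, assuming the usual -cpu property
syntax (the rest of the invocation is a placeholder):

    qemu -enable-kvm -cpu host,kvm=off $whatever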
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 51959be290..b5e1b411fb 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -990,7 +990,6 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
     /* update the hidden flags */
     {
         if (seg_reg == R_CS) {
-            int cpl = selector & 3;
 #ifdef TARGET_X86_64
             if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
                 /* long mode */
@@ -1000,15 +999,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
 #endif
             {
                 /* legacy / compatibility case */
-                if (!(env->cr[0] & CR0_PE_MASK))
-                    cpl = 0;
-                else if (env->eflags & VM_MASK)
-                    cpl = 3;
                 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                     >> (DESC_B_SHIFT - HF_CS32_SHIFT);
                 env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
                     new_hflags;
             }
+        }
+        if (seg_reg == R_SS) {
+            int cpl = (flags >> DESC_DPL_SHIFT) & 3;
 #if HF_CPL_MASK != 3
 #error HF_CPL_MASK is hardcoded
 #endif
@@ -1253,11 +1251,14 @@ static inline uint32_t cpu_compute_eflags(CPUX86State *env)
     return env->eflags | cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
 }
 
-/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS
+ * after generating a call to a helper that uses this.
+ */
 static inline void cpu_load_eflags(CPUX86State *env, int eflags,
                                    int update_mask)
 {
     CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    CC_OP = CC_OP_EFLAGS;
     env->df = 1 - (2 * ((eflags >> 10) & 1));
     env->eflags = (env->eflags & ~update_mask) |
         (eflags & update_mask) | 0x2;
diff --git a/target-i386/gdbstub.c b/target-i386/gdbstub.c
index d34e5355f7..19fe9adc3f 100644
--- a/target-i386/gdbstub.c
+++ b/target-i386/gdbstub.c
@@ -127,9 +127,11 @@ static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf)
         target_ulong base;
 
         if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+            int dpl = (env->eflags & VM_MASK) ? 3 : 0;
             base = selector << 4;
             limit = 0xffff;
-            flags = 0;
+            flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                    DESC_A_MASK | (dpl << DESC_DPL_SHIFT);
         } else {
             if (!cpu_x86_get_descr_debug(env, selector, &base, &limit,
                                          &flags)) {
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 0d894ef4aa..4bf0ac9e76 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -528,23 +528,25 @@ int kvm_arch_init_vcpu(CPUState *cs)
         has_msr_hv_hypercall = true;
     }
 
-    memcpy(signature, "KVMKVMKVM\0\0\0", 12);
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_SIGNATURE | kvm_base;
-    c->eax = 0;
-    c->ebx = signature[0];
-    c->ecx = signature[1];
-    c->edx = signature[2];
+    if (cpu->expose_kvm) {
+        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_SIGNATURE | kvm_base;
+        c->eax = KVM_CPUID_FEATURES | kvm_base;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
 
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_FEATURES | kvm_base;
-    c->eax = env->features[FEAT_KVM];
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_FEATURES | kvm_base;
+        c->eax = env->features[FEAT_KVM];
 
-    has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
+        has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
 
-    has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
+        has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
 
-    has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+        has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+    }
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
@@ -1430,7 +1432,7 @@ static int kvm_get_sregs(X86CPU *cpu)
        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
 
-    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
     hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
     hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
                 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 168cab681b..bdff447786 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -312,6 +312,14 @@ static int cpu_post_load(void *opaque, int version_id)
         env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
     }
 
+    /* Older versions of QEMU incorrectly used CS.DPL as the CPL when
+     * running under KVM.  This is wrong for conforming code segments.
+     * Luckily, in our implementation the CPL field of hflags is redundant
+     * and we can get the right value from the SS descriptor privilege level.
+     */
+    env->hflags &= ~HF_CPL_MASK;
+    env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+
     /* XXX: restore FPU round state */
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
     env->fpus = env->fpus_vmstate & ~0x3800;
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 934cc2b287..2d970d0cb9 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -103,8 +103,10 @@ static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1,
 static inline void load_seg_vm(CPUX86State *env, int seg, int selector)
 {
     selector &= 0xffff;
-    cpu_x86_load_seg_cache(env, seg, selector,
-                           (selector << 4), 0xffff, 0);
+
+    cpu_x86_load_seg_cache(env, seg, selector, (selector << 4), 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK | (3 << DESC_DPL_SHIFT));
 }
 
 static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
@@ -148,11 +150,10 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
     }
 }
 
-/* XXX: merge with load_seg() */
-static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
+static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl)
 {
     uint32_t e1, e2;
-    int rpl, dpl, cpl;
+    int rpl, dpl;
 
     if ((selector & 0xfffc) != 0) {
         if (load_segment(env, &e1, &e2, selector) != 0) {
@@ -163,18 +164,13 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
         }
         rpl = selector & 3;
         dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        cpl = env->hflags & HF_CPL_MASK;
         if (seg_reg == R_CS) {
             if (!(e2 & DESC_CS_MASK)) {
                 raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
             }
-            /* XXX: is it correct? */
             if (dpl != rpl) {
                 raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
             }
-            if ((e2 & DESC_C_MASK) && dpl > rpl) {
-                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
-            }
         } else if (seg_reg == R_SS) {
             /* SS must be writable data */
             if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) {
@@ -461,12 +457,13 @@ static void switch_tss(CPUX86State *env, int tss_selector,
 
     /* load the segments */
     if (!(new_eflags & VM_MASK)) {
-        tss_load_seg(env, R_CS, new_segs[R_CS]);
-        tss_load_seg(env, R_SS, new_segs[R_SS]);
-        tss_load_seg(env, R_ES, new_segs[R_ES]);
-        tss_load_seg(env, R_DS, new_segs[R_DS]);
-        tss_load_seg(env, R_FS, new_segs[R_FS]);
-        tss_load_seg(env, R_GS, new_segs[R_GS]);
+        int cpl = new_segs[R_CS] & 3;
+        tss_load_seg(env, R_CS, new_segs[R_CS], cpl);
+        tss_load_seg(env, R_SS, new_segs[R_SS], cpl);
+        tss_load_seg(env, R_ES, new_segs[R_ES], cpl);
+        tss_load_seg(env, R_DS, new_segs[R_DS], cpl);
+        tss_load_seg(env, R_FS, new_segs[R_FS], cpl);
+        tss_load_seg(env, R_GS, new_segs[R_GS], cpl);
     }
 
     /* check that env->eip is in the CS segment limits */
@@ -573,6 +570,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     int has_error_code, new_stack, shift;
     uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
     uint32_t old_eip, sp_mask;
+    int vm86 = env->eflags & VM_MASK;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -688,7 +686,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         ssp = get_seg_base(ss_e1, ss_e2);
     } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
         /* to same privilege */
-        if (env->eflags & VM_MASK) {
+        if (vm86) {
             raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
         }
         new_stack = 0;
@@ -709,14 +707,14 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
 #if 0
     /* XXX: check that enough room is available */
     push_size = 6 + (new_stack << 2) + (has_error_code << 1);
-    if (env->eflags & VM_MASK) {
+    if (vm86) {
         push_size += 8;
     }
     push_size <<= shift;
 #endif
     if (shift == 1) {
         if (new_stack) {
-            if (env->eflags & VM_MASK) {
+            if (vm86) {
                 PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
                 PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
                 PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
@@ -733,7 +731,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         }
     } else {
         if (new_stack) {
-            if (env->eflags & VM_MASK) {
+            if (vm86) {
                 PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
                 PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
                 PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
@@ -757,7 +755,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 
     if (new_stack) {
-        if (env->eflags & VM_MASK) {
+        if (vm86) {
             cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
             cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
             cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
@@ -1615,7 +1613,6 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
             }
             next_eip = env->eip + next_eip_addend;
             switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
-            CC_OP = CC_OP_EFLAGS;
             break;
         case 4: /* 286 call gate */
         case 12: /* 386 call gate */
@@ -1784,7 +1781,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
                 raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
             }
             switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
-            CC_OP = CC_OP_EFLAGS;
             return;
         case 4: /* 286 call gate */
         case 12: /* 386 call gate */
@@ -2479,9 +2475,12 @@ void helper_verw(CPUX86State *env, target_ulong selector1)
 void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector)
 {
     if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+        int dpl = (env->eflags & VM_MASK) ? 3 : 0;
         selector &= 0xffff;
         cpu_x86_load_seg_cache(env, seg_reg, selector,
-                               (selector << 4), 0xffff, 0);
+                               (selector << 4), 0xffff,
+                               DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                               DESC_A_MASK | (dpl << DESC_DPL_SHIFT));
     } else {
         helper_load_seg(env, seg_reg, selector);
     }
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 5d7697c1a7..58051d3bcc 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -168,15 +168,26 @@ void do_smm_enter(X86CPU *cpu)
                                       CR0_PG_MASK));
     cpu_x86_update_cr4(env, 0);
     env->dr[7] = 0x00000400;
-    CC_OP = CC_OP_EFLAGS;
 
     cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
-                           0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+                           0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
 }
 
 void helper_rsm(CPUX86State *env)
@@ -296,7 +307,6 @@ void helper_rsm(CPUX86State *env)
         env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
-    CC_OP = CC_OP_EFLAGS;
     env->hflags &= ~HF_SMM_MASK;
     cpu_smm_update(env);
 
diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
index d250d18e27..429d029a3d 100644
--- a/target-i386/svm_helper.c
+++ b/target-i386/svm_helper.c
@@ -257,7 +257,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                   env->vm_vmcb + offsetof(struct vmcb,
                                                           save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
                        R_ES);
@@ -699,7 +698,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
                                                            save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
                       VM_MASK));
-    CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
                        R_ES);
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index a32aea66fa..60c7493ac1 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -99,8 +99,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     __builtin___clear_cache((char *)start, (char *)stop);
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 73f10c4424..1c719e2862 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -86,8 +86,6 @@ extern bool use_idiv_instructions;
 #define TCG_TARGET_HAS_div_i32          use_idiv_instructions
 #define TCG_TARGET_HAS_rem_i32          0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
 
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index d9102335f9..4133dcf1a9 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1407,7 +1407,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
         } else {
             retaddr = TCG_REG_RAX;
             tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
-            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
+            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
+                       TCG_TARGET_CALL_STACK_OFFSET);
         }
     }
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 6c94e5cde0..7a9980e70e 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -130,8 +130,6 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_mulsh_i64        0
 #endif
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
      ((ofs) == 0 && (len) == 16))
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 3a59b50349..d67558988a 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -160,8 +160,6 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i64        0
 #define TCG_TARGET_HAS_trunc_shr_i32    0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
 
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index b5face8b4d..c88a1c9272 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -118,8 +118,6 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_ext16s_i32       use_mips32r2_instructions
 #define TCG_TARGET_HAS_rot_i32          use_mips32r2_instructions
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 77da2f942a..16cebbe16d 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -889,17 +889,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
 
         CASE_OP_32_64(ld8u):
-        case INDEX_op_qemu_ld8u:
             mask = 0xff;
             break;
         CASE_OP_32_64(ld16u):
-        case INDEX_op_qemu_ld16u:
             mask = 0xffff;
             break;
         case INDEX_op_ld32u_i64:
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-#endif
             mask = 0xffffffffu;
             break;
 
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index dd7e557948..05069ae2d8 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -100,8 +100,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_AREG0 TCG_REG_R27
 
 #define tcg_qemu_tb_exec(env, tb_ptr) \
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 29f479a3cf..f2360c869a 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -124,8 +124,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_AREG0 TCG_REG_R27
 
 #define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index ad2c6ddaf4..5acc28ca6b 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -100,8 +100,6 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
 #define TCG_TARGET_deposit_i64_valid  tcg_target_deposit_valid
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 473bfc71fd..089f9761ca 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -140,8 +140,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
 
-#define TCG_TARGET_HAS_new_ldst         1
-
 #define TCG_AREG0 TCG_REG_I0
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 719533ac39..019dd9b6fb 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2718,7 +2718,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
-#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
 #define tcg_gen_andc_tl tcg_gen_andc_i32
 #define tcg_gen_eqv_tl tcg_gen_eqv_i32
 #define tcg_gen_nand_tl tcg_gen_nand_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 71ba64ac16..042d442c7e 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -185,106 +185,20 @@ DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
 
-#define IMPL_NEW_LDST \
-    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
-     | IMPL(TCG_TARGET_HAS_new_ldst))
-
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
-DEF(qemu_ld_i32, 1, 1, 2, IMPL_NEW_LDST)
-DEF(qemu_st_i32, 0, 2, 2, IMPL_NEW_LDST)
-# if TCG_TARGET_REG_BITS == 64
-DEF(qemu_ld_i64, 1, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-DEF(qemu_st_i64, 0, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-# else
-DEF(qemu_ld_i64, 2, 1, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-DEF(qemu_st_i64, 0, 3, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-# endif
-#else
-DEF(qemu_ld_i32, 1, 2, 2, IMPL_NEW_LDST)
-DEF(qemu_st_i32, 0, 3, 2, IMPL_NEW_LDST)
-DEF(qemu_ld_i64, 2, 2, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-DEF(qemu_st_i64, 0, 4, 2, IMPL_NEW_LDST | TCG_OPF_64BIT)
-#endif
-
-#undef IMPL_NEW_LDST
-
-#define IMPL_OLD_LDST \
-    (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS \
-     | IMPL(!TCG_TARGET_HAS_new_ldst))
-
-#if TCG_TARGET_REG_BITS == 32
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld8u, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld8s, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld16u, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld16s, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_ld32, 1, 2, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_ld64, 2, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#else
-DEF(qemu_ld64, 2, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#endif
-
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_st8, 0, 3, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_st16, 0, 3, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST)
-#else
-DEF(qemu_st32, 0, 3, 1, IMPL_OLD_LDST)
-#endif
-#if TARGET_LONG_BITS == 32
-DEF(qemu_st64, 0, 3, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#else
-DEF(qemu_st64, 0, 4, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-#endif
-
-#else /* TCG_TARGET_REG_BITS == 32 */
-
-DEF(qemu_ld8u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld8s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld16u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld16s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld32, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld32u, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld32s, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_ld64, 1, 1, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-
-DEF(qemu_st8, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st16, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st32, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-DEF(qemu_st64, 0, 2, 1, IMPL_OLD_LDST | TCG_OPF_64BIT)
-
-#endif /* TCG_TARGET_REG_BITS != 32 */
-
-#undef IMPL_OLD_LDST
-
+#define TLADDR_ARGS    (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
+#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
+
+DEF(qemu_ld_i32, 1, TLADDR_ARGS, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 2,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+
+#undef TLADDR_ARGS
+#undef DATA64_ARGS
 #undef IMPL
 #undef IMPL64
 #undef DEF
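For concreteness, a few expansions of the two macros above (derived
directly from their definitions; they reproduce the per-configuration
table that was removed):

    64-bit host, 64-bit target:  qemu_ld_i64 -> 1 output,  1 input,  2 constants
    32-bit host, 64-bit target:  qemu_ld_i64 -> 2 outputs, 2 inputs, 2 constants
    32-bit host, 32-bit target:  qemu_st_i64 -> 0 outputs, 3 inputs, 2 constants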
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 2c5732da17..dae4b7cb00 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -941,97 +941,26 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
     return op;
 }
 
-static const TCGOpcode old_ld_opc[8] = {
-    [MO_UB] = INDEX_op_qemu_ld8u,
-    [MO_SB] = INDEX_op_qemu_ld8s,
-    [MO_UW] = INDEX_op_qemu_ld16u,
-    [MO_SW] = INDEX_op_qemu_ld16s,
-#if TCG_TARGET_REG_BITS == 32
-    [MO_UL] = INDEX_op_qemu_ld32,
-    [MO_SL] = INDEX_op_qemu_ld32,
-#else
-    [MO_UL] = INDEX_op_qemu_ld32u,
-    [MO_SL] = INDEX_op_qemu_ld32s,
-#endif
-    [MO_Q]  = INDEX_op_qemu_ld64,
-};
-
-static const TCGOpcode old_st_opc[4] = {
-    [MO_UB] = INDEX_op_qemu_st8,
-    [MO_UW] = INDEX_op_qemu_st16,
-    [MO_UL] = INDEX_op_qemu_st32,
-    [MO_Q]  = INDEX_op_qemu_st64,
-};
-
 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
     memop = tcg_canonicalize_memop(memop, 0, 0);
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_ld_opc[memop & MO_SSIZE] != 0);
-
-    if (TCG_TARGET_REG_BITS == 32) {
-        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-    } else {
-        TCGv_i64 val64 = tcg_temp_new_i64();
-
-        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
-        tcg_add_param_i64(val64);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-
-        tcg_gen_trunc_i64_i32(val, val64);
-        tcg_temp_free_i64(val64);
-    }
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
+    tcg_add_param_i32(val);
+    tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
+    *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
     memop = tcg_canonicalize_memop(memop, 0, 1);
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_st_opc[memop & MO_SIZE] != 0);
-
-    if (TCG_TARGET_REG_BITS == 32) {
-        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
-        tcg_add_param_i32(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-    } else {
-        TCGv_i64 val64 = tcg_temp_new_i64();
-
-        tcg_gen_extu_i32_i64(val64, val);
-
-        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
-        tcg_add_param_i64(val64);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-
-        tcg_temp_free_i64(val64);
-    }
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
+    tcg_add_param_i32(val);
+    tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
+    *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
@@ -1050,22 +979,10 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
     }
 #endif
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
-        tcg_add_param_i64(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_ld_opc[memop & MO_SSIZE] != 0);
-
-    *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
     tcg_add_param_i64(val);
     tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
     *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
@@ -1080,22 +997,10 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
     }
 #endif
 
-    if (TCG_TARGET_HAS_new_ldst) {
-        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
-        tcg_add_param_i64(val);
-        tcg_add_param_tl(addr);
-        *tcg_ctx.gen_opparam_ptr++ = memop;
-        *tcg_ctx.gen_opparam_ptr++ = idx;
-        return;
-    }
-
-    /* The old opcodes only support target-endian memory operations.  */
-    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
-    assert(old_st_opc[memop & MO_SIZE] != 0);
-
-    *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
+    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
     tcg_add_param_i64(val);
     tcg_add_param_tl(addr);
+    *tcg_ctx.gen_opparam_ptr++ = memop;
     *tcg_ctx.gen_opparam_ptr++ = idx;
 }
 
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 375e590d2b..03a7b46958 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -227,21 +227,11 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 #endif
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-    { INDEX_op_qemu_ld8u, { R, L } },
-    { INDEX_op_qemu_ld8s, { R, L } },
-    { INDEX_op_qemu_ld16u, { R, L } },
-    { INDEX_op_qemu_ld16s, { R, L } },
-    { INDEX_op_qemu_ld32, { R, L } },
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld32u, { R, L } },
-    { INDEX_op_qemu_ld32s, { R, L } },
-#endif
-    { INDEX_op_qemu_ld64, { R64, L } },
+    { INDEX_op_qemu_ld_i32, { R, L } },
+    { INDEX_op_qemu_ld_i64, { R64, L } },
 
-    { INDEX_op_qemu_st8, { R, S } },
-    { INDEX_op_qemu_st16, { R, S } },
-    { INDEX_op_qemu_st32, { R, S } },
-    { INDEX_op_qemu_st64, { R64, S } },
+    { INDEX_op_qemu_st_i32, { R, S } },
+    { INDEX_op_qemu_st_i64, { R64, S } },
 
 #if TCG_TARGET_HAS_ext8s_i32
     { INDEX_op_ext8s_i32, { R, R } },
@@ -767,58 +757,52 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out8(s, args[2]);           /* condition */
         tci_out_label(s, args[3]);
         break;
-    case INDEX_op_qemu_ld8u:
-    case INDEX_op_qemu_ld8s:
-    case INDEX_op_qemu_ld16u:
-    case INDEX_op_qemu_ld16s:
-    case INDEX_op_qemu_ld32:
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_qemu_ld32s:
-    case INDEX_op_qemu_ld32u:
-#endif
+    case INDEX_op_qemu_ld_i32:
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_r(s, *args++);
-#if TCG_TARGET_REG_BITS == 32
+    case INDEX_op_qemu_ld_i64:
         tcg_out_r(s, *args++);
-#endif
-        tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_r(s, *args++);
+        }
         tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_qemu_st8:
-    case INDEX_op_qemu_st16:
-    case INDEX_op_qemu_st32:
+    case INDEX_op_qemu_st_i32:
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_qemu_st64:
-        tcg_out_r(s, *args++);
-#if TCG_TARGET_REG_BITS == 32
-        tcg_out_r(s, *args++);
-#endif
+    case INDEX_op_qemu_st_i64:
         tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_r(s, *args++);
+        }
         tcg_out_r(s, *args++);
-#endif
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            tcg_out_r(s, *args++);
+        }
+        tcg_out_i(s, *args++);
 #ifdef CONFIG_SOFTMMU
         tcg_out_i(s, *args);
 #endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 0be5acdb81..bd1e97468c 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -118,8 +118,6 @@
 #define TCG_TARGET_HAS_mulu2_i32        1
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-#define TCG_TARGET_HAS_new_ldst         0
-
 /* Number of registers available.
    For 32 bit hosts, we need more than 8 registers (call arguments). */
 /* #define TCG_TARGET_NB_REGS 8 */
diff --git a/tci.c b/tci.c
index 0acf1a177e..4711ee4817 100644
--- a/tci.c
+++ b/tci.c
@@ -117,16 +117,6 @@ static void tci_write_reg(TCGReg index, tcg_target_ulong value)
     tci_reg[index] = value;
 }
 
-static void tci_write_reg8s(TCGReg index, int8_t value)
-{
-    tci_write_reg(index, value);
-}
-
-static void tci_write_reg16s(TCGReg index, int16_t value)
-{
-    tci_write_reg(index, value);
-}
-
 #if TCG_TARGET_REG_BITS == 64
 static void tci_write_reg32s(TCGReg index, int32_t value)
 {
@@ -139,11 +129,6 @@ static void tci_write_reg8(TCGReg index, uint8_t value)
     tci_write_reg(index, value);
 }
 
-static void tci_write_reg16(TCGReg index, uint16_t value)
-{
-    tci_write_reg(index, value);
-}
-
 static void tci_write_reg32(TCGReg index, uint32_t value)
 {
     tci_write_reg(index, value);
@@ -434,6 +419,53 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
     return result;
 }
 
+#ifdef CONFIG_SOFTMMU
+# define mmuidx          tci_read_i(&tb_ptr)
+# define qemu_ld_ub \
+    helper_ret_ldub_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_leuw \
+    helper_le_lduw_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_leul \
+    helper_le_ldul_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_leq \
+    helper_le_ldq_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_beuw \
+    helper_be_lduw_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_beul \
+    helper_be_ldul_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_ld_beq \
+    helper_be_ldq_mmu(env, taddr, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_b(X) \
+    helper_ret_stb_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_lew(X) \
+    helper_le_stw_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_lel(X) \
+    helper_le_stl_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_leq(X) \
+    helper_le_stq_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_bew(X) \
+    helper_be_stw_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_bel(X) \
+    helper_be_stl_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+# define qemu_st_beq(X) \
+    helper_be_stq_mmu(env, taddr, X, mmuidx, (uintptr_t)tb_ptr)
+#else
+# define qemu_ld_ub      ldub_p(g2h(taddr))
+# define qemu_ld_leuw    lduw_le_p(g2h(taddr))
+# define qemu_ld_leul    (uint32_t)ldl_le_p(g2h(taddr))
+# define qemu_ld_leq     ldq_le_p(g2h(taddr))
+# define qemu_ld_beuw    lduw_be_p(g2h(taddr))
+# define qemu_ld_beul    (uint32_t)ldl_be_p(g2h(taddr))
+# define qemu_ld_beq     ldq_be_p(g2h(taddr))
+# define qemu_st_b(X)    stb_p(g2h(taddr), X)
+# define qemu_st_lew(X)  stw_le_p(g2h(taddr), X)
+# define qemu_st_lel(X)  stl_le_p(g2h(taddr), X)
+# define qemu_st_leq(X)  stq_le_p(g2h(taddr), X)
+# define qemu_st_bew(X)  stw_be_p(g2h(taddr), X)
+# define qemu_st_bel(X)  stl_be_p(g2h(taddr), X)
+# define qemu_st_beq(X)  stq_be_p(g2h(taddr), X)
+#endif
+
 /* Interpret pseudo code in tb. */
 uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 {
@@ -457,9 +489,6 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
         tcg_target_ulong label;
         TCGCond condition;
         target_ulong taddr;
-#ifndef CONFIG_SOFTMMU
-        tcg_target_ulong host_addr;
-#endif
         uint8_t tmp8;
         uint16_t tmp16;
         uint32_t tmp32;
@@ -467,6 +496,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 #if TCG_TARGET_REG_BITS == 32
         uint64_t v64;
 #endif
+        TCGMemOp memop;
 
 #if defined(GETPC)
         tci_tb_ptr = (uintptr_t)tb_ptr;
@@ -1087,145 +1117,145 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             assert(tb_ptr == old_code_ptr + op_size);
             tb_ptr += (int32_t)t0;
             continue;
-        case INDEX_op_qemu_ld8u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld8s:
+        case INDEX_op_qemu_ld_i32:
             t0 = *tb_ptr++;
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8s(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld16u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16(t0, tmp16);
-            break;
-        case INDEX_op_qemu_ld16s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16s(t0, tmp16);
-            break;
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
-            break;
-        case INDEX_op_qemu_ld32s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32s(t0, tmp32);
-            break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
-        case INDEX_op_qemu_ld32:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                tmp32 = qemu_ld_ub;
+                break;
+            case MO_SB:
+                tmp32 = (int8_t)qemu_ld_ub;
+                break;
+            case MO_LEUW:
+                tmp32 = qemu_ld_leuw;
+                break;
+            case MO_LESW:
+                tmp32 = (int16_t)qemu_ld_leuw;
+                break;
+            case MO_LEUL:
+                tmp32 = qemu_ld_leul;
+                break;
+            case MO_BEUW:
+                tmp32 = qemu_ld_beuw;
+                break;
+            case MO_BESW:
+                tmp32 = (int16_t)qemu_ld_beuw;
+                break;
+            case MO_BEUL:
+                tmp32 = qemu_ld_beul;
+                break;
+            default:
+                tcg_abort();
+            }
+            tci_write_reg(t0, tmp32);
             break;
-        case INDEX_op_qemu_ld64:
+        case INDEX_op_qemu_ld_i64:
             t0 = *tb_ptr++;
-#if TCG_TARGET_REG_BITS == 32
-            t1 = *tb_ptr++;
-#endif
+            if (TCG_TARGET_REG_BITS == 32) {
+                t1 = *tb_ptr++;
+            }
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp64 = helper_ldq_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp64 = tswap64(*(uint64_t *)(host_addr + GUEST_BASE));
-#endif
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                tmp64 = qemu_ld_ub;
+                break;
+            case MO_SB:
+                tmp64 = (int8_t)qemu_ld_ub;
+                break;
+            case MO_LEUW:
+                tmp64 = qemu_ld_leuw;
+                break;
+            case MO_LESW:
+                tmp64 = (int16_t)qemu_ld_leuw;
+                break;
+            case MO_LEUL:
+                tmp64 = qemu_ld_leul;
+                break;
+            case MO_LESL:
+                tmp64 = (int32_t)qemu_ld_leul;
+                break;
+            case MO_LEQ:
+                tmp64 = qemu_ld_leq;
+                break;
+            case MO_BEUW:
+                tmp64 = qemu_ld_beuw;
+                break;
+            case MO_BESW:
+                tmp64 = (int16_t)qemu_ld_beuw;
+                break;
+            case MO_BEUL:
+                tmp64 = qemu_ld_beul;
+                break;
+            case MO_BESL:
+                tmp64 = (int32_t)qemu_ld_beul;
+                break;
+            case MO_BEQ:
+                tmp64 = qemu_ld_beq;
+                break;
+            default:
+                tcg_abort();
+            }
             tci_write_reg(t0, tmp64);
-#if TCG_TARGET_REG_BITS == 32
-            tci_write_reg(t1, tmp64 >> 32);
-#endif
-            break;
-        case INDEX_op_qemu_st8:
-            t0 = tci_read_r8(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stb_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint8_t *)(host_addr + GUEST_BASE) = t0;
-#endif
-            break;
-        case INDEX_op_qemu_st16:
-            t0 = tci_read_r16(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stw_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint16_t *)(host_addr + GUEST_BASE) = tswap16(t0);
-#endif
+            if (TCG_TARGET_REG_BITS == 32) {
+                tci_write_reg(t1, tmp64 >> 32);
+            }
             break;
-        case INDEX_op_qemu_st32:
-            t0 = tci_read_r32(&tb_ptr);
+        case INDEX_op_qemu_st_i32:
+            t0 = tci_read_r(&tb_ptr);
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stl_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint32_t *)(host_addr + GUEST_BASE) = tswap32(t0);
-#endif
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                qemu_st_b(t0);
+                break;
+            case MO_LEUW:
+                qemu_st_lew(t0);
+                break;
+            case MO_LEUL:
+                qemu_st_lel(t0);
+                break;
+            case MO_BEUW:
+                qemu_st_bew(t0);
+                break;
+            case MO_BEUL:
+                qemu_st_bel(t0);
+                break;
+            default:
+                tcg_abort();
+            }
             break;
-        case INDEX_op_qemu_st64:
+        case INDEX_op_qemu_st_i64:
             tmp64 = tci_read_r64(&tb_ptr);
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stq_mmu(env, taddr, tmp64, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint64_t *)(host_addr + GUEST_BASE) = tswap64(tmp64);
-#endif
+            memop = tci_read_i(&tb_ptr);
+            switch (memop) {
+            case MO_UB:
+                qemu_st_b(tmp64);
+                break;
+            case MO_LEUW:
+                qemu_st_lew(tmp64);
+                break;
+            case MO_LEUL:
+                qemu_st_lel(tmp64);
+                break;
+            case MO_LEQ:
+                qemu_st_leq(tmp64);
+                break;
+            case MO_BEUW:
+                qemu_st_bew(tmp64);
+                break;
+            case MO_BEUL:
+                qemu_st_bel(tmp64);
+                break;
+            case MO_BEQ:
+                qemu_st_beq(tmp64);
+                break;
+            default:
+                tcg_abort();
+            }
             break;
         default:
             TODO();
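
Note on the tci.c hunks above: the per-width qemu_ld8u/.../qemu_st64 opcodes, each carrying its own CONFIG_SOFTMMU versus user-mode #ifdef, are folded into the unified INDEX_op_qemu_ld_i32/_i64 and INDEX_op_qemu_st_i32/_i64 opcodes. Access width, signedness and byte order now arrive as a TCGMemOp immediate that the interpreter decodes in a single switch; the qemu_ld_*/qemu_st_* names used in the cases are macros introduced earlier in tci.c by this patch (not visible in this excerpt) that hide the softmmu/direct-access split, and the #if TCG_TARGET_REG_BITS == 32 guards become plain if statements. The stand-alone sketch below shows the same decode idea with made-up DEMO_MO_* values; it is illustrative only and does not use QEMU's TCGMemOp definitions.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative memop layout: low bits = log2(size), plus sign/bswap
     * flags.  These values are assumptions for the sketch, not QEMU's. */
    enum {
        DEMO_MO_8     = 0,
        DEMO_MO_16    = 1,
        DEMO_MO_32    = 2,
        DEMO_MO_SIGN  = 4,   /* sign-extend the loaded value */
        DEMO_MO_BSWAP = 8,   /* byte-swap after loading      */
    };

    /* Decode one little-endian-stored value according to the memop. */
    static int64_t demo_load(const uint8_t *p, int memop)
    {
        int size = memop & 3;
        int64_t val = 0;

        switch (size) {
        case DEMO_MO_8:
            val = p[0];
            break;
        case DEMO_MO_16: {
            uint16_t v = (uint16_t)(p[0] | p[1] << 8);
            if (memop & DEMO_MO_BSWAP) {
                v = (uint16_t)((v >> 8) | (v << 8));
            }
            val = v;
            break;
        }
        case DEMO_MO_32: {
            uint32_t v = p[0] | p[1] << 8 | p[2] << 16 | (uint32_t)p[3] << 24;
            if (memop & DEMO_MO_BSWAP) {
                v = (v >> 24) | ((v >> 8) & 0xff00) |
                    ((v << 8) & 0xff0000) | (v << 24);
            }
            val = v;
            break;
        }
        }
        if (memop & DEMO_MO_SIGN) {
            switch (size) {
            case DEMO_MO_8:  val = (int8_t)val;  break;
            case DEMO_MO_16: val = (int16_t)val; break;
            case DEMO_MO_32: val = (int32_t)val; break;
            }
        }
        return val;
    }

    int main(void)
    {
        const uint8_t buf[4] = { 0xff, 0x80, 0x00, 0x00 };

        printf("%lld\n", (long long)demo_load(buf, DEMO_MO_8));                 /* 255    */
        printf("%lld\n", (long long)demo_load(buf, DEMO_MO_8 | DEMO_MO_SIGN));  /* -1     */
        printf("%lld\n", (long long)demo_load(buf, DEMO_MO_16 | DEMO_MO_SIGN)); /* -32513 */
        printf("%lld\n", (long long)demo_load(buf, DEMO_MO_16 | DEMO_MO_BSWAP
                                                   | DEMO_MO_SIGN));            /* -128   */
        return 0;
    }
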
diff --git a/ui/curses.c b/ui/curses.c
index de85f7610f..8edb038bb3 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -277,31 +277,41 @@ static void curses_refresh(DisplayChangeListener *dcl)
              * events, we need to emit both for each key received */
             if (keycode & SHIFT) {
                 qemu_input_event_send_key_number(NULL, SHIFT_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & CNTRL) {
                 qemu_input_event_send_key_number(NULL, CNTRL_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & ALT) {
                 qemu_input_event_send_key_number(NULL, ALT_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & ALTGR) {
                 qemu_input_event_send_key_number(NULL, GREY | ALT_CODE, true);
+                qemu_input_event_send_key_delay(0);
             }
 
             qemu_input_event_send_key_number(NULL, keycode & KEY_MASK, true);
+            qemu_input_event_send_key_delay(0);
             qemu_input_event_send_key_number(NULL, keycode & KEY_MASK, false);
+            qemu_input_event_send_key_delay(0);
 
             if (keycode & ALTGR) {
                 qemu_input_event_send_key_number(NULL, GREY | ALT_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & ALT) {
                 qemu_input_event_send_key_number(NULL, ALT_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & CNTRL) {
                 qemu_input_event_send_key_number(NULL, CNTRL_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
             if (keycode & SHIFT) {
                 qemu_input_event_send_key_number(NULL, SHIFT_CODE, false);
+                qemu_input_event_send_key_delay(0);
             }
         } else {
             keysym = curses2qemu[chr];
diff --git a/ui/input-legacy.c b/ui/input-legacy.c
index 2a538607a2..3025f50f49 100644
--- a/ui/input-legacy.c
+++ b/ui/input-legacy.c
@@ -74,27 +74,6 @@ int index_from_key(const char *key)
     return i;
 }
 
-static KeyValue **keyvalues;
-static int keyvalues_size;
-static QEMUTimer *key_timer;
-
-static void free_keyvalues(void)
-{
-    g_free(keyvalues);
-    keyvalues = NULL;
-    keyvalues_size = 0;
-}
-
-static void release_keys(void *opaque)
-{
-    while (keyvalues_size > 0) {
-        qemu_input_event_send_key(NULL, keyvalues[--keyvalues_size],
-                                  false);
-    }
-
-    free_keyvalues();
-}
-
 static KeyValue *copy_key_value(KeyValue *src)
 {
     KeyValue *dst = g_new(KeyValue, 1);
@@ -107,30 +86,18 @@ void qmp_send_key(KeyValueList *keys, bool has_hold_time, int64_t hold_time,
 {
     KeyValueList *p;
 
-    if (!key_timer) {
-        key_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, release_keys, NULL);
-    }
-
-    if (keyvalues != NULL) {
-        timer_del(key_timer);
-        release_keys(NULL);
-    }
-
     if (!has_hold_time) {
-        hold_time = 100;
+        hold_time = 0; /* use default */
     }
 
     for (p = keys; p != NULL; p = p->next) {
         qemu_input_event_send_key(NULL, copy_key_value(p->value), true);
-
-        keyvalues = g_realloc(keyvalues, sizeof(KeyValue *) *
-                              (keyvalues_size + 1));
-        keyvalues[keyvalues_size++] = copy_key_value(p->value);
+        qemu_input_event_send_key_delay(hold_time);
+    }
+    for (p = keys; p != NULL; p = p->next) {
+        qemu_input_event_send_key(NULL, copy_key_value(p->value), false);
+        qemu_input_event_send_key_delay(hold_time);
     }
-
-    /* delayed key up events */
-    timer_mod(key_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-              muldiv64(get_ticks_per_sec(), hold_time, 1000));
 }
 
 static void legacy_kbd_event(DeviceState *dev, QemuConsole *src,
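
Note on the input-legacy.c hunk above: qmp_send_key() no longer keeps its own key_timer and keyvalues array for delayed key-up events. It now queues a key-down for every key in the list, each followed by a qemu_input_event_send_key_delay() pause, then queues the matching key-ups the same way; a hold_time of 0 means "use the input layer's default" (10 ms, see the ui/input.c hunk below). The stand-alone sketch below only prints the resulting ordering for an example three-key chord; the key names and the 100 ms hold time are illustrative values, not anything mandated by the QMP interface.

    #include <stdio.h>

    /* Illustrative only: the event ordering the reworked qmp_send_key()
     * queues for a key list, with a delay after every event. */
    int main(void)
    {
        const char *keys[] = { "ctrl", "alt", "delete" };
        const int nkeys = sizeof(keys) / sizeof(keys[0]);
        const int hold_time_ms = 100;
        int i;

        for (i = 0; i < nkeys; i++) {
            printf("queue key-down %-7s then %d ms delay\n", keys[i], hold_time_ms);
        }
        for (i = 0; i < nkeys; i++) {
            printf("queue key-up   %-7s then %d ms delay\n", keys[i], hold_time_ms);
        }
        return 0;
    }
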
diff --git a/ui/input.c b/ui/input.c
index 14c9434f5e..89d9db78c0 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -14,11 +14,31 @@ struct QemuInputHandlerState {
     QemuConsole       *con;
     QTAILQ_ENTRY(QemuInputHandlerState) node;
 };
+
+typedef struct QemuInputEventQueue QemuInputEventQueue;
+struct QemuInputEventQueue {
+    enum {
+        QEMU_INPUT_QUEUE_DELAY = 1,
+        QEMU_INPUT_QUEUE_EVENT,
+        QEMU_INPUT_QUEUE_SYNC,
+    } type;
+    QEMUTimer *timer;
+    uint32_t delay_ms;
+    QemuConsole *src;
+    InputEvent *evt;
+    QTAILQ_ENTRY(QemuInputEventQueue) node;
+};
+
 static QTAILQ_HEAD(, QemuInputHandlerState) handlers =
     QTAILQ_HEAD_INITIALIZER(handlers);
 static NotifierList mouse_mode_notifiers =
     NOTIFIER_LIST_INITIALIZER(mouse_mode_notifiers);
 
+static QTAILQ_HEAD(QemuInputEventQueueHead, QemuInputEventQueue) kbd_queue =
+    QTAILQ_HEAD_INITIALIZER(kbd_queue);
+static QEMUTimer *kbd_timer;
+static uint32_t kbd_default_delay_ms = 10;
+
 QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev,
                                                    QemuInputHandler *handler)
 {
@@ -171,6 +191,73 @@ static void qemu_input_event_trace(QemuConsole *src, InputEvent *evt)
     }
 }
 
+static void qemu_input_queue_process(void *opaque)
+{
+    struct QemuInputEventQueueHead *queue = opaque;
+    QemuInputEventQueue *item;
+
+    g_assert(!QTAILQ_EMPTY(queue));
+    item = QTAILQ_FIRST(queue);
+    g_assert(item->type == QEMU_INPUT_QUEUE_DELAY);
+    QTAILQ_REMOVE(queue, item, node);
+    g_free(item);
+
+    while (!QTAILQ_EMPTY(queue)) {
+        item = QTAILQ_FIRST(queue);
+        switch (item->type) {
+        case QEMU_INPUT_QUEUE_DELAY:
+            timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)
+                      + item->delay_ms);
+            return;
+        case QEMU_INPUT_QUEUE_EVENT:
+            qemu_input_event_send(item->src, item->evt);
+            qapi_free_InputEvent(item->evt);
+            break;
+        case QEMU_INPUT_QUEUE_SYNC:
+            qemu_input_event_sync();
+            break;
+        }
+        QTAILQ_REMOVE(queue, item, node);
+        g_free(item);
+    }
+}
+
+static void qemu_input_queue_delay(struct QemuInputEventQueueHead *queue,
+                                   QEMUTimer *timer, uint32_t delay_ms)
+{
+    QemuInputEventQueue *item = g_new0(QemuInputEventQueue, 1);
+    bool start_timer = QTAILQ_EMPTY(queue);
+
+    item->type = QEMU_INPUT_QUEUE_DELAY;
+    item->delay_ms = delay_ms;
+    item->timer = timer;
+    QTAILQ_INSERT_TAIL(queue, item, node);
+
+    if (start_timer) {
+        timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)
+                  + item->delay_ms);
+    }
+}
+
+static void qemu_input_queue_event(struct QemuInputEventQueueHead *queue,
+                                   QemuConsole *src, InputEvent *evt)
+{
+    QemuInputEventQueue *item = g_new0(QemuInputEventQueue, 1);
+
+    item->type = QEMU_INPUT_QUEUE_EVENT;
+    item->src = src;
+    item->evt = evt;
+    QTAILQ_INSERT_TAIL(queue, item, node);
+}
+
+static void qemu_input_queue_sync(struct QemuInputEventQueueHead *queue)
+{
+    QemuInputEventQueue *item = g_new0(QemuInputEventQueue, 1);
+
+    item->type = QEMU_INPUT_QUEUE_SYNC;
+    QTAILQ_INSERT_TAIL(queue, item, node);
+}
+
 void qemu_input_event_send(QemuConsole *src, InputEvent *evt)
 {
     QemuInputHandlerState *s;
@@ -230,9 +317,14 @@ void qemu_input_event_send_key(QemuConsole *src, KeyValue *key, bool down)
 {
     InputEvent *evt;
     evt = qemu_input_event_new_key(key, down);
-    qemu_input_event_send(src, evt);
-    qemu_input_event_sync();
-    qapi_free_InputEvent(evt);
+    if (QTAILQ_EMPTY(&kbd_queue)) {
+        qemu_input_event_send(src, evt);
+        qemu_input_event_sync();
+        qapi_free_InputEvent(evt);
+    } else {
+        qemu_input_queue_event(&kbd_queue, src, evt);
+        qemu_input_queue_sync(&kbd_queue);
+    }
 }
 
 void qemu_input_event_send_key_number(QemuConsole *src, int num, bool down)
@@ -251,6 +343,16 @@ void qemu_input_event_send_key_qcode(QemuConsole *src, QKeyCode q, bool down)
     qemu_input_event_send_key(src, key, down);
 }
 
+void qemu_input_event_send_key_delay(uint32_t delay_ms)
+{
+    if (!kbd_timer) {
+        kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process,
+                                 &kbd_queue);
+    }
+    qemu_input_queue_delay(&kbd_queue, kbd_timer,
+                           delay_ms ? delay_ms : kbd_default_delay_ms);
+}
+
 InputEvent *qemu_input_event_new_btn(InputButton btn, bool down)
 {
     InputEvent *evt = g_new0(InputEvent, 1);
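
Note on the ui/input.c hunk above: qemu_input_event_send_key_delay() appends a DELAY item to kbd_queue (using the caller's value, or the 10 ms default when 0 is passed) and arms kbd_timer if the queue was empty. Once anything is queued, later key events are appended behind it instead of being delivered immediately, and qemu_input_queue_process() drains the queue from the timer callback until it reaches the next delay. The sketch below is a self-contained toy model of that drain loop (plain linked list instead of QTAILQ, timer expiries simulated by direct calls); it is not QEMU code.

    #include <stdio.h>
    #include <stdlib.h>

    enum item_type { ITEM_DELAY, ITEM_EVENT, ITEM_SYNC };

    struct item {
        enum item_type type;
        int delay_ms;          /* valid for ITEM_DELAY */
        const char *name;      /* valid for ITEM_EVENT */
        struct item *next;
    };

    static struct item *head, *tail;

    static void enqueue(enum item_type type, int delay_ms, const char *name)
    {
        struct item *it = calloc(1, sizeof(*it));

        it->type = type;
        it->delay_ms = delay_ms;
        it->name = name;
        if (tail) {
            tail->next = it;
        } else {
            head = it;
        }
        tail = it;
    }

    /* Toy version of qemu_input_queue_process(): drop the leading delay,
     * then replay queued items until the next delay (which would re-arm
     * the timer) or until the queue is empty. */
    static void timer_fired(void)
    {
        struct item *it = head;

        if (!it || it->type != ITEM_DELAY) {
            return;                     /* the real code asserts this */
        }
        head = it->next;
        free(it);

        while (head) {
            it = head;
            if (it->type == ITEM_DELAY) {
                printf("re-arm timer: %d ms\n", it->delay_ms);
                return;
            }
            if (it->type == ITEM_EVENT) {
                printf("send event: %s\n", it->name);
            } else {
                printf("sync\n");
            }
            head = it->next;
            free(it);
        }
        tail = NULL;
    }

    int main(void)
    {
        /* One keystroke: the key-down goes out immediately (queue was
         * empty), so the queue holds delay, key-up, sync, delay. */
        enqueue(ITEM_DELAY, 10, NULL);
        enqueue(ITEM_EVENT, 0, "key up");
        enqueue(ITEM_SYNC, 0, NULL);
        enqueue(ITEM_DELAY, 10, NULL);

        timer_fired();                  /* first simulated expiry  */
        timer_fired();                  /* second simulated expiry */
        return 0;
    }
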
diff --git a/ui/vnc.c b/ui/vnc.c
index 61b1f933bf..1684206184 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1553,7 +1553,9 @@ static void press_key(VncState *vs, int keysym)
 {
     int keycode = keysym2scancode(vs->vd->kbd_layout, keysym) & SCANCODE_KEYMASK;
     qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, true);
+    qemu_input_event_send_key_delay(0);
     qemu_input_event_send_key_number(vs->vd->dcl.con, keycode, false);
+    qemu_input_event_send_key_delay(0);
 }
 
 static int current_led_state(VncState *vs)