summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--disas/i386.c8
-rw-r--r--docs/qmp/qmp-events.txt2
-rw-r--r--hw/display/xenfb.c7
-rw-r--r--hw/ide/core.c1
-rw-r--r--hw/net/lan9118.c6
-rw-r--r--hw/virtio/virtio-balloon.c7
-rw-r--r--hw/xen/xen_pt.c8
-rw-r--r--include/exec/ram_addr.h2
-rw-r--r--include/qemu/timer.h6
-rw-r--r--linux-user/s390x/syscall.h2
-rw-r--r--linux-user/syscall.c4
-rw-r--r--net/net.c5
-rw-r--r--net/tap-linux.c14
-rw-r--r--[-rwxr-xr-x]pc-bios/kvmvapic.binbin9216 -> 9216 bytes
-rw-r--r--[-rwxr-xr-x]pc-bios/multiboot.binbin1024 -> 1024 bytes
-rw-r--r--[-rwxr-xr-x]pc-bios/sgabios.binbin4096 -> 4096 bytes
-rw-r--r--tcg/i386/tcg-target.c145
-rw-r--r--tcg/tcg.c2
-rw-r--r--vl.c2
20 files changed, 140 insertions, 83 deletions
diff --git a/Makefile b/Makefile
index bdff4e4684..807054b3a1 100644
--- a/Makefile
+++ b/Makefile
@@ -290,7 +290,7 @@ common  de-ch  es     fo  fr-ca  hu     ja  mk  nl-be      pt  sl     tr \
 bepo    cz
 
 ifdef INSTALL_BLOBS
-BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
+BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
 acpi-dsdt.aml q35-acpi-dsdt.aml \
 ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin \
diff --git a/disas/i386.c b/disas/i386.c
index 47f1f2ea61..044e02c032 100644
--- a/disas/i386.c
+++ b/disas/i386.c
@@ -2632,17 +2632,17 @@ static const struct dis386 prefix_user_table[][4] = {
 
   /* PREGRP87 */
   {
+    { "movbe",	{ Gv, Ev } },
     { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
+    { "movbe",	{ Gv, Ev } },
     { "crc32",	{ Gdq, { CRC32_Fixup, b_mode } } },
   },
 
   /* PREGRP88 */
   {
+    { "movbe",	{ Ev, Gv } },
     { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
+    { "movbe",	{ Ev, Gv } },
     { "crc32",	{ Gdq, { CRC32_Fixup, v_mode } } },
   },
 
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
index 6b87e9786a..a378c87583 100644
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -479,7 +479,7 @@ Data: None.
 
 Example:
 
-{ "event": "WATCHDOG",
+{ "event": "WAKEUP",
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
 WATCHDOG
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index f0333a0cad..cb9d456814 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -495,7 +495,7 @@ static int xenfb_map_fb(struct XenFB *xenfb)
     munmap(map, n_fbdirs * XC_PAGE_SIZE);
 
     xenfb->pixels = xc_map_foreign_pages(xen_xc, xenfb->c.xendev.dom,
-					 PROT_READ | PROT_WRITE, fbmfns, xenfb->fbpages);
+            PROT_READ, fbmfns, xenfb->fbpages);
     if (xenfb->pixels == NULL)
 	goto out;
 
@@ -903,6 +903,11 @@ static void fb_disconnect(struct XenDevice *xendev)
     fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
                       -1, 0);
+    if (fb->pixels == MAP_FAILED) {
+        xen_be_printf(xendev, 0,
+                "Couldn't replace the framebuffer with anonymous memory errno=%d\n",
+                errno);
+    }
     common_unbind(&fb->c);
     fb->feature_update = 0;
     fb->bug_trigger    = 0;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 036cd4a6d1..e1dfe54df6 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1321,6 +1321,7 @@ static bool cmd_exec_dev_diagnostic(IDEState *s, uint8_t cmd)
         s->status = 0; /* ATAPI spec (v6) section 9.10 defines packet
                         * devices to return a clear status register
                         * with READY_STAT *not* set. */
+        s->error = 0x01;
     } else {
         s->status = READY_STAT | SEEK_STAT;
         /* The bits of the error register are not as usual for this command!
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
index 2315f996d4..e528290b41 100644
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@@ -727,14 +727,14 @@ static void tx_fifo_push(lan9118_state *s, uint32_t val)
         s->txp->cmd_a = val & 0x831f37ff;
         s->txp->fifo_used++;
         s->txp->state = TX_B;
+        s->txp->buffer_size = extract32(s->txp->cmd_a, 0, 11);
+        s->txp->offset = extract32(s->txp->cmd_a, 16, 5);
         break;
     case TX_B:
         if (s->txp->cmd_a & 0x2000) {
             /* First segment */
             s->txp->cmd_b = val;
             s->txp->fifo_used++;
-            s->txp->buffer_size = s->txp->cmd_a & 0x7ff;
-            s->txp->offset = (s->txp->cmd_a >> 16) & 0x1f;
             /* End alignment does not include command words.  */
             n = (s->txp->buffer_size + s->txp->offset + 3) >> 2;
             switch ((n >> 24) & 3) {
@@ -763,7 +763,7 @@ static void tx_fifo_push(lan9118_state *s, uint32_t val)
         if (s->txp->buffer_size <= 0 && s->txp->pad != 0) {
             s->txp->pad--;
         } else {
-            n = 4;
+            n = MIN(4, s->txp->buffer_size + s->txp->offset);
             while (s->txp->offset) {
                 val >>= 8;
                 n--;
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d9754dbd33..a470a0b3a6 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -263,7 +263,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
 
-    memcpy(config_data, &config, 8);
+    memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
 }
 
 static void virtio_balloon_set_config(VirtIODevice *vdev,
@@ -272,7 +272,7 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     struct virtio_balloon_config config;
     uint32_t oldactual = dev->actual;
-    memcpy(&config, config_data, 8);
+    memcpy(&config, config_data, sizeof(struct virtio_balloon_config));
     dev->actual = le32_to_cpu(config.actual);
     if (dev->actual != oldactual) {
         qemu_balloon_changed(ram_size -
@@ -343,7 +343,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
     int ret;
 
-    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, 8);
+    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
+                sizeof(struct virtio_balloon_config));
 
     ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                    virtio_balloon_stat, s);
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index d58cb616b1..be4220b415 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -420,8 +420,8 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s)
                               "xen-pci-pt-bar", r->size);
         pci_register_bar(&s->dev, i, type, &s->bar[i]);
 
-        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%lx"PRIx64
-                   " base_addr=0x%lx"PRIx64" type: %#x)\n",
+        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
+                   " base_addr=0x%08"PRIx64" type: %#x)\n",
                    i, r->size, r->base_addr, type);
     }
 
@@ -440,8 +440,8 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s)
 
         s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;
 
-        memory_region_init_rom_device(&s->rom, OBJECT(s), NULL, NULL,
-                                      "xen-pci-pt-rom", d->rom.size);
+        memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev,
+                              "xen-pci-pt-rom", d->rom.size);
         pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH,
                          &s->rom);
 
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 33c8acc02e..481a447417 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -79,6 +79,7 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
     xen_modified_memory(start, length);
 }
 
+#if !defined(_WIN32)
 static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                           ram_addr_t start,
                                                           ram_addr_t pages)
@@ -127,6 +128,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
         }
     }
 }
+#endif /* not _WIN32 */
 
 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                          ram_addr_t length,
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 5afcffc3f9..7f9a074c2a 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -405,7 +405,7 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg);
  * timer_init:
  * @ts: the timer to be initialised
  * @timer_list: the timer list to attach the timer to
- * @scale: the scale value for the tiemr
+ * @scale: the scale value for the timer
  * @cb: the callback to be called when the timer expires
  * @opaque: the opaque pointer to be passed to the callback
  *
@@ -422,7 +422,7 @@ void timer_init(QEMUTimer *ts,
 /**
  * timer_new_tl:
  * @timer_list: the timer list to attach the timer to
- * @scale: the scale value for the tiemr
+ * @scale: the scale value for the timer
  * @cb: the callback to be called when the timer expires
  * @opaque: the opaque pointer to be passed to the callback
  *
@@ -447,7 +447,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list,
 /**
  * timer_new:
  * @type: the clock type to use
- * @scale: the scale value for the tiemr
+ * @scale: the scale value for the timer
  * @cb: the callback to be called when the timer expires
  * @opaque: the opaque pointer to be passed to the callback
  *
diff --git a/linux-user/s390x/syscall.h b/linux-user/s390x/syscall.h
index ea8c304840..e5ce30b667 100644
--- a/linux-user/s390x/syscall.h
+++ b/linux-user/s390x/syscall.h
@@ -22,4 +22,4 @@ struct target_pt_regs {
 
 #define UNAME_MACHINE "s390x"
 
-#define TARGET_CLONE_BACKWARDS
+#define TARGET_CLONE_BACKWARDS2
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 0ac05b85f2..bc0ac98d4f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -2340,7 +2340,7 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
             size_t len;
             abi_ulong flags;
             abi_ulong addr;
-            socklen_t addrlen;
+            abi_ulong addrlen;
 
             if (get_user_ual(sockfd, vptr)
                 || get_user_ual(msg, vptr + n)
@@ -2406,7 +2406,7 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
             abi_ulong level;
             abi_ulong optname;
             abi_ulong optval;
-            socklen_t optlen;
+            abi_ulong optlen;
 
             if (get_user_ual(sockfd, vptr)
                 || get_user_ual(level, vptr + n)
diff --git a/net/net.c b/net/net.c
index f8db85f30b..2c3af202a6 100644
--- a/net/net.c
+++ b/net/net.c
@@ -164,7 +164,6 @@ void qemu_macaddr_default_if_unset(MACAddr *macaddr)
 static char *assign_name(NetClientState *nc1, const char *model)
 {
     NetClientState *nc;
-    char buf[256];
     int id = 0;
 
     QTAILQ_FOREACH(nc, &net_clients, next) {
@@ -176,9 +175,7 @@ static char *assign_name(NetClientState *nc1, const char *model)
         }
     }
 
-    snprintf(buf, sizeof(buf), "%s.%d", model, id);
-
-    return g_strdup(buf);
+    return g_strdup_printf("%s.%d", model, id);
 }
 
 static void qemu_net_client_destructor(NetClientState *nc)
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 36c09e24d8..812bf2dfc6 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -52,14 +52,17 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     memset(&ifr, 0, sizeof(ifr));
     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
 
-    if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
-        features & IFF_ONE_QUEUE) {
+    if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+        error_report("warning: TUNGETFEATURES failed: %s", strerror(errno));
+        features = 0;
+    }
+
+    if (features & IFF_ONE_QUEUE) {
         ifr.ifr_flags |= IFF_ONE_QUEUE;
     }
 
     if (*vnet_hdr) {
-        if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
-            features & IFF_VNET_HDR) {
+        if (features & IFF_VNET_HDR) {
             *vnet_hdr = 1;
             ifr.ifr_flags |= IFF_VNET_HDR;
         } else {
@@ -82,8 +85,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     }
 
     if (mq_required) {
-        if ((ioctl(fd, TUNGETFEATURES, &features) != 0) ||
-            !(features & IFF_MULTI_QUEUE)) {
+        if (!(features & IFF_MULTI_QUEUE)) {
             error_report("multiqueue required, but no kernel "
                          "support for IFF_MULTI_QUEUE available");
             close(fd);
diff --git a/pc-bios/kvmvapic.bin b/pc-bios/kvmvapic.bin
index 045f5c2884..045f5c2884 100755..100644
--- a/pc-bios/kvmvapic.bin
+++ b/pc-bios/kvmvapic.bin
Binary files differdiff --git a/pc-bios/multiboot.bin b/pc-bios/multiboot.bin
index e772713c95..e772713c95 100755..100644
--- a/pc-bios/multiboot.bin
+++ b/pc-bios/multiboot.bin
Binary files differdiff --git a/pc-bios/sgabios.bin b/pc-bios/sgabios.bin
index c3da4c3d0a..c3da4c3d0a 100755..100644
--- a/pc-bios/sgabios.bin
+++ b/pc-bios/sgabios.bin
Binary files differdiff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 495b901080..5d4cf9386e 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -99,18 +99,31 @@ static const int tcg_target_call_oarg_regs[] = {
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
+/* The host compiler should supply <cpuid.h> to enable runtime features
+   detection, as we're not going to go so far as our own inline assembly.
+   If not available, default values will be assumed.  */
+#if defined(CONFIG_CPUID_H)
+#include <cpuid.h>
+#endif
+
 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
-   is available.  However, the host compiler must supply <cpuid.h>, as we're
-   not going to go so far as our own inline assembly.  */
+   is available.  */
 #if TCG_TARGET_REG_BITS == 64
 # define have_cmov 1
 #elif defined(CONFIG_CPUID_H)
-#include <cpuid.h>
 static bool have_cmov;
 #else
 # define have_cmov 0
 #endif
 
+/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
+   going to attempt to determine at runtime whether movbe is available.  */
+#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
+static bool have_movbe;
+#else
+# define have_movbe 0
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -240,13 +253,14 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #endif
 
 #define P_EXT		0x100		/* 0x0f opcode prefix */
-#define P_DATA16	0x200		/* 0x66 opcode prefix */
+#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
+#define P_DATA16        0x400           /* 0x66 opcode prefix */
 #if TCG_TARGET_REG_BITS == 64
-# define P_ADDR32	0x400		/* 0x67 opcode prefix */
-# define P_REXW		0x800		/* Set REX.W = 1 */
-# define P_REXB_R	0x1000		/* REG field as byte register */
-# define P_REXB_RM	0x2000		/* R/M field as byte register */
-# define P_GS           0x4000          /* gs segment override */
+# define P_ADDR32       0x800           /* 0x67 opcode prefix */
+# define P_REXW         0x1000          /* Set REX.W = 1 */
+# define P_REXB_R       0x2000          /* REG field as byte register */
+# define P_REXB_RM      0x4000          /* R/M field as byte register */
+# define P_GS           0x8000          /* gs segment override */
 #else
 # define P_ADDR32	0
 # define P_REXW		0
@@ -279,6 +293,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVB_EvIz   (0xc6)
 #define OPC_MOVL_EvIz	(0xc7)
 #define OPC_MOVL_Iv     (0xb8)
+#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
+#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
 #define OPC_MOVSBL	(0xbe | P_EXT)
 #define OPC_MOVSWL	(0xbf | P_EXT)
 #define OPC_MOVSLQ	(0x63 | P_REXW)
@@ -381,7 +397,7 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
     }
 
     rex = 0;
-    rex |= (opc & P_REXW) >> 8;		/* REX.W */
+    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
     rex |= (r & 8) >> 1;		/* REX.R */
     rex |= (x & 8) >> 2;		/* REX.X */
     rex |= (rm & 8) >> 3;		/* REX.B */
@@ -398,9 +414,13 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
         tcg_out8(s, (uint8_t)(rex | 0x40));
     }
 
-    if (opc & P_EXT) {
+    if (opc & (P_EXT | P_EXT38)) {
         tcg_out8(s, 0x0f);
+        if (opc & P_EXT38) {
+            tcg_out8(s, 0x38);
+        }
     }
+
     tcg_out8(s, opc);
 }
 #else
@@ -409,8 +429,11 @@ static void tcg_out_opc(TCGContext *s, int opc)
     if (opc & P_DATA16) {
         tcg_out8(s, 0x66);
     }
-    if (opc & P_EXT) {
+    if (opc & (P_EXT | P_EXT38)) {
         tcg_out8(s, 0x0f);
+        if (opc & P_EXT38) {
+            tcg_out8(s, 0x38);
+        }
     }
     tcg_out8(s, opc);
 }
@@ -1336,7 +1359,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, intptr_t ofs, int seg,
                                    TCGMemOp memop)
 {
-    const TCGMemOp bswap = memop & MO_BSWAP;
+    const TCGMemOp real_bswap = memop & MO_BSWAP;
+    TCGMemOp bswap = real_bswap;
+    int movop = OPC_MOVL_GvEv;
+
+    if (have_movbe && real_bswap) {
+        bswap = 0;
+        movop = OPC_MOVBE_GyMy;
+    }
 
     switch (memop & MO_SSIZE) {
     case MO_UB:
@@ -1347,14 +1377,19 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         break;
     case MO_UW:
         tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
-        if (bswap) {
+        if (real_bswap) {
             tcg_out_rolw_8(s, datalo);
         }
         break;
     case MO_SW:
-        if (bswap) {
-            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
-            tcg_out_rolw_8(s, datalo);
+        if (real_bswap) {
+            if (have_movbe) {
+                tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                     datalo, base, ofs);
+            } else {
+                tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
+                tcg_out_rolw_8(s, datalo);
+            }
             tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
         } else {
             tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
@@ -1362,16 +1397,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         }
         break;
     case MO_UL:
-        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
+        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
         if (bswap) {
             tcg_out_bswap32(s, datalo);
         }
         break;
 #if TCG_TARGET_REG_BITS == 64
     case MO_SL:
-        if (bswap) {
-            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
-            tcg_out_bswap32(s, datalo);
+        if (real_bswap) {
+            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+            if (bswap) {
+                tcg_out_bswap32(s, datalo);
+            }
             tcg_out_ext32s(s, datalo, datalo);
         } else {
             tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
@@ -1380,27 +1417,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 #endif
     case MO_Q:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
-                                 datalo, base, ofs);
+            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
             if (bswap) {
                 tcg_out_bswap64(s, datalo);
             }
         } else {
-            if (bswap) {
+            if (real_bswap) {
                 int t = datalo;
                 datalo = datahi;
                 datahi = t;
             }
             if (base != datalo) {
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datalo, base, ofs);
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+                tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
             } else {
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datahi, base, ofs + 4);
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datalo, base, ofs);
+                tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
             }
             if (bswap) {
                 tcg_out_bswap32(s, datalo);
@@ -1476,13 +1508,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, intptr_t ofs, int seg,
                                    TCGMemOp memop)
 {
-    const TCGMemOp bswap = memop & MO_BSWAP;
-
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
        means that TCG_REG_L0 is definitely free here.  */
     const TCGReg scratch = TCG_REG_L0;
+    const TCGMemOp real_bswap = memop & MO_BSWAP;
+    TCGMemOp bswap = real_bswap;
+    int movop = OPC_MOVL_EvGv;
+
+    if (have_movbe && real_bswap) {
+        bswap = 0;
+        movop = OPC_MOVBE_MyGy;
+    }
 
     switch (memop & MO_SIZE) {
     case MO_8:
@@ -1501,8 +1539,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
             tcg_out_rolw_8(s, scratch);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
-                             datalo, base, ofs);
+        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
         break;
     case MO_32:
         if (bswap) {
@@ -1510,7 +1547,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
             tcg_out_bswap32(s, scratch);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
         break;
     case MO_64:
         if (TCG_TARGET_REG_BITS == 64) {
@@ -1519,8 +1556,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                 tcg_out_bswap64(s, scratch);
                 datalo = scratch;
             }
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
-                                 datalo, base, ofs);
+            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
         } else if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
             tcg_out_bswap32(s, scratch);
@@ -1529,8 +1565,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
             tcg_out_bswap32(s, scratch);
             tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
+            if (real_bswap) {
+                int t = datalo;
+                datalo = datahi;
+                datahi = t;
+            }
+            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
         }
         break;
     default:
@@ -1985,9 +2026,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
 
     { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
-#if TCG_TARGET_HAS_movcond_i32
     { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
-#endif
 
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
@@ -2157,13 +2196,23 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_target_init(TCGContext *s)
 {
-    /* For 32-bit, 99% certainty that we're running on hardware that supports
-       cmov, but we still need to check.  In case cmov is not available, we'll
-       use a small forward branch.  */
-#ifndef have_cmov
+#if !(defined(have_cmov) && defined(have_movbe))
     {
         unsigned a, b, c, d;
-        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
+        int ret = __get_cpuid(1, &a, &b, &c, &d);
+
+# ifndef have_cmov
+        /* For 32-bit, 99% certainty that we're running on hardware that
+           supports cmov, but we still need to check.  In case cmov is not
+           available, we'll use a small forward branch.  */
+        have_cmov = ret && (d & bit_CMOV);
+# endif
+
+# ifndef have_movbe
+        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
+           need to probe for it.  */
+        have_movbe = ret && (c & bit_MOVBE);
+# endif
     }
 #endif
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 712438ced8..acd02b99b6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -586,7 +586,7 @@ static void tcg_temp_free_internal(int idx)
     assert(ts->temp_allocated != 0);
     ts->temp_allocated = 0;
 
-    k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
+    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
     set_bit(idx, s->free_temps[k].l);
 }
 
diff --git a/vl.c b/vl.c
index 7f4fe0d5df..2b478664a5 100644
--- a/vl.c
+++ b/vl.c
@@ -2925,7 +2925,7 @@ int main(int argc, char **argv, char **envp)
 
     bdrv_init_with_whitelist();
 
-    autostart= 1;
+    autostart = 1;
 
     /* first pass of option parsing */
     optind = 1;