-rw-r--r--  hw/i386/pc.c                                |    1
-rw-r--r--  target/i386/cpu.c                           |   18
-rw-r--r--  target/i386/cpu.h                           |    6
-rw-r--r--  target/i386/helper.h                        |   11
-rw-r--r--  target/i386/tcg/decode-new.c.inc            |  608
-rw-r--r--  target/i386/tcg/decode-new.h                |   23
-rw-r--r--  target/i386/tcg/emit.c.inc                  | 1595
-rw-r--r--  target/i386/tcg/int_helper.c                |   34
-rw-r--r--  target/i386/tcg/shift_helper_template.h.inc |  108
-rw-r--r--  target/i386/tcg/translate.c                 | 3687
10 files changed, 2942 insertions(+), 3149 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 08c7de416f..46235466d7 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -81,6 +81,7 @@
 GlobalProperty pc_compat_9_0[] = {
     { TYPE_X86_CPU, "guest-phys-bits", "0" },
     { "sev-guest", "legacy-vm-type", "true" },
+    { TYPE_X86_CPU, "legacy-multi-node", "on" },
 };
 const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 3ef30a765c..1058b6803f 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -398,12 +398,9 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info,
      * 31:11 Reserved.
      * 10:8 NodesPerProcessor: Node per processor. Read-only. Reset: XXXb.
      *      ValidValues:
-     *      Value Description
-     *      000b  1 node per processor.
-     *      001b  2 nodes per processor.
-     *      010b Reserved.
-     *      011b 4 nodes per processor.
-     *      111b-100b Reserved.
+     *      Value   Description
+     *      0h      1 node per processor.
+     *      7h-1h   Reserved.
      *  7:0 NodeId: Node ID. Read-only. Reset: XXh.
      *
      * NOTE: Hardware reserves 3 bits for number of nodes per processor.
@@ -412,8 +409,12 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info,
      * NodeId is combination of node and socket_id which is already decoded
      * in apic_id. Just use it by shifting.
      */
-    *ecx = ((topo_info->dies_per_pkg - 1) << 8) |
-           ((cpu->apic_id >> apicid_die_offset(topo_info)) & 0xFF);
+    if (cpu->legacy_multi_node) {
+        *ecx = ((topo_info->dies_per_pkg - 1) << 8) |
+               ((cpu->apic_id >> apicid_die_offset(topo_info)) & 0xFF);
+    } else {
+        *ecx = (cpu->apic_id >> apicid_pkg_offset(topo_info)) & 0xFF;
+    }
 
     *edx = 0;
 }
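
Illustration (not part of the patch): a worked example of the two ECX
encodings, assuming a hypothetical topology with apic_id 0x1a,
dies_per_pkg 2, apicid_die_offset() == 3 and apicid_pkg_offset() == 4:

    uint32_t ecx_legacy = ((2 - 1) << 8) | ((0x1a >> 3) & 0xff);
    /* 0x103: NodesPerProcessor = 1 (two nodes), NodeId = 3 */
    uint32_t ecx_new = (0x1a >> 4) & 0xff;
    /* 0x01: NodesPerProcessor = 0 (one node), NodeId = 1 */
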
@@ -8084,6 +8085,7 @@ static Property x86_cpu_properties[] = {
      * own cache information (see x86_cpu_load_def()).
      */
     DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true),
+    DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false),
     DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false),
 
     /*
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 565c7a98c3..1e0d2c915f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1994,6 +1994,12 @@ struct ArchCPU {
      */
     bool legacy_cache;
 
+    /* Compatibility bits for old machine types.
+     * If true, decode CPUID Function 0x8000001E_ECX to support multiple
+     * nodes per processor.
+     */
+    bool legacy_multi_node;
+
     /* Compatibility bits for old machine types: */
     bool enable_cpuid_0xb;
 
diff --git a/target/i386/helper.h b/target/i386/helper.h
index ac2b04abd6..3c207ac62d 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -207,15 +207,4 @@ DEF_HELPER_1(emms, void, env)
 #define SHIFT 2
 #include "tcg/ops_sse_header.h.inc"
 
-DEF_HELPER_3(rclb, tl, env, tl, tl)
-DEF_HELPER_3(rclw, tl, env, tl, tl)
-DEF_HELPER_3(rcll, tl, env, tl, tl)
-DEF_HELPER_3(rcrb, tl, env, tl, tl)
-DEF_HELPER_3(rcrw, tl, env, tl, tl)
-DEF_HELPER_3(rcrl, tl, env, tl, tl)
-#ifdef TARGET_X86_64
-DEF_HELPER_3(rclq, tl, env, tl, tl)
-DEF_HELPER_3(rcrq, tl, env, tl, tl)
-#endif
-
 DEF_HELPER_1(rdrand, tl, env)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 426c459412..0e1811399f 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -33,6 +33,28 @@
  * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
  * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
  *
+ * The manual lists immediate far destinations as Ap (technically an implicit
+ * argument).  The decoder splits them into two immediates, using "Ip" for
+ * the offset part (that comes first in the instruction stream) and "Iw" for
+ * the segment/selector part.  The size of the offset is given by s->dflag
+ * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
+ * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
+ *
+ * Operand types
+ * -------------
+ *
+ * For memory-only operands, if the emitter function wants to rely on
+ * generic load and writeback, the decoder needs to know the type of the
+ * operand.  Therefore, M is often replaced by the more specific EM and WM
+ * (respectively selecting an ALU operand, like the operand type E, or a
+ * vector operand like the operand type W).
+ *
+ * Immediates are almost always signed or masked away in helpers.  Two
+ * common exceptions are IN/OUT and absolute jumps.  For these, there is
+ * an additional custom operand type "I_unsigned".  Alternatively, the
+ * mask could be applied (and the original sign-extended value would be
+ * optimized away by TCG) in the emitter function.
+ *
  * Vector operands
  * ---------------
  *
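
Illustration of the Ap split described above (a sketch only; x86_lduw_code
and x86_ldl_code are the instruction-stream loaders in translate.c):

    /* Far pointer immediate: offset first ("Ip", sized by s->dflag),
     * then the 16-bit segment selector ("Iw").  Illegal in 64-bit mode. */
    target_ulong ofs = (s->dflag == MO_16 ? x86_lduw_code(env, s)
                                          : x86_ldl_code(env, s));
    uint16_t sel = x86_lduw_code(env, s);
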
@@ -119,8 +141,12 @@
     ## __VA_ARGS__                                                \
 }
 
+#define X86_OP_GROUP1(op, op0, s0, ...)                           \
+    X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
 #define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
     X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
+#define X86_OP_GROUPw(op, op0, s0, ...)                           \
+    X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
 #define X86_OP_GROUP0(op, ...)                                    \
     X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
 
@@ -140,16 +166,30 @@
         .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
         ## __VA_ARGS__)
 
+/*
+ * Short forms that are mostly useful for ALU opcodes and other
+ * one-byte opcodes.  For vector instructions it is usually
+ * clearer to write all three operands explicitly, because the
+ * corresponding gen_* function will use OP_PTRn rather than s->T0
+ * and s->T1.
+ */
+#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...)                 \
+    X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
+#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...)                 \
+    X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
 #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
     X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
 #define X86_OP_ENTRYw(op, op0, s0, ...)                           \
     X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
 #define X86_OP_ENTRYr(op, op0, s0, ...)                           \
     X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
+#define X86_OP_ENTRY1(op, op0, s0, ...)                           \
+    X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
 #define X86_OP_ENTRY0(op, ...)                                    \
     X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
 
 #define cpuid(feat) .cpuid = X86_FEAT_##feat,
+#define noseg .special = X86_SPECIAL_NoSeg,
 #define xchg .special = X86_SPECIAL_Locked,
 #define lock .special = X86_SPECIAL_HasLock,
 #define mmx .special = X86_SPECIAL_MMX,
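
For example, the IN AL,Ib entry added later in this patch,
X86_OP_ENTRYwr(IN, 0,b, I_unsigned,b), expands through the macros above to:

    [0xE4] = X86_OP_ENTRY3(IN, 0,b, None,None, I_unsigned,b),
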
@@ -196,6 +236,8 @@
 #define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
 #define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
 
+#define UNKNOWN_OPCODE ((X86OpEntry) {})
+
 static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
 {
     if (!s->has_modrm) {
@@ -957,6 +999,15 @@ static const X86OpEntry opcodes_0F[256] = {
     /* Incorrectly listed as Mq,Vq in the manual */
     [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),
 
+    [0x40] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x41] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x42] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x43] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x44] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x45] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x46] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x47] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+
     [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
     [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
     [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
@@ -984,6 +1035,27 @@ static const X86OpEntry opcodes_0F[256] = {
     [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
     [0x77] = X86_OP_GROUP0(0F77),
 
+    [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64),
+
+    [0x90] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x91] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x92] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x93] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x94] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x95] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x96] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x97] = X86_OP_ENTRYw(SETcc, E,b),
+
+    [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
+    [0xa1] = X86_OP_ENTRYw(POP, FS, w),
+
     [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
     [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
     [0x2A] = X86_OP_GROUP0(0F2A),
@@ -996,6 +1068,15 @@ static const X86OpEntry opcodes_0F[256] = {
     [0x38] = X86_OP_GROUP0(0F38),
     [0x3a] = X86_OP_GROUP0(0F3A),
 
+    [0x48] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x49] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x4a] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x4b] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x4c] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x4d] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x4e] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+    [0x4f] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
+
     [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
     [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
     [0x5a] = X86_OP_GROUP0(0F5A),
@@ -1021,13 +1102,57 @@ static const X86OpEntry opcodes_0F[256] = {
     [0x7e] = X86_OP_GROUP0(0F7E),
     [0x7f] = X86_OP_GROUP0(0F7F),
 
+    [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64),
+    [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64),
+
+    [0x98] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x99] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x9a] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x9b] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x9c] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x9d] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x9e] = X86_OP_ENTRYw(SETcc, E,b),
+    [0x9f] = X86_OP_ENTRYw(SETcc, E,b),
+
+    [0xa8] = X86_OP_ENTRYr(PUSH,   GS, w),
+    [0xa9] = X86_OP_ENTRYw(POP,    GS, w),
     [0xae] = X86_OP_GROUP0(group15),
+    /*
+     * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
+     * to assume sextT0.  Multiplication is commutative anyway.
+     */
+    [0xaf] = X86_OP_ENTRY3(IMUL3,  G,v, E,v, 2op,v, sextT0),
+
+    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
+    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
+    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
+    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
+    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
+
+    [0xbe] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, sextT0), /* MOVSX */
+    [0xbf] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, sextT0), /* MOVSX */
 
     [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
+    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
     [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
     [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
     [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
 
+    [0xc8] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xc9] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xca] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xcb] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xcc] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xcd] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xce] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+    [0xcf] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
+
     [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
     [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
     [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
@@ -1095,8 +1220,405 @@ static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint
     do_decode_0F(s, env, entry, b);
 }
 
+static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot));
+    static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None);
+    static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0);
+    if (!CODE64(s)) {
+        *entry = arpl;
+    } else if (REX_W(s)) {
+        *entry = movsxd;
+    } else {
+        *entry = mov;
+    }
+}
+
+static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86GenFunc group1_gen[8] = {
+        gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB,
+    };
+    int op = (get_modrm(s, env) >> 3) & 7;
+    entry->gen = group1_gen[op];
+
+    if (op == 7) {
+        /* prevent writeback for CMP */
+        entry->op1 = entry->op0;
+        entry->op0 = X86_TYPE_None;
+        entry->s0 = X86_SIZE_None;
+    } else {
+        entry->special = X86_SPECIAL_HasLock;
+    }
+}
+
+static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    int op = (get_modrm(s, env) >> 3) & 7;
+    if (op != 0) {
+        /* could be XOP prefix too */
+        *entry = UNKNOWN_OPCODE;
+    } else {
+        entry->gen = gen_POP;
+        /* The address must use the value of ESP after the pop.  */
+        s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag);
+    }
+}
+
+static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86GenFunc group2_gen[8] = {
+        gen_ROL, gen_ROR, gen_RCL, gen_RCR,
+        gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
+    };
+    int op = (get_modrm(s, env) >> 3) & 7;
+    entry->gen = group2_gen[op];
+    if (op == 7) {
+        entry->special = X86_SPECIAL_SExtT0;
+    } else {
+        entry->special = X86_SPECIAL_ZExtT0;
+    }
+}
+
+static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_grp3[16] = {
+        /* 0xf6 */
+        [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),
+        [0x02] = X86_OP_ENTRY1(NOT,  E,b,      lock),
+        [0x03] = X86_OP_ENTRY1(NEG,  E,b,      lock),
+        [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0),
+        [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0),
+        [0x06] = X86_OP_ENTRYr(DIV,  E,b),
+        [0x07] = X86_OP_ENTRYr(IDIV, E,b),
+
+        /* 0xf7 */
+        [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z),
+        [0x0a] = X86_OP_ENTRY1(NOT,  E,v,      lock),
+        [0x0b] = X86_OP_ENTRY1(NEG,  E,v,      lock),
+        [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0),
+        [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0),
+        [0x0e] = X86_OP_ENTRYr(DIV,  E,v),
+        [0x0f] = X86_OP_ENTRYr(IDIV, E,v),
+    };
+
+    int w = (*b & 1);
+    int reg = (get_modrm(s, env) >> 3) & 7;
+
+    *entry = opcodes_grp3[(w << 3) | reg];
+}
+
+static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_grp4_5[16] = {
+        /* 0xfe */
+        [0x00] = X86_OP_ENTRY1(INC,     E,b,                           lock),
+        [0x01] = X86_OP_ENTRY1(DEC,     E,b,                           lock),
+
+        /* 0xff */
+        [0x08] = X86_OP_ENTRY1(INC,     E,v,                           lock),
+        [0x09] = X86_OP_ENTRY1(DEC,     E,v,                           lock),
+        [0x0a] = X86_OP_ENTRY3(CALL_m,  None, None, E,f64, None, None, zextT0),
+        [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
+        [0x0c] = X86_OP_ENTRY3(JMP_m,   None, None, E,f64, None, None, zextT0),
+        [0x0d] = X86_OP_ENTRYr(JMPF_m,  M,p),
+        [0x0e] = X86_OP_ENTRYr(PUSH,    E,f64),
+    };
+
+    int w = (*b & 1);
+    int reg = (get_modrm(s, env) >> 3) & 7;
+
+    *entry = opcodes_grp4_5[(w << 3) | reg];
+}
+
+static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    int op = (get_modrm(s, env) >> 3) & 7;
+    if (op != 0) {
+        *entry = UNKNOWN_OPCODE;
+    } else {
+        entry->gen = gen_MOV;
+    }
+}
+
 static const X86OpEntry opcodes_root[256] = {
+    [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
+    [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
+    [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock),
+    [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock),
+    [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock),   /* AL, Ib */
+    [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)),
+    [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)),
+
+    [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock),
+    [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock),
+    [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock),
+    [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock),
+    [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock),   /* AL, Ib */
+    [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)),
+    [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)),
+
+    [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock),
+    [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock),
+    [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock),
+    [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock),
+    [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock),   /* AL, Ib */
+    [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x26] = {},
+    [0x27] = X86_OP_ENTRY0(DAA, chk(i64)),
+
+    [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock),
+    [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock),
+    [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock),
+    [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock),
+    [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock),   /* AL, Ib */
+    [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x36] = {},
+    [0x37] = X86_OP_ENTRY0(AAA, chk(i64)),
+
+    [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)),
+    [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)),
+    [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)),
+    [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)),
+    [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)),
+    [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)),
+    [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)),
+    [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)),
+
+    [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+    [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+
+    [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)),
+    [0x61] = X86_OP_ENTRY0(POPA, chk(i64)),
+    [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)),
+    [0x63] = X86_OP_GROUP0(63),
+    [0x64] = {},
+    [0x65] = {},
+    [0x66] = {},
+    [0x67] = {},
+
+    [0x70] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x71] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x72] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x73] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x74] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x75] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x76] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x77] = X86_OP_ENTRYr(Jcc, J,b),
+
+    [0x80] = X86_OP_GROUP2(group1, E,b, I,b),
+    [0x81] = X86_OP_GROUP2(group1, E,v, I,z),
+    [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)),
+    [0x83] = X86_OP_GROUP2(group1, E,v, I,b),
+    [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b),
+    [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v),
+    [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
+    [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
+
+    [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+
+    [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */
+    [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */
+    [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */
+    [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */
+    [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b),
+    [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v),
+    [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b),
+    [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v),
+
+    [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+    [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+
+    [0xC0] = X86_OP_GROUP2(group2, E,b, I,b),
+    [0xC1] = X86_OP_GROUP2(group2, E,v, I,b),
+    [0xC2] = X86_OP_ENTRYr(RET, I,w),
+    [0xC3] = X86_OP_ENTRY0(RET),
+    [0xC4] = X86_OP_ENTRY3(LES, G,z, EM,p, None, None, chk(i64)),
+    [0xC5] = X86_OP_ENTRY3(LDS, G,z, EM,p, None, None, chk(i64)),
+    [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */
+    [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */
+
+    [0xD0] = X86_OP_GROUP1(group2, E,b),
+    [0xD1] = X86_OP_GROUP1(group2, E,v),
+    [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */
+    [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */
+    [0xD4] = X86_OP_ENTRYr(AAM, I,b),
+    [0xD5] = X86_OP_ENTRYr(AAD, I,b),
+    [0xD6] = X86_OP_ENTRYw(SALC, 0,b),
+    [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */
+
+    [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */
+    [0xE1] = X86_OP_ENTRYr(LOOPE,  J,b), /* implicit: CX with aflag size */
+    [0xE2] = X86_OP_ENTRYr(LOOP,   J,b), /* implicit: CX with aflag size */
+    [0xE3] = X86_OP_ENTRYr(JCXZ,   J,b), /* implicit: CX with aflag size */
+    [0xE4] = X86_OP_ENTRYwr(IN,    0,b, I_unsigned,b), /* AL */
+    [0xE5] = X86_OP_ENTRYwr(IN,    0,v, I_unsigned,b), /* AX/EAX */
+    [0xE6] = X86_OP_ENTRYrr(OUT,   0,b, I_unsigned,b), /* AL */
+    [0xE7] = X86_OP_ENTRYrr(OUT,   0,v, I_unsigned,b), /* AX/EAX */
+
+    [0xF1] = X86_OP_ENTRY0(INT1,   svm(ICEBP)),
+    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0)),
+    [0xF5] = X86_OP_ENTRY0(CMC),
+    [0xF6] = X86_OP_GROUP1(group3, E,b),
+    [0xF7] = X86_OP_GROUP1(group3, E,v),
+
+    [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
+    [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
+    [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock),
+    [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock),
+    [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock),   /* AL, Ib */
+    [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)),
     [0x0F] = X86_OP_GROUP0(0F),
+
+    [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock),
+    [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock),
+    [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock),
+    [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock),
+    [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock),   /* AL, Ib */
+    [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)),
+    [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)),
+
+    [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock),
+    [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock),
+    [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock),
+    [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock),
+    [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock),   /* AL, Ib */
+    [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock),   /* rAX, Iz */
+    [0x2E] = {},
+    [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)),
+
+    [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b),
+    [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v),
+    [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b),
+    [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v),
+    [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b),   /* AL, Ib */
+    [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z),   /* rAX, Iz */
+    [0x3E] = {},
+    [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)),
+
+    [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)),
+    [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)),
+    [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)),
+    [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)),
+    [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)),
+    [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)),
+    [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)),
+    [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)),
+
+    [0x58] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x59] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
+    [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
+
+    [0x68] = X86_OP_ENTRYr(PUSH, I,z),
+    [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,z, sextT0),
+    [0x6A] = X86_OP_ENTRYr(PUSH, I,b),
+    [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0),
+    [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */
+    [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */
+    [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */
+    [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */
+
+    [0x78] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x79] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x7A] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x7B] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x7C] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x7D] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
+    [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
+
+    [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None),
+    [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
+    [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
+    [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
+    [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None),
+    [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
+    [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,v, None, None),
+    [0x8F] = X86_OP_GROUPw(group1A, E,v),
+
+    [0x98] = X86_OP_ENTRY1(CBW,    0,v), /* rAX */
+    [0x99] = X86_OP_ENTRY3(CWD,    2,v, 0,v, None, None), /* rDX, rAX */
+    [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
+    [0x9B] = X86_OP_ENTRY0(WAIT),
+    [0x9C] = X86_OP_ENTRY0(PUSHF,  chk(vm86_iopl) svm(PUSHF)),
+    [0x9D] = X86_OP_ENTRY0(POPF,   chk(vm86_iopl) svm(POPF)),
+    [0x9E] = X86_OP_ENTRY0(SAHF),
+    [0x9F] = X86_OP_ENTRY0(LAHF),
+
+    [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b),   /* AL, Ib */
+    [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z),   /* rAX, Iz */
+    [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None),
+    [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None),
+    /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS.  */
+    [0xAC] = X86_OP_ENTRYr(LODS, X,b),
+    [0xAD] = X86_OP_ENTRYr(LODS, X,v),
+    [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
+    [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
+
+    [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+
+    [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
+    [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
+    [0xCA] = X86_OP_ENTRYr(RETF,  I,w),
+    [0xCB] = X86_OP_ENTRY0(RETF),
+    [0xCC] = X86_OP_ENTRY0(INT3),
+    [0xCD] = X86_OP_ENTRYr(INT, I,b,  chk(vm86_iopl)),
+    [0xCE] = X86_OP_ENTRY0(INTO),
+    [0xCF] = X86_OP_ENTRY0(IRET,      chk(vm86_iopl) svm(IRET)),
+
+    [0xE8] = X86_OP_ENTRYr(CALL,   J,z_f64),
+    [0xE9] = X86_OP_ENTRYr(JMP,    J,z_f64),
+    [0xEA] = X86_OP_ENTRYrr(JMPF,  I_unsigned,p, I_unsigned,w, chk(i64)),
+    [0xEB] = X86_OP_ENTRYr(JMP,    J,b),
+    [0xEC] = X86_OP_ENTRYwr(IN,    0,b, 2,w), /* AL, DX */
+    [0xED] = X86_OP_ENTRYwr(IN,    0,v, 2,w), /* AX/EAX, DX */
+    [0xEE] = X86_OP_ENTRYrr(OUT,   0,b, 2,w), /* DX, AL */
+    [0xEF] = X86_OP_ENTRYrr(OUT,   0,v, 2,w), /* DX, AX/EAX */
+
+    [0xF8] = X86_OP_ENTRY0(CLC),
+    [0xF9] = X86_OP_ENTRY0(STC),
+    [0xFA] = X86_OP_ENTRY0(CLI,    chk(iopl)),
+    [0xFB] = X86_OP_ENTRY0(STI,    chk(iopl)),
+    [0xFC] = X86_OP_ENTRY0(CLD),
+    [0xFD] = X86_OP_ENTRY0(STD),
+    [0xFE] = X86_OP_GROUP1(group4_5, E,b),
+    [0xFF] = X86_OP_GROUP1(group4_5, E,v),
 };
 
 #undef mmx
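
A sketch of how the group decoders above index their sub-tables, for
F7 /5 (one-operand IMUL rAX,Ev, here with modrm byte 0x2e):

    int w = 0xf7 & 1;           /* 1: operand size is "v", not "b" */
    int reg = (0x2e >> 3) & 7;  /* reg field = 5 */
    /* (w << 3) | reg == 0x0d -> X86_OP_ENTRYrr(IMUL, E,v, 0,v, sextT0) */
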
@@ -1176,6 +1698,10 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp
         *ot = s->dflag == MO_16 ? MO_16 : MO_32;
         return true;
 
+    case X86_SIZE_z_f64:  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
+        *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32;
+        return true;
+
     case X86_SIZE_dq: /* SSE/AVX 128-bit */
         if (e->special == X86_SPECIAL_MMX &&
             !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
@@ -1315,8 +1841,13 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
 
     case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
         op->unit = X86_OP_SSE;
+        goto get_modrm_mem;
+
+    case X86_TYPE_EM:  /* modrm byte selects an ALU memory operand */
+        op->unit = X86_OP_INT;
         /* fall through */
     case X86_TYPE_M:  /* modrm byte selects a memory operand */
+    get_modrm_mem:
         modrm = get_modrm(s, env);
         if ((modrm >> 6) == 3) {
             return false;
@@ -1353,7 +1884,12 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
     case X86_TYPE_I:  /* Immediate */
     case X86_TYPE_J:  /* Relative offset for a jump */
         op->unit = X86_OP_IMM;
-        decode->immediate = insn_get_signed(env, s, op->ot);
+        decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
+        break;
+
+    case X86_TYPE_I_unsigned:  /* Immediate */
+        op->unit = X86_OP_IMM;
+        decode->immediate = op->imm = insn_get(env, s, op->ot);
         break;
 
     case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
@@ -1476,6 +2012,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
     switch (cpuid) {
     case X86_FEAT_None:
         return true;
+    case X86_FEAT_CMOV:
+        return (s->cpuid_features & CPUID_CMOV);
     case X86_FEAT_F16C:
         return (s->cpuid_ext_features & CPUID_EXT_F16C);
     case X86_FEAT_FMA:
@@ -1681,22 +2219,31 @@ illegal:
  * Convert one instruction. s->base.is_jmp is set if the translation must
  * be stopped.
  */
-static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
+static void disas_insn(DisasContext *s, CPUState *cpu)
 {
     CPUX86State *env = cpu_env(cpu);
-    bool first = true;
     X86DecodedInsn decode;
     X86DecodeFunc decode_func = decode_root;
-    uint8_t cc_live;
+    uint8_t cc_live, b;
 
+    s->pc = s->base.pc_next;
+    s->override = -1;
+    s->popl_esp_hack = 0;
+#ifdef TARGET_X86_64
+    s->rex_r = 0;
+    s->rex_x = 0;
+    s->rex_b = 0;
+#endif
+    s->rip_offset = 0; /* for relative ip address */
+    s->vex_l = 0;
+    s->vex_v = 0;
+    s->vex_w = false;
     s->has_modrm = false;
+    s->prefix = 0;
 
  next_byte:
-    if (first) {
-        first = false;
-    } else {
-        b = x86_ldub_code(env, s);
-    }
+    b = x86_ldub_code(env, s);
+
     /* Collect prefixes.  */
     switch (b) {
     case 0xf3:
@@ -1808,10 +2355,6 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
         }
         break;
     default:
-        if (b >= 0x100) {
-            b -= 0x100;
-            decode_func = do_decode_0F;
-        }
         break;
     }
 
@@ -1840,6 +2383,40 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
         }
     }
 
+    /* Go back to old decoder for unconverted opcodes.  */
+    if (!(s->prefix & PREFIX_VEX)) {
+        if ((b & ~7) == 0xd8) {
+            if (!disas_insn_x87(s, cpu, b)) {
+                goto unknown_op;
+            }
+            return;
+        }
+
+        if (b == 0x0f) {
+            b = x86_ldub_code(env, s);
+            switch (b) {
+            case 0x00 ... 0x03: /* mostly privileged instructions */
+            case 0x05 ... 0x09:
+            case 0x0d:          /* 3DNow! prefetch */
+            case 0x18 ... 0x23: /* prefetch, MPX, mov from/to CR and DR */
+            case 0x30 ... 0x35: /* more privileged instructions */
+            case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
+            case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
+            case 0xb0 ... 0xb1: /* cmpxchg */
+            case 0xb3:          /* btr */
+            case 0xb8:          /* integer ops */
+            case 0xba ... 0xbd: /* integer ops */
+            case 0xc0 ... 0xc1: /* xadd */
+            case 0xc7:          /* grp9 */
+                disas_insn_old(s, cpu, b + 0x100);
+                return;
+            default:
+                decode_func = do_decode_0F;
+                break;
+            }
+        }
+    }
+
     memset(&decode, 0, sizeof(decode));
     decode.cc_op = -1;
     decode.b = b;
@@ -1914,6 +2491,11 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
         assert(decode.op[1].unit == X86_OP_INT);
         break;
 
+    case X86_SPECIAL_NoSeg:
+        decode.mem.def_seg = -1;
+        s->override = -1;
+        break;
+
     default:
         break;
     }
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 15e6bfef4b..2ea06b4478 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -47,7 +47,9 @@ typedef enum X86OpType {
     X86_TYPE_Y, /* string destination */
 
     /* Custom */
+    X86_TYPE_EM, /* modrm byte selects an ALU memory operand */
     X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */
+    X86_TYPE_I_unsigned, /* Immediate, zero-extended */
     X86_TYPE_2op, /* 2-operand RMW instruction */
     X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */
     X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */
@@ -88,6 +90,7 @@ typedef enum X86OpSize {
     X86_SIZE_x,  /* 128/256-bit, based on operand size */
     X86_SIZE_y,  /* 32/64-bit, based on operand size */
     X86_SIZE_z,  /* 16-bit for 16-bit operand size, else 32-bit */
+    X86_SIZE_z_f64,  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
 
     /* Custom */
     X86_SIZE_d64,
@@ -104,6 +107,7 @@ typedef enum X86CPUIDFeature {
     X86_FEAT_AVX2,
     X86_FEAT_BMI1,
     X86_FEAT_BMI2,
+    X86_FEAT_CMOV,
     X86_FEAT_CMPCCXADD,
     X86_FEAT_F16C,
     X86_FEAT_FMA,
@@ -165,6 +169,8 @@ typedef enum X86InsnSpecial {
     /* Always locked if it has a memory operand (XCHG) */
     X86_SPECIAL_Locked,
 
+    /* Do not apply segment base to effective address */
+    X86_SPECIAL_NoSeg,
     /*
      * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits
      * (and writeback zero-extends it to 64 bits if applicable).  PREFIX_DATA
@@ -271,16 +277,23 @@ typedef struct X86DecodedOp {
     bool has_ea;
     int offset;   /* For MMX and SSE */
 
-    /*
-     * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR,
-     * do not access directly!
-     */
-    TCGv_ptr v_ptr;
+    union {
+        target_ulong imm;
+        /*
+         * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR,
+         * do not access directly!
+         */
+        TCGv_ptr v_ptr;
+    };
 } X86DecodedOp;
 
 struct X86DecodedInsn {
     X86OpEntry e;
     X86DecodedOp op[3];
+    /*
+     * Rightmost immediate, for convenience since most instructions have
+     * one (and also for 4-operand instructions).
+     */
     target_ulong immediate;
     AddressParts mem;
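
The per-operand copy matters for the rare two-immediate instructions; a
sketch for ENTER (C8 iw, ib), which is converted by this patch:

    uint16_t alloc = decode->op[1].imm;  /* iw: stack frame size */
    uint8_t  level = decode->op[2].imm;  /* ib: nesting level; this one
                                            is also decode->immediate */
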
 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 6bcf88ecd7..58f255873f 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -19,6 +19,21 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
+/*
+ * Sometimes, knowing which TCG opcodes the backend supports can produce
+ * better code.  The exact opcode to check depends on 32- vs. 64-bit.
+ */
+#ifdef TARGET_X86_64
+#define TCG_TARGET_HAS_extract2_tl      TCG_TARGET_HAS_extract2_i64
+#define TCG_TARGET_deposit_tl_valid     TCG_TARGET_deposit_i64_valid
+#define TCG_TARGET_extract_tl_valid     TCG_TARGET_extract_i64_valid
+#else
+#define TCG_TARGET_HAS_extract2_tl      TCG_TARGET_HAS_extract2_i32
+#define TCG_TARGET_deposit_tl_valid     TCG_TARGET_deposit_i32_valid
+#define TCG_TARGET_extract_tl_valid     TCG_TARGET_extract_i32_valid
+#endif
+
 #define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])
 
 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
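
A sketch of the intended use (hypothetical call site; the *_valid macros
are assumed to take the same (ofs, len) arguments as their i32/i64
counterparts):

    if (TCG_TARGET_deposit_tl_valid(8, 8)) {
        tcg_gen_deposit_tl(s->T0, s->T0, s->T1, 8, 8);  /* e.g. write AH */
    } else {
        /* fall back to shift/mask/or */
    }
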
@@ -45,6 +60,9 @@ typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                   TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even,
                                   TCGv_i32 odd);
 
+static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+
 static inline TCGv_i32 tcg_constant8u_i32(uint8_t val)
 {
     return tcg_constant_i32(val);
@@ -259,7 +277,7 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
         }
         break;
     case X86_OP_IMM:
-        tcg_gen_movi_tl(v, decode->immediate);
+        tcg_gen_movi_tl(v, op->imm);
         break;
 
     case X86_OP_MMX:
@@ -283,6 +301,8 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
 static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn)
 {
     X86DecodedOp *op = &decode->op[opn];
+
+    assert(op->unit == X86_OP_MMX || op->unit == X86_OP_SSE);
     if (op->v_ptr) {
         return op->v_ptr;
     }
@@ -304,8 +324,8 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
     case X86_OP_SKIP:
         break;
     case X86_OP_SEG:
-        /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF.  */
-        gen_movl_seg_T0(s, op->n);
+        /* Note that gen_movl_seg takes care of interrupt shadow and TF.  */
+        gen_movl_seg(s, op->n, s->T0);
         break;
     case X86_OP_INT:
         if (op->has_ea) {
@@ -328,6 +348,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
     default:
         g_assert_not_reached();
     }
+    op->unit = X86_OP_SKIP;
 }
 
 static inline int vector_len(DisasContext *s, X86DecodedInsn *decode)
@@ -352,6 +373,20 @@ static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
     decode->cc_op = op;
 }
 
+static void prepare_update_cc_incdec(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+    gen_compute_eflags_c(s, s->T1);
+    prepare_update2_cc(decode, s, op);
+}
+
+static void prepare_update3_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op, TCGv reg)
+{
+    decode->cc_src2 = reg;
+    decode->cc_src = s->T1;
+    decode->cc_dst = s->T0;
+    decode->cc_op = op;
+}
+
 static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
 {
     MemOp ot = decode->op[0].ot;
@@ -1040,6 +1075,53 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
 VSIB_AVX(VPGATHERD, vpgatherd)
 VSIB_AVX(VPGATHERQ, vpgatherq)
 
+static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_helper_aaa(tcg_env);
+    set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_helper_aad(tcg_env, tcg_constant_i32(decode->immediate));
+    set_cc_op(s, CC_OP_LOGICB);
+}
+
+static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if (decode->immediate == 0) {
+        gen_exception(s, EXCP00_DIVZ);
+    } else {
+        gen_helper_aam(tcg_env, tcg_constant_i32(decode->immediate));
+        set_cc_op(s, CC_OP_LOGICB);
+    }
+}
+
+static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_helper_aas(tcg_env);
+    set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    TCGv c_in = tcg_temp_new();
+
+    gen_compute_eflags_c(s, c_in);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_add_tl(s->T0, c_in, s->T1);
+        tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_add_tl(s->T0, s->T0, s->T1);
+        tcg_gen_add_tl(s->T0, s->T0, c_in);
+    }
+    prepare_update3_cc(decode, s, CC_OP_ADCB + ot, c_in);
+}
+
 /* ADCX/ADOX do not have memory operands and can use set_cc_op.  */
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
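
The LOCK path of gen_ADC above folds the carry into the addend so that a
single atomic read-modify-write implements ADC; the plain-C equivalent
(an illustration only, needs <stdatomic.h>):

    uint32_t adc_locked(_Atomic uint32_t *mem, uint32_t src, uint32_t cf)
    {
        uint32_t addend = src + cf;                     /* T0 = c_in + T1 */
        return atomic_fetch_add(mem, addend) + addend;  /* value after update */
    }
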
@@ -1093,11 +1175,37 @@ static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
 }
 
+static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_add_tl(s->T0, s->T0, s->T1);
+    }
+    prepare_update2_cc(decode, s, CC_OP_ADDB + ot);
+}
+
 static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
 }
 
+static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_and_fetch_tl(s->T0, s->A0, s->T1,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_and_tl(s->T0, s->T0, s->T1);
+    }
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
+}
+
 static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1106,6 +1214,27 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
+static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    TCGv zf = tcg_temp_new();
+    TCGv flags = tcg_temp_new();
+
+    gen_mov_eflags(s, flags);
+
+    /* Compute adjusted DST in T1, merging in SRC[RPL].  */
+    tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 0, 2);
+
+    /* Z flag set if DST[RPL] < SRC[RPL] */
+    tcg_gen_setcond_tl(TCG_COND_LTU, zf, s->T0, s->T1);
+    tcg_gen_deposit_tl(flags, flags, zf, ctz32(CC_Z), 1);
+
+    /* Place maximum RPL in DST */
+    tcg_gen_umax_tl(s->T0, s->T0, s->T1);
+
+    decode->cc_src = flags;
+    decode->cc_op = CC_OP_EFLAGS;
+}
+
 static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1170,6 +1299,28 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    TCGv_i32 op = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(op, s->T0);
+    if (decode->op[1].ot == MO_16) {
+        gen_helper_boundw(tcg_env, s->A0, op);
+    } else {
+        gen_helper_boundl(tcg_env, s->A0, op);
+    }
+}
+
+static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+#ifdef TARGET_X86_64
+    if (s->dflag == MO_64) {
+        tcg_gen_bswap64_i64(s->T0, s->T0);
+        return;
+    }
+#endif
+    tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
+}
+
 static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1190,6 +1341,67 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
+static void gen_CALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_push_v(s, eip_next_tl(s));
+    gen_JMP(s, env, decode);
+}
+
+static void gen_CALL_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_push_v(s, eip_next_tl(s));
+    gen_JMP_m(s, env, decode);
+}
+
+static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_far_call(s);
+}
+
+static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+
+    gen_op_ld_v(s, ot, s->T0, s->A0);
+    gen_add_A0_im(s, 1 << ot);
+    gen_op_ld_v(s, MO_16, s->T1, s->A0);
+    gen_far_call(s);
+}
+
+static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp src_ot = decode->op[0].ot - 1;
+
+    tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN);
+}
+
+static void gen_CLC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_compute_eflags(s);
+    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
+}
+
+static void gen_CLD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, offsetof(CPUX86State, df));
+}
+
+static void gen_CLI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_reset_eflags(s, IF_MASK);
+}
+
+static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_compute_eflags(s);
+    tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+}
+
+static void gen_CMOVcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_cmovcc1(s, decode->b & 0xf, s->T0, s->T1);
+}
+
 static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGLabel *label_top = gen_new_label();
@@ -1209,7 +1421,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
         [JCC_Z] = TCG_COND_EQ,
         [JCC_BE] = TCG_COND_LEU,
         [JCC_S] = TCG_COND_LT,  /* test sign bit by comparing against 0 */
-        [JCC_P] = TCG_COND_EQ,  /* even parity - tests low bit of popcount */
+        [JCC_P] = TCG_COND_TSTEQ,  /* even parity - tests low bit of popcount */
         [JCC_L] = TCG_COND_LT,
         [JCC_LE] = TCG_COND_LE,
     };
@@ -1260,8 +1472,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     case JCC_P:
         tcg_gen_ext8u_tl(s->tmp0, s->T0);
         tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
-        tcg_gen_andi_tl(s->tmp0, s->tmp0, 1);
-        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1);
         break;
 
     case JCC_S:
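
TCG_COND_TSTEQ(a, b) evaluates (a & b) == 0, so comparing the popcount
against 1 tests even parity directly and replaces the old andi+EQ pair;
what it computes, as plain C:

    static bool parity_even(uint8_t v)
    {
        /* PF is set when the low byte has an even number of 1 bits */
        return (__builtin_popcount(v) & 1) == 0;
    }
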
@@ -1294,6 +1505,18 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
+static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    if (s->prefix & PREFIX_REPNZ) {
+        gen_repz_cmps(s, ot, 1);
+    } else if (s->prefix & PREFIX_REPZ) {
+        gen_repz_cmps(s, ot, 0);
+    } else {
+        gen_cmps(s, ot);
+    }
+}
+
 static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
@@ -1332,11 +1555,74 @@ static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
+static void gen_CWD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    int shift = 8 << decode->op[0].ot;
+
+    tcg_gen_sextract_tl(s->T0, s->T0, shift - 1, 1);
+}
+
+static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_helper_daa(tcg_env);
+    set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_helper_das(tcg_env);
+    set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    tcg_gen_movi_tl(s->T1, -1);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_add_tl(s->T0, s->T0, s->T1);
+    }
+    prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot);
+}
+
+static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+
+    switch (ot) {
+    case MO_8:
+        gen_helper_divb_AL(tcg_env, s->T1);
+        break;
+    case MO_16:
+        gen_helper_divw_AX(tcg_env, s->T1);
+        break;
+    default:
+    case MO_32:
+        gen_helper_divl_EAX(tcg_env, s->T1);
+        break;
+#ifdef TARGET_X86_64
+    case MO_64:
+        gen_helper_divq_EAX(tcg_env, s->T1);
+        break;
+#endif
+    }
+}
+
 static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     gen_helper_emms(tcg_env);
 }
 
+static void gen_ENTER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_enter(s, decode->op[1].imm, decode->op[2].imm);
+}
+
 static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
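
gen_CWD above uses a sign-extraction trick: pulling out the top bit as a
1-bit signed field yields 0 or -1, exactly what CWD/CDQ/CQO store in rDX.
A worked 16-bit case with QEMU's sextract64():

    /* AX = 0x8123, shift = 8 << MO_16 = 16 */
    int64_t dx = sextract64(0x8123, 16 - 1, 1);  /* -1, so DX = 0xffff */
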
@@ -1350,6 +1636,210 @@ static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
 }
 
+static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+#ifndef CONFIG_USER_ONLY
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
+    s->base.is_jmp = DISAS_NORETURN;
+#endif
+}
+
+static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+
+    switch (ot) {
+    case MO_8:
+        gen_helper_idivb_AL(tcg_env, s->T1);
+        break;
+    case MO_16:
+        gen_helper_idivw_AX(tcg_env, s->T1);
+        break;
+    default:
+    case MO_32:
+        gen_helper_idivl_EAX(tcg_env, s->T1);
+        break;
+#ifdef TARGET_X86_64
+    case MO_64:
+        gen_helper_idivq_EAX(tcg_env, s->T1);
+        break;
+#endif
+    }
+}
+
+static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv cc_src_rhs;
+
+    switch (ot) {
+    case MO_16:
+        /* s->T0 already sign-extended */
+        tcg_gen_ext16s_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        /* Compare the full result to the extension of the truncated result.  */
+        tcg_gen_ext16s_tl(s->T1, s->T0);
+        cc_src_rhs = s->T0;
+        break;
+
+    case MO_32:
+#ifdef TARGET_X86_64
+        if (TCG_TARGET_REG_BITS == 64) {
+            /*
+             * This produces fewer TCG ops, and better code if flags are needed,
+             * but it requires a 64-bit multiply even if they are not.  Use it
+             * only if the target has 64-bits registers.
+             *
+             * s->T0 is already sign-extended.
+             */
+            tcg_gen_ext32s_tl(s->T1, s->T1);
+            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+            /* Compare the full result to the extension of the truncated result.  */
+            tcg_gen_ext32s_tl(s->T1, s->T0);
+            cc_src_rhs = s->T0;
+        } else {
+            /* Variant that only needs a 32-bit widening multiply.  */
+            TCGv_i32 hi = tcg_temp_new_i32();
+            TCGv_i32 lo = tcg_temp_new_i32();
+            tcg_gen_trunc_tl_i32(lo, s->T0);
+            tcg_gen_trunc_tl_i32(hi, s->T1);
+            tcg_gen_muls2_i32(lo, hi, lo, hi);
+            tcg_gen_extu_i32_tl(s->T0, lo);
+
+            cc_src_rhs = tcg_temp_new();
+            tcg_gen_extu_i32_tl(cc_src_rhs, hi);
+            /* Compare the high part to the sign bit of the truncated result */
+            tcg_gen_sari_i32(lo, lo, 31);
+            tcg_gen_extu_i32_tl(s->T1, lo);
+        }
+        break;
+
+    case MO_64:
+#endif
+        cc_src_rhs = tcg_temp_new();
+        tcg_gen_muls2_tl(s->T0, cc_src_rhs, s->T0, s->T1);
+        /* Compare the high part to the sign bit of the truncated result */
+        tcg_gen_sari_tl(s->T1, s->T0, TARGET_LONG_BITS - 1);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    tcg_gen_sub_tl(s->T1, s->T1, cc_src_rhs);
+    prepare_update2_cc(decode, s, CC_OP_MULB + ot);
+}
+
+static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    TCGv cc_src_rhs;
+
+    switch (ot) {
+    case MO_8:
+        /* s->T0 already sign-extended */
+        tcg_gen_ext8s_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+        /* Compare the full result to the extension of the truncated result.  */
+        tcg_gen_ext8s_tl(s->T1, s->T0);
+        cc_src_rhs = s->T0;
+        break;
+
+    case MO_16:
+        /* s->T0 already sign-extended */
+        tcg_gen_ext16s_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+        tcg_gen_shri_tl(s->T1, s->T0, 16);
+        gen_op_mov_reg_v(s, MO_16, R_EDX, s->T1);
+        /* Compare the full result to the extension of the truncated result.  */
+        tcg_gen_ext16s_tl(s->T1, s->T0);
+        cc_src_rhs = s->T0;
+        break;
+
+    case MO_32:
+#ifdef TARGET_X86_64
+        /* s->T0 already sign-extended */
+        tcg_gen_ext32s_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        tcg_gen_ext32u_tl(cpu_regs[R_EAX], s->T0);
+        tcg_gen_shri_tl(cpu_regs[R_EDX], s->T0, 32);
+        /* Compare the full result to the extension of the truncated result.  */
+        tcg_gen_ext32s_tl(s->T1, s->T0);
+        cc_src_rhs = s->T0;
+        break;
+
+    case MO_64:
+#endif
+        tcg_gen_muls2_tl(s->T0, cpu_regs[R_EDX], s->T0, s->T1);
+        tcg_gen_mov_tl(cpu_regs[R_EAX], s->T0);
+
+        /* Compare the high part to the sign bit of the truncated result */
+        tcg_gen_negsetcondi_tl(TCG_COND_LT, s->T1, s->T0, 0);
+        cc_src_rhs = cpu_regs[R_EDX];
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    tcg_gen_sub_tl(s->T1, s->T1, cc_src_rhs);
+    prepare_update2_cc(decode, s, CC_OP_MULB + ot);
+}
+
+static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv_i32 port = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(port, s->T1);
+    tcg_gen_ext16u_i32(port, port);
+    if (!gen_check_io(s, ot, port, SVM_IOIO_TYPE_MASK)) {
+        return;
+    }
+    translator_io_start(&s->base);
+    gen_helper_in_func(ot, s->T0, port);
+    gen_writeback(s, decode, 0, s->T0);
+    gen_bpt_io(s, port, ot);
+}
+
+static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    tcg_gen_movi_tl(s->T1, 1);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_add_tl(s->T0, s->T0, s->T1);
+    }
+    prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot);
+}
+
+static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    TCGv_i32 port = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(port, s->T1);
+    tcg_gen_ext16u_i32(port, port);
+    if (!gen_check_io(s, ot, port,
+                      SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
+        return;
+    }
+
+    translator_io_start(&s->base);
+    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        gen_repz_ins(s, ot);
+    } else {
+        gen_ins(s, ot);
+    }
+}
+
 static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
@@ -1363,12 +1853,197 @@ static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
 }
 
+static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_interrupt(s, decode->immediate);
+}
+
+static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_exception(s, EXCP01_DB);
+}
+
+static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_interrupt(s, EXCP03_INT3);
+}
+
+static void gen_INTO(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_into(tcg_env, cur_insn_len_i32(s));
+}
+
+static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if (!PE(s) || VM86(s)) {
+        gen_helper_iret_real(tcg_env, tcg_constant_i32(s->dflag - 1));
+    } else {
+        gen_helper_iret_protected(tcg_env, tcg_constant_i32(s->dflag - 1),
+                                  eip_next_i32(s));
+    }
+    set_cc_op(s, CC_OP_EFLAGS);
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_bnd_jmp(s);
+    gen_jcc(s, decode->b & 0xf, decode->immediate);
+}
+
+static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    TCGLabel *taken = gen_new_label();
+
+    gen_update_cc_op(s);
+    gen_op_jz_ecx(s, taken);
+    gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
+}
+
+static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_jmp_rel(s, s->dflag, decode->immediate, 0);
+}
+
+static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_op_jmp_v(s, s->T0);
+    gen_bnd_jmp(s);
+    s->base.is_jmp = DISAS_JUMP;
+}
+
+static void gen_JMPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_far_jmp(s);
+}
+
+static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+
+    gen_op_ld_v(s, ot, s->T0, s->A0);
+    gen_add_A0_im(s, 1 << ot);
+    gen_op_ld_v(s, MO_16, s->T1, s->A0);
+    gen_far_jmp(s);
+}
+
+static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
+        return gen_illegal_opcode(s);
+    }
+    gen_compute_eflags(s);
+    /* Note: gen_compute_eflags() only gives the condition codes */
+    tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
+    tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
+}
+
 static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
     gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
 }
 
+static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, int seg)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* Offset already in s->T0.  */
+    gen_add_A0_im(s, 1 << ot);
+    gen_op_ld_v(s, MO_16, s->T1, s->A0);
+
+    /* load the segment here to handle exceptions properly */
+    gen_movl_seg(s, seg, s->T1);
+}
+
+static void gen_LDS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, env, decode, R_DS);
+}
+
+static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    tcg_gen_mov_tl(s->T0, s->A0);
+}
+
+static void gen_LEAVE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_leave(s);
+}
+
+static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, env, decode, R_ES);
+}
+
+static void gen_LFS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, env, decode, R_FS);
+}
+
+static void gen_LGS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, env, decode, R_GS);
+}
+
+static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        gen_repz_lods(s, ot);
+    } else {
+        gen_lods(s, ot);
+    }
+}
+
+static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    TCGLabel *taken = gen_new_label();
+
+    gen_update_cc_op(s);
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+    gen_op_jnz_ecx(s, taken);
+    gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
+}
+
+static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    TCGLabel *taken = gen_new_label();
+    TCGLabel *not_taken = gen_new_label();
+
+    gen_update_cc_op(s);
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+    gen_op_jz_ecx(s, not_taken);
+    gen_jcc1(s, (JCC_Z << 1), taken); /* jz taken */
+    gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
+}
+
+static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    TCGLabel *taken = gen_new_label();
+    TCGLabel *not_taken = gen_new_label();
+
+    gen_update_cc_op(s);
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+    gen_op_jz_ecx(s, not_taken);
+    gen_jcc1(s, (JCC_Z << 1) | 1, taken); /* jnz taken */
+    gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
+}
+
+static void gen_LSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, env, decode, R_SS);
+}
+
+static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    /* nothing to do! */
+}
+#define gen_NOP gen_MOV
+
 static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override);
@@ -1476,6 +2151,67 @@ static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     return gen_MOVQ(s, env, decode);
 }
 
+static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        gen_repz_movs(s, ot);
+    } else {
+        gen_movs(s, ot);
+    }
+}
+
+static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    switch (ot) {
+    case MO_8:
+        /* s->T0 already zero-extended */
+        tcg_gen_ext8u_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+        tcg_gen_andi_tl(s->T1, s->T0, 0xff00);
+        decode->cc_dst = s->T0;
+        decode->cc_src = s->T1;
+        break;
+
+    case MO_16:
+        /* s->T0 already zero-extended */
+        tcg_gen_ext16u_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+        tcg_gen_shri_tl(s->T1, s->T0, 16);
+        gen_op_mov_reg_v(s, MO_16, R_EDX, s->T1);
+        decode->cc_dst = s->T0;
+        decode->cc_src = s->T1;
+        break;
+
+    case MO_32:
+#ifdef TARGET_X86_64
+        /* s->T0 already zero-extended */
+        tcg_gen_ext32u_tl(s->T1, s->T1);
+        tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+        tcg_gen_ext32u_tl(cpu_regs[R_EAX], s->T0);
+        tcg_gen_shri_tl(cpu_regs[R_EDX], s->T0, 32);
+        decode->cc_dst = cpu_regs[R_EAX];
+        decode->cc_src = cpu_regs[R_EDX];
+        break;
+
+    case MO_64:
+#endif
+        tcg_gen_mulu2_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->T0, s->T1);
+        decode->cc_dst = cpu_regs[R_EAX];
+        decode->cc_src = cpu_regs[R_EDX];
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    decode->cc_op = CC_OP_MULB + ot;
+}
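+
+/*
+ * For example, MO_8 with AL = 16 and an 8-bit source of 16: the product
+ * is 0x100, AX becomes 0x0100, and cc_src = 0x0100 & 0xff00 = 0x0100.
+ * CC_OP_MULB derives CF and OF from cc_src being nonzero, so both are
+ * set, matching hardware behavior when the upper half of the result is
+ * nonzero.
+ */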
+
 static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1502,6 +2238,95 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
+static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv oldv = tcg_temp_new();
+
+    if (s->prefix & PREFIX_LOCK) {
+        TCGv newv = tcg_temp_new();
+        TCGv cmpv = tcg_temp_new();
+        TCGLabel *label1 = gen_new_label();
+
+        gen_set_label(label1);
+        gen_op_ld_v(s, ot, oldv, s->A0);
+        tcg_gen_neg_tl(newv, oldv);
+        tcg_gen_atomic_cmpxchg_tl(cmpv, s->A0, oldv, newv,
+                                  s->mem_index, ot | MO_LE);
+        tcg_gen_brcond_tl(TCG_COND_NE, oldv, cmpv, label1);
+    } else {
+        tcg_gen_mov_tl(oldv, s->T0);
+    }
+    tcg_gen_neg_tl(s->T0, oldv);
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = oldv;
+    tcg_gen_movi_tl(s->cc_srcT, 0);
+    decode->cc_op = CC_OP_SUBB + ot;
+}
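+
+/*
+ * The locked form of NEG above is a compare-and-swap loop: the operand
+ * is reloaded and negated again until the cmpxchg observes the same
+ * value that was read, i.e. until no other vCPU wrote the location in
+ * the meantime, which makes the read-modify-write atomic.
+ */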
+
+static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_movi_tl(s->T0, ~0);
+        tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_not_tl(s->T0, s->T0);
+    }
+}
+
+static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_or_fetch_tl(s->T0, s->A0, s->T1,
+                                   s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_or_tl(s->T0, s->T0, s->T1);
+    }
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
+}
+
+static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    TCGv_i32 port = tcg_temp_new_i32();
+    TCGv_i32 value = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(port, s->T1);
+    tcg_gen_ext16u_i32(port, port);
+    if (!gen_check_io(s, ot, port, 0)) {
+        return;
+    }
+    tcg_gen_trunc_tl_i32(value, s->T0);
+    translator_io_start(&s->base);
+    gen_helper_out_func(ot, port, value);
+    gen_bpt_io(s, port, ot);
+}
+
+static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    TCGv_i32 port = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(port, s->T1);
+    tcg_gen_ext16u_i32(port, port);
+    if (!gen_check_io(s, ot, port, SVM_IOIO_STR_MASK)) {
+        return;
+    }
+
+    translator_io_start(&s->base);
+    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        gen_repz_outs(s, ot);
+    } else {
+        gen_outs(s, ot);
+    }
+}
+
 static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1695,12 +2520,6 @@ static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s)
     tcg_gen_or_vec(vece, d, d, t);
 }
 
-#ifdef TARGET_X86_64
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
-#else
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
-#endif
-
 static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
@@ -1745,6 +2564,45 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
+static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = gen_pop_T0(s);
+    if (decode->op[0].has_ea) {
+        /* NOTE: order is important for MMU exceptions */
+        gen_op_st_v(s, ot, s->T0, s->A0);
+        decode->op[0].unit = X86_OP_SKIP;
+    }
+    /* NOTE: writing back registers after update is important for pop %sp */
+    gen_pop_update(s, ot);
+}
+
+static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_popa(s);
+}
+
+static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot;
+    int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
+
+    if (CPL(s) == 0) {
+        mask |= IF_MASK | IOPL_MASK;
+    } else if (CPL(s) <= IOPL(s)) {
+        mask |= IF_MASK;
+    }
+    if (s->dflag == MO_16) {
+        mask &= 0xffff;
+    }
+
+    ot = gen_pop_T0(s);
+    gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask));
+    gen_pop_update(s, ot);
+    set_cc_op(s, CC_OP_EFLAGS);
+    /* abort translation because the TF/AC flags may change */
+    s->base.is_jmp = DISAS_EOB_NEXT;
+}
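+
+/*
+ * For example, at CPL 3 with IOPL 0 neither branch above is taken, so
+ * POPF can change neither IF nor IOPL; with a 16-bit operand size the
+ * mask is further clipped to the low 16 bits, preserving AC and ID
+ * (bits 18 and 21) as well.
+ */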
+
 static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1891,6 +2749,455 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
+static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_push_v(s, s->T1);
+}
+
+static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_pusha(s);
+}
+
+static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_helper_read_eflags(s->T0, tcg_env);
+    gen_push_v(s, s->T0);
+}
+
+static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
+                             bool *can_be_zero, TCGv *count)
+{
+    MemOp ot = decode->op[0].ot;
+    int mask = (ot <= MO_32 ? 0x1f : 0x3f);
+
+    *can_be_zero = false;
+    switch (decode->op[2].unit) {
+    case X86_OP_INT:
+        *count = tcg_temp_new();
+        tcg_gen_andi_tl(*count, s->T1, mask);
+        *can_be_zero = true;
+        break;
+
+    case X86_OP_IMM:
+        if ((decode->immediate & mask) == 0) {
+            *count = NULL;
+            break;
+        }
+        *count = tcg_temp_new();
+        tcg_gen_movi_tl(*count, decode->immediate & mask);
+        break;
+
+    case X86_OP_SKIP:
+        *count = tcg_temp_new();
+        tcg_gen_movi_tl(*count, 1);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    return ot;
+}
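+
+/*
+ * The masking above mirrors hardware behavior: e.g. "shl $32, %eax"
+ * leaves both EAX and the flags untouched because 32 & 0x1f == 0 and
+ * the callers return early on a NULL count, while 64-bit operations
+ * mask with 0x3f instead.
+ */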
+
+/*
+ * Compute existing flags in decode->cc_src, for gen_* functions that want
+ * to set cc_op to CC_OP_ADCOX.  In particular, this allows rotate
+ * operations to compute the carry in decode->cc_dst and the overflow in
+ * decode->cc_src2.
+ *
+ * If need_flags is true, decode->cc_dst and decode->cc_src2 are preloaded
+ * with the values of CF and OF before the instruction, so that it is possible
+ * to keep the flags unmodified.
+ *
+ * Return true if carry could be made available cheaply as a 1-bit value in
+ * decode->cc_dst (trying a bit harder if want_carry is true).  If false is
+ * returned, decode->cc_dst is uninitialized and the carry is only available
+ * as bit 0 of decode->cc_src.
+ */
+static bool gen_eflags_adcox(DisasContext *s, X86DecodedInsn *decode, bool want_carry, bool need_flags)
+{
+    bool got_cf = false;
+    bool got_of = false;
+
+    decode->cc_dst = tcg_temp_new();
+    decode->cc_src = tcg_temp_new();
+    decode->cc_src2 = tcg_temp_new();
+    decode->cc_op = CC_OP_ADCOX;
+
+    /* A lot more cc_ops could be "optimized" to avoid the extracts at
+     * the end (INC/DEC, BMILG, MUL), but they are all really unlikely
+     * to be followed by rotations within the same basic block.
+     */
+    switch (s->cc_op) {
+    case CC_OP_ADCOX:
+        /* No need to compute the full EFLAGS, CF/OF are already isolated.  */
+        tcg_gen_mov_tl(decode->cc_src, cpu_cc_src);
+        if (need_flags) {
+            tcg_gen_mov_tl(decode->cc_src2, cpu_cc_src2);
+            got_of = true;
+        }
+        if (want_carry || need_flags) {
+            tcg_gen_mov_tl(decode->cc_dst, cpu_cc_dst);
+            got_cf = true;
+        }
+        break;
+
+    case CC_OP_LOGICB ... CC_OP_LOGICQ:
+        /* CF and OF are zero, do it just because it's easy.  */
+        gen_mov_eflags(s, decode->cc_src);
+        if (need_flags) {
+            tcg_gen_movi_tl(decode->cc_src2, 0);
+            got_of = true;
+        }
+        if (want_carry || need_flags) {
+            tcg_gen_movi_tl(decode->cc_dst, 0);
+            got_cf = true;
+        }
+        break;
+
+    case CC_OP_SARB ... CC_OP_SARQ:
+        /*
+         * An alternating SHR/RCR/SHR/RCR/... sequence is a relatively
+         * common use of RCR.
+         * By computing CF without using eflags, the calls to cc_compute_all
+         * can be eliminated as dead code (except for the last RCR).
+         */
+        if (want_carry || need_flags) {
+            tcg_gen_andi_tl(decode->cc_dst, cpu_cc_src, 1);
+            got_cf = true;
+        }
+        gen_mov_eflags(s, decode->cc_src);
+        break;
+
+    case CC_OP_SHLB ... CC_OP_SHLQ:
+        /*
+         * Likewise for SHL/RCL/SHL/RCL/... but, if CF is not in the sign
+         * bit, we might as well fish CF out of EFLAGS and save a shift.
+         */
+        if (want_carry && (!need_flags || s->cc_op == CC_OP_SHLB + MO_TL)) {
+            tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << (s->cc_op - CC_OP_SHLB)) - 1);
+            got_cf = true;
+        }
+        gen_mov_eflags(s, decode->cc_src);
+        break;
+
+    default:
+        gen_mov_eflags(s, decode->cc_src);
+        break;
+    }
+
+    if (need_flags) {
+        /* If the flags could be left unmodified, always load them.  */
+        if (!got_of) {
+            tcg_gen_extract_tl(decode->cc_src2, decode->cc_src, ctz32(CC_O), 1);
+            got_of = true;
+        }
+        if (!got_cf) {
+            tcg_gen_extract_tl(decode->cc_dst, decode->cc_src, ctz32(CC_C), 1);
+            got_cf = true;
+        }
+    }
+    return got_cf;
+}
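+
+/*
+ * With CC_OP_ADCOX, CF is whether cc_dst is nonzero and OF whether
+ * cc_src2 is nonzero, while cc_src holds the remaining flags; e.g. a
+ * rotate that produced CF=1 and OF=0 ends with cc_dst=1 and cc_src2=0,
+ * independent of the bits left in cc_src.
+ */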
+
+static void gen_rot_overflow(X86DecodedInsn *decode, TCGv result, TCGv old, TCGv count)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv temp = count ? tcg_temp_new() : decode->cc_src2;
+
+    tcg_gen_xor_tl(temp, old, result);
+    tcg_gen_extract_tl(temp, temp, (8 << ot) - 1, 1);
+    if (count) {
+        tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src2, count, tcg_constant_tl(0),
+                           decode->cc_src2, temp);
+    }
+}
+
+/*
+ * RCx operations are invariant modulo 8*operand_size+1.  For 8- and 16-bit
+ * operands the modulus (9 or 17) is smaller than 0x1f (the mask applied by
+ * gen_shift_count), so the count must be reduced further.
+ */
+static void gen_rotc_mod(MemOp ot, TCGv count)
+{
+    TCGv temp;
+
+    switch (ot) {
+    case MO_8:
+        temp = tcg_temp_new();
+        tcg_gen_subi_tl(temp, count, 18);
+        tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
+        tcg_gen_subi_tl(temp, count, 9);
+        tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
+        break;
+
+    case MO_16:
+        temp = tcg_temp_new();
+        tcg_gen_subi_tl(temp, count, 17);
+        tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
+        break;
+
+    default:
+        break;
+    }
+}
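+
+/*
+ * For example, an 8-bit rotate-through-carry with a masked count of 30
+ * is reduced in two steps: 30 - 18 = 12 (>= 0, so taken), then
+ * 12 - 9 = 3 == 30 mod 9.  A single conditional subtraction is enough
+ * for 16-bit operands because the masked count is at most 31 < 2 * 17.
+ */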
+
+/*
+ * The idea here is that the bit to the right of the new bit 0 is the
+ * new carry, and the bit to the right of the old bit 0 is the old carry.
+ * Just like a regular rotation, the result of the rotation is composed
+ * from a right-shifted part and a left-shifted part of s->T0.  The new carry
+ * is extracted from the right-shifted portion, and the old carry is
+ * inserted at the end of the left-shifted portion.
+ *
+ * Because of the separate shifts involving the carry, gen_RCL and gen_RCR
+ * mostly operate on count-1.  This also comes in handy when computing
+ * length - count, because (length-1) - (count-1) can be computed with
+ * a XOR, which, unlike subtraction, is commutative.
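+ *
+ * For example, with ot == MO_8 and count == 3: count-1 == 2 and
+ * (8-1) ^ 2 == 5 == (8-1) - (3-1).  The identity holds because LENGTH-1
+ * is all ones in binary, so the XOR clears exactly the bits that the
+ * subtraction would, and no borrow can occur.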
+ */
+static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool have_1bit_cin, can_be_zero;
+    TCGv count;
+    TCGLabel *zero_label = NULL;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    TCGv low, high, low_count;
+
+    if (!count) {
+        return;
+    }
+
+    low = tcg_temp_new();
+    high = tcg_temp_new();
+    low_count = tcg_temp_new();
+
+    gen_rotc_mod(ot, count);
+    have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero);
+    if (can_be_zero) {
+        zero_label = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_EQ, count, 0, zero_label);
+    }
+
+    /* Compute high part, including incoming carry.  */
+    if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+        /* high = (T0 << 1) | cin */
+        TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
+        tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
+    } else {
+        /* Same as above but without deposit; cin in cc_dst.  */
+        tcg_gen_add_tl(high, s->T0, decode->cc_dst);
+        tcg_gen_add_tl(high, high, s->T0);
+    }
+    tcg_gen_subi_tl(count, count, 1);
+    tcg_gen_shl_tl(high, high, count);
+
+    /* Compute low part and outgoing carry, incoming s->T0 is zero extended */
+    tcg_gen_xori_tl(low_count, count, (8 << ot) - 1); /* LENGTH - 1 - (count - 1) */
+    tcg_gen_shr_tl(low, s->T0, low_count);
+    tcg_gen_andi_tl(decode->cc_dst, low, 1);
+    tcg_gen_shri_tl(low, low, 1);
+
+    /* Compute result and outgoing overflow */
+    tcg_gen_mov_tl(decode->cc_src2, s->T0);
+    tcg_gen_or_tl(s->T0, low, high);
+    gen_rot_overflow(decode, s->T0, decode->cc_src2, NULL);
+
+    if (zero_label) {
+        gen_set_label(zero_label);
+    }
+}
+
+static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool have_1bit_cin, can_be_zero;
+    TCGv count;
+    TCGLabel *zero_label = NULL;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    TCGv low, high, high_count;
+
+    if (!count) {
+        return;
+    }
+
+    low = tcg_temp_new();
+    high = tcg_temp_new();
+    high_count = tcg_temp_new();
+
+    gen_rotc_mod(ot, count);
+    have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero);
+    if (can_be_zero) {
+        zero_label = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_EQ, count, 0, zero_label);
+    }
+
+    /* Save incoming carry into high, it will be shifted later.  */
+    if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+        TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
+        tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
+    } else {
+        /* Same as above but without deposit; cin in cc_dst.  */
+        tcg_gen_add_tl(high, s->T0, decode->cc_dst);
+        tcg_gen_add_tl(high, high, s->T0);
+    }
+
+    /* Compute low part and outgoing carry, incoming s->T0 is zero extended */
+    tcg_gen_subi_tl(count, count, 1);
+    tcg_gen_shr_tl(low, s->T0, count);
+    tcg_gen_andi_tl(decode->cc_dst, low, 1);
+    tcg_gen_shri_tl(low, low, 1);
+
+    /* Move high part to the right position */
+    tcg_gen_xori_tl(high_count, count, (8 << ot) - 1); /* LENGTH - 1 - (count - 1) */
+    tcg_gen_shl_tl(high, high, high_count);
+
+    /* Compute result and outgoing overflow */
+    tcg_gen_mov_tl(decode->cc_src2, s->T0);
+    tcg_gen_or_tl(s->T0, low, high);
+    gen_rot_overflow(decode, s->T0, decode->cc_src2, NULL);
+
+    if (zero_label) {
+        gen_set_label(zero_label);
+    }
+}
+
+static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+
+    MemOp ot = gen_pop_T0(s);
+    gen_stack_update(s, adjust + (1 << ot));
+    gen_op_jmp_v(s, s->T0);
+    gen_bnd_jmp(s);
+    s->base.is_jmp = DISAS_JUMP;
+}
+
+static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+
+    if (!PE(s) || VM86(s)) {
+        gen_stack_A0(s);
+        /* pop offset */
+        gen_op_ld_v(s, s->dflag, s->T0, s->A0);
+        /* NOTE: keeping EIP updated is not a problem in case of
+           exception */
+        gen_op_jmp_v(s, s->T0);
+        /* pop selector */
+        gen_add_A0_im(s, 1 << s->dflag);
+        gen_op_ld_v(s, s->dflag, s->T0, s->A0);
+        gen_op_movl_seg_real(s, R_CS, s->T0);
+        /* add stack offset */
+        gen_stack_update(s, adjust + (2 << s->dflag));
+    } else {
+        gen_update_cc_op(s);
+        gen_update_eip_cur(s);
+        gen_helper_lret_protected(tcg_env, tcg_constant_i32(s->dflag - 1),
+                                  tcg_constant_i32(adjust));
+    }
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+/*
+ * Return a non-NULL 32-bit value if the rotate can be done in 32 bits,
+ * replicating the input as needed; return NULL for full-width rotates.
+ * The input has already been zero-extended upon operand decode.
+ */
+static TCGv_i32 gen_rot_replicate(MemOp ot, TCGv in)
+{
+    TCGv_i32 temp;
+    switch (ot) {
+    case MO_8:
+        temp = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(temp, in);
+        tcg_gen_muli_i32(temp, temp, 0x01010101);
+        return temp;
+
+    case MO_16:
+        temp = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(temp, in);
+        tcg_gen_deposit_i32(temp, temp, temp, 16, 16);
+        return temp;
+
+#ifdef TARGET_X86_64
+    case MO_32:
+        temp = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(temp, in);
+        return temp;
+#endif
+
+    default:
+        return NULL;
+    }
+}
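+
+/*
+ * For example, rotating AL = 0x81 left by 1: the replicated 32-bit
+ * value is 0x81818181 and rotl32(0x81818181, 1) == 0x03030303, whose
+ * low byte 0x03 equals rol8(0x81, 1).  Every byte of the 32-bit result
+ * holds the correct 8-bit rotation, so truncating is safe for any count.
+ */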
+
+static void gen_rot_carry(X86DecodedInsn *decode, TCGv result, TCGv count, int bit)
+{
+    if (count == NULL) {
+        tcg_gen_extract_tl(decode->cc_dst, result, bit, 1);
+    } else {
+        TCGv temp = tcg_temp_new();
+        tcg_gen_extract_tl(temp, result, bit, 1);
+        tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0),
+                           decode->cc_dst, temp);
+    }
+}
+
+static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    TCGv_i32 temp32, count32;
+    TCGv old = tcg_temp_new();
+
+    if (!count) {
+        return;
+    }
+
+    gen_eflags_adcox(s, decode, false, can_be_zero);
+    tcg_gen_mov_tl(old, s->T0);
+    temp32 = gen_rot_replicate(ot, s->T0);
+    if (temp32) {
+        count32 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(count32, count);
+        tcg_gen_rotl_i32(temp32, temp32, count32);
+        /* Zero extend to facilitate later optimization.  */
+        tcg_gen_extu_i32_tl(s->T0, temp32);
+    } else {
+        tcg_gen_rotl_tl(s->T0, s->T0, count);
+    }
+    gen_rot_carry(decode, s->T0, count, 0);
+    gen_rot_overflow(decode, s->T0, old, count);
+}
+
+static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    TCGv_i32 temp32, count32;
+    TCGv old = tcg_temp_new();
+
+    if (!count) {
+        return;
+    }
+
+    gen_eflags_adcox(s, decode, false, can_be_zero);
+    tcg_gen_mov_tl(old, s->T0);
+    temp32 = gen_rot_replicate(ot, s->T0);
+    if (temp32) {
+        count32 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(count32, count);
+        tcg_gen_rotr_i32(temp32, temp32, count32);
+        /* Zero extend to facilitate later optimization.  */
+        tcg_gen_extu_i32_tl(s->T0, temp32);
+        gen_rot_carry(decode, s->T0, count, 31);
+    } else {
+        tcg_gen_rotr_tl(s->T0, s->T0, count);
+        gen_rot_carry(decode, s->T0, count, TARGET_LONG_BITS - 1);
+    }
+    gen_rot_overflow(decode, s->T0, old, count);
+}
+
 static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1915,6 +3222,76 @@ static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
+static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
+        return gen_illegal_opcode(s);
+    }
+    tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8);
+    gen_compute_eflags(s);
+    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
+    tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
+    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
+}
+
+static void gen_SALC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_compute_eflags_c(s, s->T0);
+    tcg_gen_neg_tl(s->T0, s->T0);
+}
+
+static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCGv count, CCOp cc_op)
+{
+    TCGv_i32 count32 = tcg_temp_new_i32();
+    TCGv_i32 old_cc_op;
+
+    decode->cc_op = CC_OP_DYNAMIC;
+    decode->cc_op_dynamic = tcg_temp_new_i32();
+
+    assert(decode->cc_dst == s->T0);
+    if (cc_op_live[s->cc_op] & USES_CC_DST) {
+        decode->cc_dst = tcg_temp_new();
+        tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0),
+                           cpu_cc_dst, s->T0);
+    }
+
+    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+        tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src, count, tcg_constant_tl(0),
+                           cpu_cc_src, decode->cc_src);
+    }
+
+    tcg_gen_trunc_tl_i32(count32, count);
+    if (s->cc_op == CC_OP_DYNAMIC) {
+        old_cc_op = cpu_cc_op;
+    } else {
+        old_cc_op = tcg_constant_i32(s->cc_op);
+    }
+    tcg_gen_movcond_i32(TCG_COND_EQ, decode->cc_op_dynamic, count32, tcg_constant_i32(0),
+                        old_cc_op, tcg_constant_i32(cc_op));
+}
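+
+/*
+ * This implements the architectural rule that a shift by CL == 0 leaves
+ * the flags untouched: when count is zero, the movcond operations above
+ * select the previous cc_dst/cc_src values and the old cc_op, so a later
+ * flag read reconstructs the pre-shift EFLAGS.
+ */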
+
+static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = tcg_temp_new();
+    tcg_gen_subi_tl(decode->cc_src, count, 1);
+    tcg_gen_sar_tl(decode->cc_src, s->T0, decode->cc_src);
+    tcg_gen_sar_tl(s->T0, s->T0, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+    } else {
+        decode->cc_op = CC_OP_SARB + ot;
+    }
+}
+
 static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1925,6 +3302,45 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
 }
 
+static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv c_in = tcg_temp_new();
+
+    gen_compute_eflags_c(s, c_in);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_add_tl(s->T0, s->T1, c_in);
+        tcg_gen_neg_tl(s->T0, s->T0);
+        tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0,
+                                    s->mem_index, ot | MO_LE);
+    } else {
+        /*
+         * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by
+         * negsetcond, and CC_OP_SUBB as the cc_op.
+         */
+        tcg_gen_sub_tl(s->T0, s->T0, s->T1);
+        tcg_gen_sub_tl(s->T0, s->T0, c_in);
+    }
+    prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in);
+}
+
+static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    if (s->prefix & PREFIX_REPNZ) {
+        gen_repz_scas(s, ot, 1);
+    } else if (s->prefix & PREFIX_REPZ) {
+        gen_repz_scas(s, ot, 0);
+    } else {
+        gen_scas(s, ot);
+    }
+}
+
+static void gen_SETcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_setcc1(s, decode->b & 0xf, s->T0);
+}
+
 static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
@@ -1979,6 +3395,28 @@ static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
     gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1);
 }
 
+static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = tcg_temp_new();
+    tcg_gen_subi_tl(decode->cc_src, count, 1);
+    tcg_gen_shl_tl(decode->cc_src, s->T0, decode->cc_src);
+    tcg_gen_shl_tl(s->T0, s->T0, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
+    } else {
+        decode->cc_op = CC_OP_SHLB + ot;
+    }
+}
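+
+/*
+ * For example, "shl $1, %al" with AL = 0x80: cc_src holds the value
+ * shifted by count-1 == 0, i.e. 0x80, and CC_OP_SHLB takes CF from bit 7
+ * of cc_src, so CF = 1 exactly because the 0x80 bit was shifted out.
+ */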
+
 static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1989,6 +3427,28 @@ static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
 }
 
+static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = tcg_temp_new();
+    tcg_gen_subi_tl(decode->cc_src, count, 1);
+    tcg_gen_shr_tl(decode->cc_src, s->T0, decode->cc_src);
+    tcg_gen_shr_tl(s->T0, s->T0, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+    } else {
+        decode->cc_op = CC_OP_SARB + ot;
+    }
+}
+
 static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1999,6 +3459,25 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
 }
 
+static void gen_STC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_compute_eflags(s);
+    tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+}
+
+static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    tcg_gen_st_i32(tcg_constant_i32(-1), tcg_env, offsetof(CPUX86State, df));
+}
+
+static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_set_eflags(s, IF_MASK);
+    /* interrupt delivery is inhibited until the insn after STI has executed */
+    gen_update_eip_next(s);
+    gen_eob_inhibit_irq(s);
+}
+
 static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -2012,6 +3491,32 @@ static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
 }
 
+static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        gen_repz_stos(s, ot);
+    } else {
+        gen_stos(s, ot);
+    }
+}
+
+static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_neg_tl(s->T0, s->T1);
+        tcg_gen_atomic_fetch_add_tl(s->cc_srcT, s->A0, s->T0,
+                                    s->mem_index, ot | MO_LE);
+        tcg_gen_sub_tl(s->T0, s->cc_srcT, s->T1);
+    } else {
+        tcg_gen_mov_tl(s->cc_srcT, s->T0);
+        tcg_gen_sub_tl(s->T0, s->T0, s->T1);
+    }
+    prepare_update2_cc(decode, s, CC_OP_SUBB + ot);
+}
+
 static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     assert(!s->vex_l);
@@ -2491,3 +3996,69 @@ static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
         tcg_gen_gvec_dup_imm(MO_64, offset, 16, 16, 0);
     }
 }
+
+static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) {
+        gen_NM_exception(s);
+    } else {
+        /* needs to be treated as I/O because of ferr_irq */
+        translator_io_start(&s->base);
+        gen_helper_fwait(tcg_env);
+    }
+}
+
+static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    if (decode->b == 0x90 && !REX_B(s)) {
+        if (s->prefix & PREFIX_REPZ) {
+            gen_update_cc_op(s);
+            gen_update_eip_cur(s);
+            gen_helper_pause(tcg_env, cur_insn_len_i32(s));
+            s->base.is_jmp = DISAS_NORETURN;
+        }
+        /* No writeback.  */
+        decode->op[0].unit = X86_OP_SKIP;
+        return;
+    }
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1,
+                               s->mem_index, decode->op[0].ot | MO_LE);
+        /* now store old value into register operand */
+        gen_op_mov_reg_v(s, decode->op[2].ot, decode->op[2].n, s->T0);
+    } else {
+        /* move destination value into source operand, source preserved in T1 */
+        gen_op_mov_reg_v(s, decode->op[2].ot, decode->op[2].n, s->T0);
+        tcg_gen_mov_tl(s->T0, s->T1);
+    }
+}
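+
+/*
+ * The 0x90 special case above covers both NOP (xchg eax, eax has no
+ * architectural effect) and PAUSE, whose encoding is "rep nop" (F3 90);
+ * with REX.B the same opcode byte is a real exchange with R8 and goes
+ * through the normal path.
+ */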
+
+static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    /* AL is already zero-extended into s->T0.  */
+    tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0);
+    gen_add_A0_ds_seg(s);
+    gen_op_ld_v(s, MO_8, s->T0, s->A0);
+}
+
+static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    /* special case XOR reg, reg */
+    if (decode->op[1].unit == X86_OP_INT &&
+        decode->op[2].unit == X86_OP_INT &&
+        decode->op[1].n == decode->op[2].n) {
+        tcg_gen_movi_tl(s->T0, 0);
+        decode->cc_op = CC_OP_CLR;
+    } else {
+        MemOp ot = decode->op[1].ot;
+
+        if (s->prefix & PREFIX_LOCK) {
+            tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T1,
+                                        s->mem_index, ot | MO_LE);
+        } else {
+            tcg_gen_xor_tl(s->T0, s->T0, s->T1);
+        }
+        prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
+    }
+}
diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c
index ab85dc5540..df16130f5d 100644
--- a/target/i386/tcg/int_helper.c
+++ b/target/i386/tcg/int_helper.c
@@ -29,22 +29,6 @@
 
 //#define DEBUG_MULDIV
 
-/* modulo 9 table */
-static const uint8_t rclb_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7,
-    8, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 0, 1, 2, 3, 4, 5,
-    6, 7, 8, 0, 1, 2, 3, 4,
-};
-
-/* modulo 17 table */
-static const uint8_t rclw_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7,
-    8, 9, 10, 11, 12, 13, 14, 15,
-    16, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 9, 10, 11, 12, 13, 14,
-};
-
 /* division, flags are undefined */
 
 void helper_divb_AL(CPUX86State *env, target_ulong t0)
@@ -447,24 +431,6 @@ target_ulong helper_pext(target_ulong src, target_ulong mask)
     return dest;
 }
 
-#define SHIFT 0
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-
-#define SHIFT 1
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-
-#define SHIFT 2
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-
-#ifdef TARGET_X86_64
-#define SHIFT 3
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-#endif
-
 /* Test that BIT is enabled in CR4.  If not, raise an illegal opcode
    exception.  This reduces the requirements for rare CR4 bits being
    mapped into HFLAGS.  */
diff --git a/target/i386/tcg/shift_helper_template.h.inc b/target/i386/tcg/shift_helper_template.h.inc
deleted file mode 100644
index 54f15d6e05..0000000000
--- a/target/i386/tcg/shift_helper_template.h.inc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- *  x86 shift helpers
- *
- *  Copyright (c) 2008 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DATA_BITS (1 << (3 + SHIFT))
-#define SHIFT_MASK (DATA_BITS - 1)
-#if DATA_BITS <= 32
-#define SHIFT1_MASK 0x1f
-#else
-#define SHIFT1_MASK 0x3f
-#endif
-
-#if DATA_BITS == 8
-#define SUFFIX b
-#define DATA_MASK 0xff
-#elif DATA_BITS == 16
-#define SUFFIX w
-#define DATA_MASK 0xffff
-#elif DATA_BITS == 32
-#define SUFFIX l
-#define DATA_MASK 0xffffffff
-#elif DATA_BITS == 64
-#define SUFFIX q
-#define DATA_MASK 0xffffffffffffffffULL
-#else
-#error unhandled operand size
-#endif
-
-target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0,
-                                      target_ulong t1)
-{
-    int count, eflags;
-    target_ulong src;
-    target_long res;
-
-    count = t1 & SHIFT1_MASK;
-#if DATA_BITS == 16
-    count = rclw_table[count];
-#elif DATA_BITS == 8
-    count = rclb_table[count];
-#endif
-    if (count) {
-        eflags = env->cc_src;
-        t0 &= DATA_MASK;
-        src = t0;
-        res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
-        if (count > 1) {
-            res |= t0 >> (DATA_BITS + 1 - count);
-        }
-        t0 = res;
-        env->cc_src = (eflags & ~(CC_C | CC_O)) |
-            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
-            ((src >> (DATA_BITS - count)) & CC_C);
-    }
-    return t0;
-}
-
-target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0,
-                                      target_ulong t1)
-{
-    int count, eflags;
-    target_ulong src;
-    target_long res;
-
-    count = t1 & SHIFT1_MASK;
-#if DATA_BITS == 16
-    count = rclw_table[count];
-#elif DATA_BITS == 8
-    count = rclb_table[count];
-#endif
-    if (count) {
-        eflags = env->cc_src;
-        t0 &= DATA_MASK;
-        src = t0;
-        res = (t0 >> count) |
-            ((target_ulong)(eflags & CC_C) << (DATA_BITS - count));
-        if (count > 1) {
-            res |= t0 << (DATA_BITS + 1 - count);
-        }
-        t0 = res;
-        env->cc_src = (eflags & ~(CC_C | CC_O)) |
-            (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
-            ((src >> (count - 1)) & CC_C);
-    }
-    return t0;
-}
-
-#undef DATA_BITS
-#undef SHIFT_MASK
-#undef SHIFT1_MASK
-#undef DATA_TYPE
-#undef DATA_MASK
-#undef SUFFIX
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 051ffb5e1f..3842b29484 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -38,6 +38,9 @@
 #include "exec/helper-info.c.inc"
 #undef  HELPER_H
 
+/* Fixes for Windows namespace pollution.  */
+#undef IN
+#undef OUT
 
 #define PREFIX_REPZ   0x01
 #define PREFIX_REPNZ  0x02
@@ -212,7 +215,6 @@ typedef struct DisasContext {
 #ifdef CONFIG_USER_ONLY
 STUB_HELPER(clgi, TCGv_env env)
 STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
-STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
 STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
 STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
 STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
@@ -239,21 +241,8 @@ static void gen_eob(DisasContext *s);
 static void gen_jr(DisasContext *s);
 static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num);
 static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num);
-static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
 static void gen_exception_gpf(DisasContext *s);
 
-/* i386 arith/logic operations */
-enum {
-    OP_ADDL,
-    OP_ORL,
-    OP_ADCL,
-    OP_SBBL,
-    OP_ANDL,
-    OP_SUBL,
-    OP_XORL,
-    OP_CMPL,
-};
-
 /* i386 shift ops */
 enum {
     OP_ROL,
@@ -439,13 +428,6 @@ static inline MemOp mo_b_d(int b, MemOp ot)
     return b & 1 ? ot : MO_8;
 }
 
-/* Select size 8 if lsb of B is clear, else OT capped at 32.
-   Used for decoding operand size of port opcodes.  */
-static inline MemOp mo_b_d32(int b, MemOp ot)
-{
-    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
-}
-
 /* Compute the result of writing t0 to the OT-sized register REG.
  *
  * If DEST is NULL, store the result into the register and return the
@@ -848,25 +830,6 @@ static void gen_op_update2_cc(DisasContext *s)
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static void gen_op_update3_cc(DisasContext *s, TCGv reg)
-{
-    tcg_gen_mov_tl(cpu_cc_src2, reg);
-    tcg_gen_mov_tl(cpu_cc_src, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
-static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
-{
-    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
-}
-
-static void gen_op_update_neg_cc(DisasContext *s)
-{
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    tcg_gen_neg_tl(cpu_cc_src, s->T0);
-    tcg_gen_movi_tl(s->cc_srcT, 0);
-}
-
 /* compute all eflags to reg */
 static void gen_mov_eflags(DisasContext *s, TCGv reg)
 {
@@ -923,94 +886,100 @@ typedef struct CCPrepare {
     TCGv reg;
     TCGv reg2;
     target_ulong imm;
-    target_ulong mask;
     bool use_reg2;
     bool no_setcond;
 } CCPrepare;
 
-/* compute eflags.C to reg */
+static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
+{
+    if (size == MO_TL) {
+        return (CCPrepare) { .cond = TCG_COND_LT, .reg = src };
+    } else {
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src,
+                             .imm = 1ull << ((8 << size) - 1) };
+    }
+}
+
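+/*
+ * TCG_COND_TSTNE tests (reg & imm) != 0, so e.g. for MO_8 the prepared
+ * condition checks bit 7 directly (imm == 0x80) without a separate AND
+ * or shift; only the full-width MO_TL case needs a signed comparison
+ * against zero.
+ */
+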
+/* compute eflags.C, trying to store it in reg if not NULL */
 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 {
-    TCGv t0, t1;
-    int size, shift;
+    MemOp size;
 
     switch (s->cc_op) {
     case CC_OP_SUBB ... CC_OP_SUBQ:
         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_SUBB;
-        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-        /* If no temporary was used, be careful not to alias t1 and t0.  */
-        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
-        tcg_gen_mov_tl(t0, s->cc_srcT);
-        gen_extu(size, t0);
-        goto add_sub;
+        gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
+        gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = s->cc_srcT,
+                             .reg2 = cpu_cc_src, .use_reg2 = true };
 
     case CC_OP_ADDB ... CC_OP_ADDQ:
         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_ADDB;
-        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-    add_sub:
-        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
-                             .reg2 = t1, .mask = -1, .use_reg2 = true };
+        gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size, false);
+        gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst,
+                             .reg2 = cpu_cc_src, .use_reg2 = true };
 
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
     case CC_OP_CLR:
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NEVER };
 
     case CC_OP_INCB ... CC_OP_INCQ:
     case CC_OP_DECB ... CC_OP_DECQ:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = -1, .no_setcond = true };
+                             .no_setcond = true };
 
     case CC_OP_SHLB ... CC_OP_SHLQ:
         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
         size = s->cc_op - CC_OP_SHLB;
-        shift = (8 << size) - 1;
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = (target_ulong)1 << shift };
+        return gen_prepare_sign_nz(cpu_cc_src, size);
 
     case CC_OP_MULB ... CC_OP_MULQ:
         return (CCPrepare) { .cond = TCG_COND_NE,
-                             .reg = cpu_cc_src, .mask = -1 };
+                             .reg = cpu_cc_src };
 
     case CC_OP_BMILGB ... CC_OP_BMILGQ:
         size = s->cc_op - CC_OP_BMILGB;
-        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
-        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+        gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
 
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
-                             .mask = -1, .no_setcond = true };
+                             .no_setcond = true };
 
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
-        return (CCPrepare) { .cond = TCG_COND_NE,
-                             .reg = cpu_cc_src, .mask = CC_C };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE,
+                             .reg = cpu_cc_src, .imm = CC_C };
 
     default:
        /* The need to compute only C from CC_OP_DYNAMIC is important
           in efficiently implementing e.g. INC at the start of a TB.  */
        gen_update_cc_op(s);
+       if (!reg) {
+           reg = tcg_temp_new();
+       }
        gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                                cpu_cc_src2, cpu_cc_op);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                            .mask = -1, .no_setcond = true };
+                            .no_setcond = true };
     }
 }
 
-/* compute eflags.P to reg */
+/* compute eflags.P, trying to store it in reg if not NULL */
 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
 {
     gen_compute_eflags(s);
-    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                         .mask = CC_P };
+    return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                         .imm = CC_P };
 }
 
-/* compute eflags.S to reg */
+/* compute eflags.S, trying to store it in reg if not NULL */
 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
 {
     switch (s->cc_op) {
@@ -1021,42 +990,40 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
     case CC_OP_ADCX:
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_S };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                             .imm = CC_S };
     case CC_OP_CLR:
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NEVER };
     default:
         {
             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
-            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
-            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
+            return gen_prepare_sign_nz(cpu_cc_dst, size);
         }
     }
 }
 
-/* compute eflags.O to reg */
+/* compute eflags.O, trying to store it in reg if not NULL */
 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
 {
     switch (s->cc_op) {
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
-                             .mask = -1, .no_setcond = true };
+                             .no_setcond = true };
     case CC_OP_CLR:
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NEVER };
     case CC_OP_MULB ... CC_OP_MULQ:
-        return (CCPrepare) { .cond = TCG_COND_NE,
-                             .reg = cpu_cc_src, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src };
     default:
         gen_compute_eflags(s);
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_O };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                             .imm = CC_O };
     }
 }
 
-/* compute eflags.Z to reg */
+/* compute eflags.Z, trying to store it in reg if not NULL */
 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
 {
     switch (s->cc_op) {
@@ -1067,30 +1034,33 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
     case CC_OP_ADCX:
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_Z };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                             .imm = CC_Z };
     case CC_OP_CLR:
-        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_ALWAYS };
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
-                             .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
     default:
         {
             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
-            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+            if (size == MO_TL) {
+                return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst };
+            } else {
+                return (CCPrepare) { .cond = TCG_COND_TSTEQ, .reg = cpu_cc_dst,
+                                     .imm = (1ull << (8 << size)) - 1 };
+            }
         }
     }
 }
 
-/* perform a conditional store into register 'reg' according to jump opcode
-   value 'b'. In the fast case, T0 is guaranteed not to be used. */
+/* return how to compute jump opcode 'b'.  'reg' can be clobbered
+ * if needed; it may be used for CCPrepare.reg if that will
+ * provide more freedom in the translation of a subsequent setcond. */
 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
 {
     int inv, jcc_op, cond;
     MemOp size;
     CCPrepare cc;
-    TCGv t0;
 
     inv = b & 1;
     jcc_op = (b >> 1) & 7;
@@ -1101,24 +1071,21 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
-            gen_extu(size, s->tmp4);
-            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
-                               .reg2 = t0, .mask = -1, .use_reg2 = true };
+            gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
+            gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->cc_srcT,
+                               .reg2 = cpu_cc_src, .use_reg2 = true };
             break;
-
         case JCC_L:
             cond = TCG_COND_LT;
             goto fast_jcc_l;
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
-            gen_exts(size, s->tmp4);
-            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
-            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
-                               .reg2 = t0, .mask = -1, .use_reg2 = true };
+            gen_ext_tl(s->cc_srcT, s->cc_srcT, size, true);
+            gen_ext_tl(cpu_cc_src, cpu_cc_src, size, true);
+            cc = (CCPrepare) { .cond = cond, .reg = s->cc_srcT,
+                               .reg2 = cpu_cc_src, .use_reg2 = true };
             break;
 
         default:
@@ -1141,8 +1108,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             break;
         case JCC_BE:
             gen_compute_eflags(s);
-            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                               .mask = CC_Z | CC_C };
+            cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                               .imm = CC_Z | CC_C };
             break;
         case JCC_S:
             cc = gen_prepare_eflags_s(s, reg);
@@ -1152,22 +1119,22 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             break;
         case JCC_L:
             gen_compute_eflags(s);
-            if (reg == cpu_cc_src) {
-                reg = s->tmp0;
+            if (!reg || reg == cpu_cc_src) {
+                reg = tcg_temp_new();
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
-            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                               .mask = CC_O };
+            cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
+                               .imm = CC_O };
             break;
         default:
         case JCC_LE:
             gen_compute_eflags(s);
-            if (reg == cpu_cc_src) {
-                reg = s->tmp0;
+            if (!reg || reg == cpu_cc_src) {
+                reg = tcg_temp_new();
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
-            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                               .mask = CC_O | CC_Z };
+            cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
+                               .imm = CC_O | CC_Z };
             break;
         }
         break;
@@ -1192,16 +1159,6 @@ static void gen_setcc1(DisasContext *s, int b, TCGv reg)
         return;
     }
 
-    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
-        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
-        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
-        tcg_gen_andi_tl(reg, reg, 1);
-        return;
-    }
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
-        cc.reg = reg;
-    }
     if (cc.use_reg2) {
         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
     } else {
@@ -1218,12 +1175,8 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
    value 'b'. In the fast case, T0 is guaranteed not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
+    CCPrepare cc = gen_prepare_cc(s, b, NULL);
 
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
-        cc.reg = s->T0;
-    }
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
     } else {
@@ -1233,17 +1186,13 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 
 /* Generate a conditional jump to label 'l1' according to jump opcode
    value 'b'. In the fast case, T0 is guaranteed not to be used.
-   A translation block must end soon.  */
+   One or both of the branches will call gen_jmp_rel, so ensure
+   cc_op is clean.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
+    CCPrepare cc = gen_prepare_cc(s, b, NULL);
 
     gen_update_cc_op(s);
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
-        cc.reg = s->T0;
-    }
-    set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
     } else {
@@ -1252,11 +1201,15 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 }
 
 /* XXX: does not work with gdbstub "ice" single step - not a
-   serious problem */
+   serious problem.  The caller can jump to the returned label
+   to stop the REP but, if the flags have changed, it has to call
+   gen_update_cc_op before doing so.  */
 static TCGLabel *gen_jz_ecx_string(DisasContext *s)
 {
     TCGLabel *l1 = gen_new_label();
     TCGLabel *l2 = gen_new_label();
+
+    gen_update_cc_op(s);
     gen_op_jnz_ecx(s, l1);
     gen_set_label(l2);
     gen_jmp_rel_csize(s, 0, 1);
@@ -1298,7 +1251,11 @@ static void gen_cmps(DisasContext *s, MemOp ot)
     gen_string_movl_A0_EDI(s);
     gen_op_ld_v(s, ot, s->T1, s->A0);
     gen_string_movl_A0_ESI(s);
-    gen_op(s, OP_CMPL, ot, OR_TMP0);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1);
+    tcg_gen_mov_tl(s->cc_srcT, s->T0);
+    tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
+    set_cc_op(s, CC_OP_SUBB + ot);
 
     dshift = gen_compute_Dshift(s, ot);
     gen_op_add_reg(s, s->aflag, R_ESI, dshift);
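
With the gen_op(s, OP_CMPL, ...) call gone, gen_cmps saves the subtraction
operands itself: cpu_cc_src holds the subtrahend, s->cc_srcT the minuend,
cpu_cc_dst the difference, and CC_OP_SUBB + ot records how to rebuild the
flags on demand. A rough C model of how the prepare functions above recover
ZF and CF from that triple (plain C, illustrative names):

    #include <stdbool.h>
    #include <stdint.h>

    /* size_log2 is the MemOp width: 0 = MO_8 ... 3 = MO_64 */
    static uint64_t width_mask(int size_log2)
    {
        return size_log2 == 3 ? ~(uint64_t)0
                              : (1ull << (8 << size_log2)) - 1;
    }

    /* ZF: the masked difference is zero (the TSTEQ case further up) */
    static bool zf_from_sub(uint64_t cc_dst, int size_log2)
    {
        return (cc_dst & width_mask(size_log2)) == 0;
    }

    /* CF: unsigned borrow, minuend < subtrahend (JCC_B/JCC_BE) */
    static bool cf_from_sub(uint64_t cc_srcT, uint64_t cc_src,
                            int size_log2)
    {
        uint64_t m = width_mask(size_log2);
        return (cc_srcT & m) < (cc_src & m);
    }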
@@ -1352,7 +1309,6 @@ static void gen_repz(DisasContext *s, MemOp ot,
                      void (*fn)(DisasContext *s, MemOp ot))
 {
     TCGLabel *l2;
-    gen_update_cc_op(s);
     l2 = gen_jz_ecx_string(s);
     fn(s, ot);
     gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
@@ -1374,15 +1330,18 @@ static void gen_repz2(DisasContext *s, MemOp ot, int nz,
                       void (*fn)(DisasContext *s, MemOp ot))
 {
     TCGLabel *l2;
-    gen_update_cc_op(s);
     l2 = gen_jz_ecx_string(s);
     fn(s, ot);
     gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
-    gen_update_cc_op(s);
     gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);
     if (s->repz_opt) {
         gen_op_jz_ecx(s, l2);
     }
+    /*
+     * Only one iteration is done at a time, so the translation
+     * block ends unconditionally after this instruction and there
+     * is no control flow junction - no need to set CC_OP_DYNAMIC.
+     */
     gen_jmp_rel_csize(s, -cur_insn_len(s), 0);
 }
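
A hypothetical C model of one execution of the block gen_repz2 emits for
REPZ CMPS/SCAS makes the comment concrete: each pass through the translated
block performs at most one iteration and then jumps back to the same
instruction (the repz_opt early ECX recheck is omitted, names are
illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns true if the block jumps back to re-execute the insn. */
    static bool repz_step(uint64_t *ecx, bool (*do_one)(void) /* -> ZF */)
    {
        if (*ecx == 0) {      /* gen_jz_ecx_string: fall through     */
            return false;
        }
        bool zf = do_one();   /* fn(s, ot): one string iteration     */
        *ecx -= 1;            /* gen_op_add_reg_im(..., R_ECX, -1)   */
        if (!zf) {            /* gen_jcc1: REPZ stops when ZF clears */
            return false;
        }
        return true;          /* gen_jmp_rel_csize(-cur_insn_len)    */
    }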
 
@@ -1485,165 +1444,6 @@ static bool check_cpl0(DisasContext *s)
     return false;
 }
 
-/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
-static bool check_vm86_iopl(DisasContext *s)
-{
-    if (!VM86(s) || IOPL(s) == 3) {
-        return true;
-    }
-    gen_exception_gpf(s);
-    return false;
-}
-
-/* Check for iopl allowing access; if not, raise #GP and return false. */
-static bool check_iopl(DisasContext *s)
-{
-    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
-        return true;
-    }
-    gen_exception_gpf(s);
-    return false;
-}
-
-/* if d == OR_TMP0, it means memory operand (address in A0) */
-static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
-{
-    /* Invalid lock prefix when destination is not memory or OP_CMPL. */
-    if ((d != OR_TMP0 || op == OP_CMPL) && s1->prefix & PREFIX_LOCK) {
-        gen_illegal_opcode(s1);
-        return;
-    }
-
-    if (d != OR_TMP0) {
-        gen_op_mov_v_reg(s1, ot, s1->T0, d);
-    } else if (!(s1->prefix & PREFIX_LOCK)) {
-        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
-    }
-    switch(op) {
-    case OP_ADCL:
-        gen_compute_eflags_c(s1, s1->tmp4);
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
-            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
-                                        s1->mem_index, ot | MO_LE);
-        } else {
-            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
-            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update3_cc(s1, s1->tmp4);
-        set_cc_op(s1, CC_OP_ADCB + ot);
-        break;
-    case OP_SBBL:
-        gen_compute_eflags_c(s1, s1->tmp4);
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
-            tcg_gen_neg_tl(s1->T0, s1->T0);
-            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
-                                        s1->mem_index, ot | MO_LE);
-        } else {
-            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
-            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update3_cc(s1, s1->tmp4);
-        set_cc_op(s1, CC_OP_SBBB + ot);
-        break;
-    case OP_ADDL:
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
-                                        s1->mem_index, ot | MO_LE);
-        } else {
-            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update2_cc(s1);
-        set_cc_op(s1, CC_OP_ADDB + ot);
-        break;
-    case OP_SUBL:
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_neg_tl(s1->T0, s1->T1);
-            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
-                                        s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
-        } else {
-            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
-            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update2_cc(s1);
-        set_cc_op(s1, CC_OP_SUBB + ot);
-        break;
-    default:
-    case OP_ANDL:
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
-                                        s1->mem_index, ot | MO_LE);
-        } else {
-            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update1_cc(s1);
-        set_cc_op(s1, CC_OP_LOGICB + ot);
-        break;
-    case OP_ORL:
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
-                                       s1->mem_index, ot | MO_LE);
-        } else {
-            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update1_cc(s1);
-        set_cc_op(s1, CC_OP_LOGICB + ot);
-        break;
-    case OP_XORL:
-        if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
-                                        s1->mem_index, ot | MO_LE);
-        } else {
-            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
-            gen_op_st_rm_T0_A0(s1, ot, d);
-        }
-        gen_op_update1_cc(s1);
-        set_cc_op(s1, CC_OP_LOGICB + ot);
-        break;
-    case OP_CMPL:
-        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
-        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
-        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
-        set_cc_op(s1, CC_OP_SUBB + ot);
-        break;
-    }
-}
-
-/* if d == OR_TMP0, it means memory operand (address in A0) */
-static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
-{
-    if (s1->prefix & PREFIX_LOCK) {
-        if (d != OR_TMP0) {
-            /* Lock prefix when destination is not memory */
-            gen_illegal_opcode(s1);
-            return;
-        }
-        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
-        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
-                                    s1->mem_index, ot | MO_LE);
-    } else {
-        if (d != OR_TMP0) {
-            gen_op_mov_v_reg(s1, ot, s1->T0, d);
-        } else {
-            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
-        }
-        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
-        gen_op_st_rm_T0_A0(s1, ot, d);
-    }
-
-    gen_compute_eflags_c(s1, cpu_cc_src);
-    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
-    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
-}
-
 static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                             TCGv shm1, TCGv count, bool is_right)
 {
@@ -1686,298 +1486,6 @@ static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
     set_cc_op(s, CC_OP_DYNAMIC);
 }
 
-static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
-                            int is_right, int is_arith)
-{
-    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
-
-    /* load */
-    if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, s->T0, s->A0);
-    } else {
-        gen_op_mov_v_reg(s, ot, s->T0, op1);
-    }
-
-    tcg_gen_andi_tl(s->T1, s->T1, mask);
-    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
-
-    if (is_right) {
-        if (is_arith) {
-            gen_exts(ot, s->T0);
-            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
-            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
-        } else {
-            gen_extu(ot, s->T0);
-            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
-            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
-        }
-    } else {
-        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
-        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
-    }
-
-    /* store */
-    gen_op_st_rm_T0_A0(s, ot, op1);
-
-    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
-}
-
-static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
-                            int is_right, int is_arith)
-{
-    int mask = (ot == MO_64 ? 0x3f : 0x1f);
-
-    /* load */
-    if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, s->T0, s->A0);
-    else
-        gen_op_mov_v_reg(s, ot, s->T0, op1);
-
-    op2 &= mask;
-    if (op2 != 0) {
-        if (is_right) {
-            if (is_arith) {
-                gen_exts(ot, s->T0);
-                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
-                tcg_gen_sari_tl(s->T0, s->T0, op2);
-            } else {
-                gen_extu(ot, s->T0);
-                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
-                tcg_gen_shri_tl(s->T0, s->T0, op2);
-            }
-        } else {
-            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
-            tcg_gen_shli_tl(s->T0, s->T0, op2);
-        }
-    }
-
-    /* store */
-    gen_op_st_rm_T0_A0(s, ot, op1);
-
-    /* update eflags if non zero shift */
-    if (op2 != 0) {
-        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
-        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
-    }
-}
-
-static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
-{
-    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
-    TCGv_i32 t0, t1;
-
-    /* load */
-    if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, s->T0, s->A0);
-    } else {
-        gen_op_mov_v_reg(s, ot, s->T0, op1);
-    }
-
-    tcg_gen_andi_tl(s->T1, s->T1, mask);
-
-    switch (ot) {
-    case MO_8:
-        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
-        tcg_gen_ext8u_tl(s->T0, s->T0);
-        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
-        goto do_long;
-    case MO_16:
-        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
-        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
-        goto do_long;
-    do_long:
-#ifdef TARGET_X86_64
-    case MO_32:
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
-        if (is_right) {
-            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
-        } else {
-            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
-        }
-        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
-        break;
-#endif
-    default:
-        if (is_right) {
-            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
-        } else {
-            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
-        }
-        break;
-    }
-
-    /* store */
-    gen_op_st_rm_T0_A0(s, ot, op1);
-
-    /* We'll need the flags computed into CC_SRC.  */
-    gen_compute_eflags(s);
-
-    /* The value that was "rotated out" is now present at the other end
-       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
-       since we've computed the flags into CC_SRC, these variables are
-       currently dead.  */
-    if (is_right) {
-        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
-        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
-        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
-    } else {
-        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
-        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
-    }
-    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
-    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
-
-    /* Now conditionally store the new CC_OP value.  If the shift count
-       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
-       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
-       exactly as we computed above.  */
-    t0 = tcg_constant_i32(0);
-    t1 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(t1, s->T1);
-    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
-    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
-                        s->tmp2_i32, s->tmp3_i32);
-
-    /* The CC_OP value is no longer predictable.  */
-    set_cc_op(s, CC_OP_DYNAMIC);
-}
-
-static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
-                          int is_right)
-{
-    int mask = (ot == MO_64 ? 0x3f : 0x1f);
-    int shift;
-
-    /* load */
-    if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, s->T0, s->A0);
-    } else {
-        gen_op_mov_v_reg(s, ot, s->T0, op1);
-    }
-
-    op2 &= mask;
-    if (op2 != 0) {
-        switch (ot) {
-#ifdef TARGET_X86_64
-        case MO_32:
-            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-            if (is_right) {
-                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
-            } else {
-                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
-            }
-            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
-            break;
-#endif
-        default:
-            if (is_right) {
-                tcg_gen_rotri_tl(s->T0, s->T0, op2);
-            } else {
-                tcg_gen_rotli_tl(s->T0, s->T0, op2);
-            }
-            break;
-        case MO_8:
-            mask = 7;
-            goto do_shifts;
-        case MO_16:
-            mask = 15;
-        do_shifts:
-            shift = op2 & mask;
-            if (is_right) {
-                shift = mask + 1 - shift;
-            }
-            gen_extu(ot, s->T0);
-            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
-            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
-            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
-            break;
-        }
-    }
-
-    /* store */
-    gen_op_st_rm_T0_A0(s, ot, op1);
-
-    if (op2 != 0) {
-        /* Compute the flags into CC_SRC.  */
-        gen_compute_eflags(s);
-
-        /* The value that was "rotated out" is now present at the other end
-           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
-           since we've computed the flags into CC_SRC, these variables are
-           currently dead.  */
-        if (is_right) {
-            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
-            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
-            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
-        } else {
-            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
-            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
-        }
-        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
-        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
-        set_cc_op(s, CC_OP_ADCOX);
-    }
-}
-
-/* XXX: add faster immediate = 1 case */
-static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
-                           int is_right)
-{
-    gen_compute_eflags(s);
-    assert(s->cc_op == CC_OP_EFLAGS);
-
-    /* load */
-    if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, s->T0, s->A0);
-    else
-        gen_op_mov_v_reg(s, ot, s->T0, op1);
-
-    if (is_right) {
-        switch (ot) {
-        case MO_8:
-            gen_helper_rcrb(s->T0, tcg_env, s->T0, s->T1);
-            break;
-        case MO_16:
-            gen_helper_rcrw(s->T0, tcg_env, s->T0, s->T1);
-            break;
-        case MO_32:
-            gen_helper_rcrl(s->T0, tcg_env, s->T0, s->T1);
-            break;
-#ifdef TARGET_X86_64
-        case MO_64:
-            gen_helper_rcrq(s->T0, tcg_env, s->T0, s->T1);
-            break;
-#endif
-        default:
-            g_assert_not_reached();
-        }
-    } else {
-        switch (ot) {
-        case MO_8:
-            gen_helper_rclb(s->T0, tcg_env, s->T0, s->T1);
-            break;
-        case MO_16:
-            gen_helper_rclw(s->T0, tcg_env, s->T0, s->T1);
-            break;
-        case MO_32:
-            gen_helper_rcll(s->T0, tcg_env, s->T0, s->T1);
-            break;
-#ifdef TARGET_X86_64
-        case MO_64:
-            gen_helper_rclq(s->T0, tcg_env, s->T0, s->T1);
-            break;
-#endif
-        default:
-            g_assert_not_reached();
-        }
-    }
-    /* store */
-    gen_op_st_rm_T0_A0(s, ot, op1);
-}
-
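
The RCL/RCR expansions removed above go through out-of-line helpers because
rotate-through-carry acts on a width+1-bit quantity that a plain rotate
cannot express. A rough model of the 8-bit case, assuming the rotate count
has already been reduced to the 1..8 range:

    #include <stdbool.h>
    #include <stdint.h>

    static uint8_t rcl8(uint8_t val, int count, bool *cf)
    {
        uint16_t x = ((uint16_t)*cf << 8) | val;  /* 9-bit quantity */
        x = ((x << count) | (x >> (9 - count))) & 0x1ff;
        *cf = (x >> 8) & 1;
        return (uint8_t)x;
    }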
 /* XXX: add faster immediate case */
 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
                              bool is_right, TCGv count_in)
@@ -2062,63 +1570,6 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
 }
 
-static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
-{
-    if (s != OR_TMP1)
-        gen_op_mov_v_reg(s1, ot, s1->T1, s);
-    switch(op) {
-    case OP_ROL:
-        gen_rot_rm_T1(s1, ot, d, 0);
-        break;
-    case OP_ROR:
-        gen_rot_rm_T1(s1, ot, d, 1);
-        break;
-    case OP_SHL:
-    case OP_SHL1:
-        gen_shift_rm_T1(s1, ot, d, 0, 0);
-        break;
-    case OP_SHR:
-        gen_shift_rm_T1(s1, ot, d, 1, 0);
-        break;
-    case OP_SAR:
-        gen_shift_rm_T1(s1, ot, d, 1, 1);
-        break;
-    case OP_RCL:
-        gen_rotc_rm_T1(s1, ot, d, 0);
-        break;
-    case OP_RCR:
-        gen_rotc_rm_T1(s1, ot, d, 1);
-        break;
-    }
-}
-
-static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
-{
-    switch(op) {
-    case OP_ROL:
-        gen_rot_rm_im(s1, ot, d, c, 0);
-        break;
-    case OP_ROR:
-        gen_rot_rm_im(s1, ot, d, c, 1);
-        break;
-    case OP_SHL:
-    case OP_SHL1:
-        gen_shift_rm_im(s1, ot, d, c, 0, 0);
-        break;
-    case OP_SHR:
-        gen_shift_rm_im(s1, ot, d, c, 1, 0);
-        break;
-    case OP_SAR:
-        gen_shift_rm_im(s1, ot, d, c, 1, 1);
-        break;
-    default:
-        /* currently not optimized */
-        tcg_gen_movi_tl(s1->T1, c);
-        gen_shift(s1, op, ot, d, OR_TMP1);
-        break;
-    }
-}
-
 #define X86_MAX_INSN_LENGTH 15
 
 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
@@ -2154,11 +1605,6 @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
     return translator_ldub(env, &s->base, advance_pc(env, s, 1));
 }
 
-static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
-{
-    return translator_lduw(env, &s->base, advance_pc(env, s, 2));
-}
-
 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
 {
     return translator_lduw(env, &s->base, advance_pc(env, s, 2));
@@ -2484,13 +1930,16 @@ static target_long insn_get_signed(CPUX86State *env, DisasContext *s, MemOp ot)
     return ret;
 }
 
-static inline int insn_const_size(MemOp ot)
+static void gen_conditional_jump_labels(DisasContext *s, target_long diff,
+                                        TCGLabel *not_taken, TCGLabel *taken)
 {
-    if (ot <= MO_32) {
-        return 1 << ot;
-    } else {
-        return 4;
+    if (not_taken) {
+        gen_set_label(not_taken);
     }
+    gen_jmp_rel_csize(s, 0, 1);
+
+    gen_set_label(taken);
+    gen_jmp_rel(s, s->dflag, diff, 0);
 }
 
 static void gen_jcc(DisasContext *s, int b, int diff)
@@ -2498,20 +1947,13 @@ static void gen_jcc(DisasContext *s, int b, int diff)
     TCGLabel *l1 = gen_new_label();
 
     gen_jcc1(s, b, l1);
-    gen_jmp_rel_csize(s, 0, 1);
-    gen_set_label(l1);
-    gen_jmp_rel(s, s->dflag, diff, 0);
+    gen_conditional_jump_labels(s, diff, NULL, l1);
 }
 
 static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, s->T1);
+    CCPrepare cc = gen_prepare_cc(s, b, NULL);
 
-    if (cc.mask != -1) {
-        TCGv t0 = tcg_temp_new();
-        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
-        cc.reg = t0;
-    }
     if (!cc.use_reg2) {
         cc.reg2 = tcg_constant_tl(cc.imm);
     }
@@ -2519,26 +1961,21 @@ static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
     tcg_gen_movcond_tl(cc.cond, dest, cc.reg, cc.reg2, src, dest);
 }
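
The operand order of the movcond is easy to misread: the current value of
dest is the "false" operand, so CMOV leaves the destination untouched when
the condition fails. In plain C:

    #include <stdint.h>

    /* dest = cond(reg, reg2) ? src : dest  --  the movcond above */
    static uint64_t cmov(int cond_holds, uint64_t dest, uint64_t src)
    {
        return cond_holds ? src : dest;
    }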
 
-static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
+static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
 {
-    tcg_gen_ld32u_tl(s->T0, tcg_env,
-                     offsetof(CPUX86State,segs[seg_reg].selector));
-}
-
-static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
-{
-    tcg_gen_ext16u_tl(s->T0, s->T0);
-    tcg_gen_st32_tl(s->T0, tcg_env,
+    TCGv selector = tcg_temp_new();
+    tcg_gen_ext16u_tl(selector, seg);
+    tcg_gen_st32_tl(selector, tcg_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
-    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
+    tcg_gen_shli_tl(cpu_seg_base[seg_reg], selector, 4);
 }
 
-/* move T0 to seg_reg and compute if the CPU state may change. Never
+/* Move SRC to seg_reg and end the TB early if the CPU state may change.  Never
    call this function with seg_reg == R_CS */
-static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
+static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src)
 {
     if (PE(s) && !VM86(s)) {
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, src);
         gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
@@ -2550,13 +1987,45 @@ static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
             s->base.is_jmp = DISAS_EOB_NEXT;
         }
     } else {
-        gen_op_movl_seg_T0_vm(s, seg_reg);
+        gen_op_movl_seg_real(s, seg_reg, src);
         if (seg_reg == R_SS) {
             s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
         }
     }
 }
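
gen_movl_seg thus splits on mode: in protected mode the load goes through
the load_seg helper, which walks the descriptor table and may change CPU
state (hence the DISAS_EOB_* endings), while real and VM86 mode reduce to
the two lines of gen_op_movl_seg_real. A sketch of that real-mode rule,
with illustrative names:

    #include <stdint.h>

    struct seg { uint16_t sel; uint32_t base; };

    /* Real/VM86 mode: no descriptors, base is simply selector * 16. */
    static void load_seg_real(struct seg *sg, uint32_t src)
    {
        sg->sel  = (uint16_t)src;           /* tcg_gen_ext16u_tl      */
        sg->base = (uint32_t)sg->sel << 4;  /* tcg_gen_shli_tl(.., 4) */
    }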
 
+static void gen_far_call(DisasContext *s)
+{
+    TCGv_i32 new_cs = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(new_cs, s->T1);
+    if (PE(s) && !VM86(s)) {
+        gen_helper_lcall_protected(tcg_env, new_cs, s->T0,
+                                   tcg_constant_i32(s->dflag - 1),
+                                   eip_next_tl(s));
+    } else {
+        TCGv_i32 new_eip = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(new_eip, s->T0);
+        gen_helper_lcall_real(tcg_env, new_cs, new_eip,
+                              tcg_constant_i32(s->dflag - 1),
+                              eip_next_i32(s));
+    }
+    s->base.is_jmp = DISAS_JUMP;
+}
+
+static void gen_far_jmp(DisasContext *s)
+{
+    if (PE(s) && !VM86(s)) {
+        TCGv_i32 new_cs = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(new_cs, s->T1);
+        gen_helper_ljmp_protected(tcg_env, new_cs, s->T0,
+                                  eip_next_tl(s));
+    } else {
+        gen_op_movl_seg_real(s, R_CS, s->T1);
+        gen_op_jmp_v(s, s->T0);
+    }
+    s->base.is_jmp = DISAS_JUMP;
+}
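
Both protected-mode paths go through helpers (lcall_protected,
ljmp_protected) because the privilege and descriptor checks can fault;
gen_far_call also uses a helper in real mode, since it must push CS:IP,
while gen_far_jmp inlines the real/VM86 leg. A sketch of that leg, with
illustrative names:

    #include <stdint.h>

    static void far_jmp_real(uint16_t *cs_sel, uint32_t *cs_base,
                             uint32_t *eip,
                             uint16_t new_cs, uint32_t new_eip)
    {
        *cs_sel  = new_cs;
        *cs_base = (uint32_t)new_cs << 4;  /* gen_op_movl_seg_real */
        *eip     = new_eip;                /* gen_op_jmp_v         */
    }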
+
 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
 {
     /* no SVM activated; fast case */
@@ -2729,7 +2198,7 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
 
 /* an interrupt is different from an exception because of the
    privilege checks */
-static void gen_interrupt(DisasContext *s, int intno)
+static void gen_interrupt(DisasContext *s, uint8_t intno)
 {
     gen_update_cc_op(s);
     gen_update_eip_cur(s);
@@ -2796,7 +2265,7 @@ static void gen_bnd_jmp(DisasContext *s)
    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
    S->TF.  This is used by the syscall/sysret insns.  */
 static void
-do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
+gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
 {
     bool inhibit_reset;
 
@@ -2830,28 +2299,27 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
 }
 
 static inline void
-gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
+gen_eob_syscall(DisasContext *s)
 {
-    do_gen_eob_worker(s, inhibit, recheck_tf, false);
+    gen_eob_worker(s, false, true, false);
 }
 
-/* End of block.
-   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
-static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
+/* End of block.  Set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
+static void gen_eob_inhibit_irq(DisasContext *s)
 {
-    gen_eob_worker(s, inhibit, false);
+    gen_eob_worker(s, true, false, false);
 }
 
 /* End of block, resetting the inhibit irq flag.  */
 static void gen_eob(DisasContext *s)
 {
-    gen_eob_worker(s, false, false);
+    gen_eob_worker(s, false, false, false);
 }
 
 /* Jump to register */
 static void gen_jr(DisasContext *s)
 {
-    do_gen_eob_worker(s, false, false, true);
+    gen_eob_worker(s, false, false, true);
 }
 
 /* Jump to eip+diff, truncating the result to OT. */
@@ -2862,6 +2330,8 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
     target_ulong new_pc = s->pc + diff;
     target_ulong new_eip = new_pc - s->cs_base;
 
+    assert(!s->cc_op_dirty);
+
     /* In 64-bit mode, operand size is fixed at 64 bits. */
     if (!CODE64(s)) {
         if (ot == MO_16) {
@@ -2875,9 +2345,6 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
     }
     new_eip &= mask;
 
-    gen_update_cc_op(s);
-    set_cc_op(s, CC_OP_DYNAMIC);
-
     if (tb_cflags(s->base.tb) & CF_PCREL) {
         tcg_gen_addi_tl(cpu_eip, cpu_eip, new_pc - s->pc_save);
         /*
@@ -2984,10 +2451,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
     tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
 }
 
-#include "decode-new.h"
-#include "emit.c.inc"
-#include "decode-new.c.inc"
-
 static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
 {
     TCGv_i64 cmp, val, old;
@@ -3086,739 +2549,583 @@ static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
 }
 #endif
 
-/* convert one instruction. s->base.is_jmp is set if the translation must
-   be stopped. Return the next pc value */
-static bool disas_insn(DisasContext *s, CPUState *cpu)
+static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
 {
     CPUX86State *env = cpu_env(cpu);
-    int b, prefixes;
-    int shift;
-    MemOp ot, aflag, dflag;
-    int modrm, reg, rm, mod, op, opreg, val;
-    bool orig_cc_op_dirty = s->cc_op_dirty;
-    CCOp orig_cc_op = s->cc_op;
-    target_ulong orig_pc_save = s->pc_save;
+    bool update_fip = true;
+    int modrm, mod, rm, op;
 
-    s->pc = s->base.pc_next;
-    s->override = -1;
-#ifdef TARGET_X86_64
-    s->rex_r = 0;
-    s->rex_x = 0;
-    s->rex_b = 0;
-#endif
-    s->rip_offset = 0; /* for relative ip address */
-    s->vex_l = 0;
-    s->vex_v = 0;
-    s->vex_w = false;
-    switch (sigsetjmp(s->jmpbuf, 0)) {
-    case 0:
-        break;
-    case 1:
-        gen_exception_gpf(s);
+    if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+        /* if CR0.EM or CR0.TS are set, generate an FPU exception */
+        /* XXX: what to do on an illegal op? */
+        gen_exception(s, EXCP07_PREX);
         return true;
-    case 2:
-        /* Restore state that may affect the next instruction. */
-        s->pc = s->base.pc_next;
-        /*
-         * TODO: These save/restore can be removed after the table-based
-         * decoder is complete; we will be decoding the insn completely
-         * before any code generation that might affect these variables.
-         */
-        s->cc_op_dirty = orig_cc_op_dirty;
-        s->cc_op = orig_cc_op;
-        s->pc_save = orig_pc_save;
-        /* END TODO */
-        s->base.num_insns--;
-        tcg_remove_ops_after(s->prev_insn_end);
-        s->base.insn_start = s->prev_insn_start;
-        s->base.is_jmp = DISAS_TOO_MANY;
-        return false;
-    default:
-        g_assert_not_reached();
     }
+    modrm = x86_ldub_code(env, s);
+    mod = (modrm >> 6) & 3;
+    rm = modrm & 7;
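+    /*
+     * The x87 opcode space is indexed by the low three bits of the
+     * first opcode byte (0xd8..0xdf) combined with the modrm reg
+     * field, e.g. 0xd9 /5 gives op == 0x0d ("grp d9/5" below).
+     */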
+    op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+    if (mod != 3) {
+        /* memory op */
+        AddressParts a = gen_lea_modrm_0(env, s, modrm);
+        TCGv ea = gen_lea_modrm_1(s, a, false);
+        TCGv last_addr = tcg_temp_new();
+        bool update_fdp = true;
+
+        tcg_gen_mov_tl(last_addr, ea);
+        gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
+
+        switch (op) {
+        case 0x00 ... 0x07: /* fxxxs */
+        case 0x10 ... 0x17: /* fixxxl */
+        case 0x20 ... 0x27: /* fxxxl */
+        case 0x30 ... 0x37: /* fixxx */
+            {
+                int op1;
+                op1 = op & 7;
+
+                switch (op >> 4) {
+                case 0:
+                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    gen_helper_flds_FT0(tcg_env, s->tmp2_i32);
+                    break;
+                case 1:
+                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
+                    break;
+                case 2:
+                    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
+                                        s->mem_index, MO_LEUQ);
+                    gen_helper_fldl_FT0(tcg_env, s->tmp1_i64);
+                    break;
+                case 3:
+                default:
+                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LESW);
+                    gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
+                    break;
+                }
 
-    prefixes = 0;
-
- next_byte:
-    s->prefix = prefixes;
-    b = x86_ldub_code(env, s);
-    /* Collect prefixes.  */
-    switch (b) {
-    default:
-        break;
-    case 0x0f:
-        b = x86_ldub_code(env, s) + 0x100;
-        break;
-    case 0xf3:
-        prefixes |= PREFIX_REPZ;
-        prefixes &= ~PREFIX_REPNZ;
-        goto next_byte;
-    case 0xf2:
-        prefixes |= PREFIX_REPNZ;
-        prefixes &= ~PREFIX_REPZ;
-        goto next_byte;
-    case 0xf0:
-        prefixes |= PREFIX_LOCK;
-        goto next_byte;
-    case 0x2e:
-        s->override = R_CS;
-        goto next_byte;
-    case 0x36:
-        s->override = R_SS;
-        goto next_byte;
-    case 0x3e:
-        s->override = R_DS;
-        goto next_byte;
-    case 0x26:
-        s->override = R_ES;
-        goto next_byte;
-    case 0x64:
-        s->override = R_FS;
-        goto next_byte;
-    case 0x65:
-        s->override = R_GS;
-        goto next_byte;
-    case 0x66:
-        prefixes |= PREFIX_DATA;
-        goto next_byte;
-    case 0x67:
-        prefixes |= PREFIX_ADR;
-        goto next_byte;
-#ifdef TARGET_X86_64
-    case 0x40 ... 0x4f:
-        if (CODE64(s)) {
-            /* REX prefix */
-            prefixes |= PREFIX_REX;
-            s->vex_w = (b >> 3) & 1;
-            s->rex_r = (b & 0x4) << 1;
-            s->rex_x = (b & 0x2) << 2;
-            s->rex_b = (b & 0x1) << 3;
-            goto next_byte;
-        }
-        break;
-#endif
-    case 0xc5: /* 2-byte VEX */
-    case 0xc4: /* 3-byte VEX */
-        if (CODE32(s) && !VM86(s)) {
-            int vex2 = x86_ldub_code(env, s);
-            s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
-
-            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
-                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
-                   otherwise the instruction is LES or LDS.  */
-                break;
+                gen_helper_fp_arith_ST0_FT0(op1);
+                if (op1 == 3) {
+                    /* fcomp needs pop */
+                    gen_helper_fpop(tcg_env);
+                }
             }
-            disas_insn_new(s, cpu, b);
-            return s->pc;
-        }
-        break;
-    }
-
-    /* Post-process prefixes.  */
-    if (CODE64(s)) {
-        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
-           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
-           over 0x66 if both are present.  */
-        dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
-        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
-        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
-    } else {
-        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
-        if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
-            dflag = MO_32;
-        } else {
-            dflag = MO_16;
-        }
-        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
-        if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
-            aflag = MO_32;
-        }  else {
-            aflag = MO_16;
-        }
-    }
-
-    s->prefix = prefixes;
-    s->aflag = aflag;
-    s->dflag = dflag;
-
-    /* now check op code */
-    switch (b) {
-        /**************************/
-        /* arith & logic */
-    case 0x00 ... 0x05:
-    case 0x08 ... 0x0d:
-    case 0x10 ... 0x15:
-    case 0x18 ... 0x1d:
-    case 0x20 ... 0x25:
-    case 0x28 ... 0x2d:
-    case 0x30 ... 0x35:
-    case 0x38 ... 0x3d:
-        {
-            int f;
-            op = (b >> 3) & 7;
-            f = (b >> 1) & 3;
-
-            ot = mo_b_d(b, dflag);
-
-            switch(f) {
-            case 0: /* OP Ev, Gv */
-                modrm = x86_ldub_code(env, s);
-                reg = ((modrm >> 3) & 7) | REX_R(s);
-                mod = (modrm >> 6) & 3;
-                rm = (modrm & 7) | REX_B(s);
-                if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
-                    opreg = OR_TMP0;
-                } else if (op == OP_XORL && rm == reg) {
-                xor_zero:
-                    /* xor reg, reg optimisation */
-                    set_cc_op(s, CC_OP_CLR);
-                    tcg_gen_movi_tl(s->T0, 0);
-                    gen_op_mov_reg_v(s, ot, reg, s->T0);
+            break;
+        case 0x08: /* flds */
+        case 0x0a: /* fsts */
+        case 0x0b: /* fstps */
+        case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+        case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+        case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+            switch (op & 7) {
+            case 0:
+                switch (op >> 4) {
+                case 0:
+                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    gen_helper_flds_ST0(tcg_env, s->tmp2_i32);
+                    break;
+                case 1:
+                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
+                    break;
+                case 2:
+                    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
+                                        s->mem_index, MO_LEUQ);
+                    gen_helper_fldl_ST0(tcg_env, s->tmp1_i64);
+                    break;
+                case 3:
+                default:
+                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LESW);
+                    gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
                     break;
-                } else {
-                    opreg = rm;
                 }
-                gen_op_mov_v_reg(s, ot, s->T1, reg);
-                gen_op(s, op, ot, opreg);
                 break;
-            case 1: /* OP Gv, Ev */
-                modrm = x86_ldub_code(env, s);
-                mod = (modrm >> 6) & 3;
-                reg = ((modrm >> 3) & 7) | REX_R(s);
-                rm = (modrm & 7) | REX_B(s);
-                if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, s->T1, s->A0);
-                } else if (op == OP_XORL && rm == reg) {
-                    goto xor_zero;
-                } else {
-                    gen_op_mov_v_reg(s, ot, s->T1, rm);
+            case 1:
+                /* XXX: the corresponding CPUID bit must be tested! */
+                switch (op >> 4) {
+                case 1:
+                    gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env);
+                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    break;
+                case 2:
+                    gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env);
+                    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
+                                        s->mem_index, MO_LEUQ);
+                    break;
+                case 3:
+                default:
+                    gen_helper_fistt_ST0(s->tmp2_i32, tcg_env);
+                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUW);
+                    break;
                 }
-                gen_op(s, op, ot, reg);
+                gen_helper_fpop(tcg_env);
                 break;
-            case 2: /* OP A, Iv */
-                val = insn_get(env, s, ot);
-                tcg_gen_movi_tl(s->T1, val);
-                gen_op(s, op, ot, OR_EAX);
-                break;
-            }
-        }
-        break;
-
-    case 0x82:
-        if (CODE64(s))
-            goto illegal_op;
-        /* fall through */
-    case 0x80: /* GRP1 */
-    case 0x81:
-    case 0x83:
-        {
-            ot = mo_b_d(b, dflag);
-
-            modrm = x86_ldub_code(env, s);
-            mod = (modrm >> 6) & 3;
-            rm = (modrm & 7) | REX_B(s);
-            op = (modrm >> 3) & 7;
-
-            if (mod != 3) {
-                if (b == 0x83)
-                    s->rip_offset = 1;
-                else
-                    s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
-                opreg = OR_TMP0;
-            } else {
-                opreg = rm;
-            }
-
-            switch(b) {
             default:
-            case 0x80:
-            case 0x81:
-            case 0x82:
-                val = insn_get(env, s, ot);
-                break;
-            case 0x83:
-                val = (int8_t)insn_get(env, s, MO_8);
+                switch (op >> 4) {
+                case 0:
+                    gen_helper_fsts_ST0(s->tmp2_i32, tcg_env);
+                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    break;
+                case 1:
+                    gen_helper_fistl_ST0(s->tmp2_i32, tcg_env);
+                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUL);
+                    break;
+                case 2:
+                    gen_helper_fstl_ST0(s->tmp1_i64, tcg_env);
+                    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
+                                        s->mem_index, MO_LEUQ);
+                    break;
+                case 3:
+                default:
+                    gen_helper_fist_ST0(s->tmp2_i32, tcg_env);
+                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                        s->mem_index, MO_LEUW);
+                    break;
+                }
+                if ((op & 7) == 3) {
+                    gen_helper_fpop(tcg_env);
+                }
                 break;
             }
-            tcg_gen_movi_tl(s->T1, val);
-            gen_op(s, op, ot, opreg);
+            break;
+        case 0x0c: /* fldenv mem */
+            gen_helper_fldenv(tcg_env, s->A0,
+                              tcg_constant_i32(s->dflag - 1));
+            update_fip = update_fdp = false;
+            break;
+        case 0x0d: /* fldcw mem */
+            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+                                s->mem_index, MO_LEUW);
+            gen_helper_fldcw(tcg_env, s->tmp2_i32);
+            update_fip = update_fdp = false;
+            break;
+        case 0x0e: /* fnstenv mem */
+            gen_helper_fstenv(tcg_env, s->A0,
+                              tcg_constant_i32(s->dflag - 1));
+            update_fip = update_fdp = false;
+            break;
+        case 0x0f: /* fnstcw mem */
+            gen_helper_fnstcw(s->tmp2_i32, tcg_env);
+            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                s->mem_index, MO_LEUW);
+            update_fip = update_fdp = false;
+            break;
+        case 0x1d: /* fldt mem */
+            gen_helper_fldt_ST0(tcg_env, s->A0);
+            break;
+        case 0x1f: /* fstpt mem */
+            gen_helper_fstt_ST0(tcg_env, s->A0);
+            gen_helper_fpop(tcg_env);
+            break;
+        case 0x2c: /* frstor mem */
+            gen_helper_frstor(tcg_env, s->A0,
+                              tcg_constant_i32(s->dflag - 1));
+            update_fip = update_fdp = false;
+            break;
+        case 0x2e: /* fnsave mem */
+            gen_helper_fsave(tcg_env, s->A0,
+                             tcg_constant_i32(s->dflag - 1));
+            update_fip = update_fdp = false;
+            break;
+        case 0x2f: /* fnstsw mem */
+            gen_helper_fnstsw(s->tmp2_i32, tcg_env);
+            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+                                s->mem_index, MO_LEUW);
+            update_fip = update_fdp = false;
+            break;
+        case 0x3c: /* fbld */
+            gen_helper_fbld_ST0(tcg_env, s->A0);
+            break;
+        case 0x3e: /* fbstp */
+            gen_helper_fbst_ST0(tcg_env, s->A0);
+            gen_helper_fpop(tcg_env);
+            break;
+        case 0x3d: /* fildll */
+            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
+                                s->mem_index, MO_LEUQ);
+            gen_helper_fildll_ST0(tcg_env, s->tmp1_i64);
+            break;
+        case 0x3f: /* fistpll */
+            gen_helper_fistll_ST0(s->tmp1_i64, tcg_env);
+            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
+                                s->mem_index, MO_LEUQ);
+            gen_helper_fpop(tcg_env);
+            break;
+        default:
+            return false;
         }
-        break;
 
-        /**************************/
-        /* inc, dec, and other misc arith */
-    case 0x40 ... 0x47: /* inc Gv */
-        ot = dflag;
-        gen_inc(s, ot, OR_EAX + (b & 7), 1);
-        break;
-    case 0x48 ... 0x4f: /* dec Gv */
-        ot = dflag;
-        gen_inc(s, ot, OR_EAX + (b & 7), -1);
-        break;
-    case 0xf6: /* GRP3 */
-    case 0xf7:
-        ot = mo_b_d(b, dflag);
+        if (update_fdp) {
+            int last_seg = s->override >= 0 ? s->override : a.def_seg;
 
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        rm = (modrm & 7) | REX_B(s);
-        op = (modrm >> 3) & 7;
-        if (mod != 3) {
-            if (op == 0) {
-                s->rip_offset = insn_const_size(ot);
-            }
-            gen_lea_modrm(env, s, modrm);
-            /* For those below that handle locked memory, don't load here.  */
-            if (!(s->prefix & PREFIX_LOCK)
-                || op != 2) {
-                gen_op_ld_v(s, ot, s->T0, s->A0);
-            }
-        } else {
-            gen_op_mov_v_reg(s, ot, s->T0, rm);
+            tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
+                           offsetof(CPUX86State,
+                                    segs[last_seg].selector));
+            tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
+                             offsetof(CPUX86State, fpds));
+            tcg_gen_st_tl(last_addr, tcg_env,
+                          offsetof(CPUX86State, fpdp));
         }
-
-        switch(op) {
-        case 0: /* test */
-            val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(s->T1, val);
-            gen_op_testl_T0_T1_cc(s);
-            set_cc_op(s, CC_OP_LOGICB + ot);
+    } else {
+        /* register float ops */
+        int opreg = rm;
+
+        switch (op) {
+        case 0x08: /* fld sti */
+            gen_helper_fpush(tcg_env);
+            gen_helper_fmov_ST0_STN(tcg_env,
+                                    tcg_constant_i32((opreg + 1) & 7));
             break;
-        case 2: /* not */
-            if (s->prefix & PREFIX_LOCK) {
-                if (mod == 3) {
-                    goto illegal_op;
-                }
-                tcg_gen_movi_tl(s->T0, ~0);
-                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
-                                            s->mem_index, ot | MO_LE);
-            } else {
-                tcg_gen_not_tl(s->T0, s->T0);
-                if (mod != 3) {
-                    gen_op_st_v(s, ot, s->T0, s->A0);
-                } else {
-                    gen_op_mov_reg_v(s, ot, rm, s->T0);
-                }
-            }
+        case 0x09: /* fxchg sti */
+        case 0x29: /* fxchg4 sti, undocumented op */
+        case 0x39: /* fxchg7 sti, undocumented op */
+            gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg));
             break;
-        case 3: /* neg */
-            if (s->prefix & PREFIX_LOCK) {
-                TCGLabel *label1;
-                TCGv a0, t0, t1, t2;
-
-                if (mod == 3) {
-                    goto illegal_op;
-                }
-                a0 = s->A0;
-                t0 = s->T0;
-                label1 = gen_new_label();
-
-                gen_set_label(label1);
-                t1 = tcg_temp_new();
-                t2 = tcg_temp_new();
-                tcg_gen_mov_tl(t2, t0);
-                tcg_gen_neg_tl(t1, t0);
-                tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
-                                          s->mem_index, ot | MO_LE);
-                tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
-
-                tcg_gen_neg_tl(s->T0, t0);
-            } else {
-                tcg_gen_neg_tl(s->T0, s->T0);
-                if (mod != 3) {
-                    gen_op_st_v(s, ot, s->T0, s->A0);
-                } else {
-                    gen_op_mov_reg_v(s, ot, rm, s->T0);
-                }
+        case 0x0a: /* grp d9/2 */
+            switch (rm) {
+            case 0: /* fnop */
+                /*
+                 * check exceptions (FreeBSD FPU probe)
+                 * needs to be treated as I/O because of ferr_irq
+                 */
+                translator_io_start(&s->base);
+                gen_helper_fwait(tcg_env);
+                update_fip = false;
+                break;
+            default:
+                return false;
             }
-            gen_op_update_neg_cc(s);
-            set_cc_op(s, CC_OP_SUBB + ot);
             break;
-        case 4: /* mul */
-            switch(ot) {
-            case MO_8:
-                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
-                tcg_gen_ext8u_tl(s->T0, s->T0);
-                tcg_gen_ext8u_tl(s->T1, s->T1);
-                /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
-                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
-                set_cc_op(s, CC_OP_MULB);
+        case 0x0c: /* grp d9/4 */
+            switch (rm) {
+            case 0: /* fchs */
+                gen_helper_fchs_ST0(tcg_env);
                 break;
-            case MO_16:
-                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
-                tcg_gen_ext16u_tl(s->T0, s->T0);
-                tcg_gen_ext16u_tl(s->T1, s->T1);
-                /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
-                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_shri_tl(s->T0, s->T0, 16);
-                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
-                tcg_gen_mov_tl(cpu_cc_src, s->T0);
-                set_cc_op(s, CC_OP_MULW);
+            case 1: /* fabs */
+                gen_helper_fabs_ST0(tcg_env);
                 break;
-            default:
-            case MO_32:
-                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
-                                  s->tmp2_i32, s->tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
-                set_cc_op(s, CC_OP_MULL);
+            case 4: /* ftst */
+                gen_helper_fldz_FT0(tcg_env);
+                gen_helper_fcom_ST0_FT0(tcg_env);
                 break;
-#ifdef TARGET_X86_64
-            case MO_64:
-                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  s->T0, cpu_regs[R_EAX]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
-                set_cc_op(s, CC_OP_MULQ);
+            case 5: /* fxam */
+                gen_helper_fxam_ST0(tcg_env);
                 break;
-#endif
+            default:
+                return false;
             }
             break;
-        case 5: /* imul */
-            switch(ot) {
-            case MO_8:
-                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
-                tcg_gen_ext8s_tl(s->T0, s->T0);
-                tcg_gen_ext8s_tl(s->T1, s->T1);
-                /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
-                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_ext8s_tl(s->tmp0, s->T0);
-                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
-                set_cc_op(s, CC_OP_MULB);
+        case 0x0d: /* grp d9/5 */
+            {
+                switch (rm) {
+                case 0:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fld1_ST0(tcg_env);
+                    break;
+                case 1:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fldl2t_ST0(tcg_env);
+                    break;
+                case 2:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fldl2e_ST0(tcg_env);
+                    break;
+                case 3:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fldpi_ST0(tcg_env);
+                    break;
+                case 4:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fldlg2_ST0(tcg_env);
+                    break;
+                case 5:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fldln2_ST0(tcg_env);
+                    break;
+                case 6:
+                    gen_helper_fpush(tcg_env);
+                    gen_helper_fldz_ST0(tcg_env);
+                    break;
+                default:
+                    return false;
+                }
+            }
+            break;
+        case 0x0e: /* grp d9/6 */
+            switch (rm) {
+            case 0: /* f2xm1 */
+                gen_helper_f2xm1(tcg_env);
                 break;
-            case MO_16:
-                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
-                tcg_gen_ext16s_tl(s->T0, s->T0);
-                tcg_gen_ext16s_tl(s->T1, s->T1);
-                /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
-                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_ext16s_tl(s->tmp0, s->T0);
-                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
-                tcg_gen_shri_tl(s->T0, s->T0, 16);
-                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
-                set_cc_op(s, CC_OP_MULW);
+            case 1: /* fyl2x */
+                gen_helper_fyl2x(tcg_env);
                 break;
-            default:
-            case MO_32:
-                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
-                                  s->tmp2_i32, s->tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
-                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
-                set_cc_op(s, CC_OP_MULL);
+            case 2: /* fptan */
+                gen_helper_fptan(tcg_env);
                 break;
-#ifdef TARGET_X86_64
-            case MO_64:
-                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  s->T0, cpu_regs[R_EAX]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
-                set_cc_op(s, CC_OP_MULQ);
+            case 3: /* fpatan */
+                gen_helper_fpatan(tcg_env);
                 break;
-#endif
-            }
-            break;
-        case 6: /* div */
-            switch(ot) {
-            case MO_8:
-                gen_helper_divb_AL(tcg_env, s->T0);
+            case 4: /* fxtract */
+                gen_helper_fxtract(tcg_env);
                 break;
-            case MO_16:
-                gen_helper_divw_AX(tcg_env, s->T0);
+            case 5: /* fprem1 */
+                gen_helper_fprem1(tcg_env);
                 break;
-            default:
-            case MO_32:
-                gen_helper_divl_EAX(tcg_env, s->T0);
+            case 6: /* fdecstp */
+                gen_helper_fdecstp(tcg_env);
                 break;
-#ifdef TARGET_X86_64
-            case MO_64:
-                gen_helper_divq_EAX(tcg_env, s->T0);
+            default:
+            case 7: /* fincstp */
+                gen_helper_fincstp(tcg_env);
                 break;
-#endif
             }
             break;
-        case 7: /* idiv */
-            switch(ot) {
-            case MO_8:
-                gen_helper_idivb_AL(tcg_env, s->T0);
+        case 0x0f: /* grp d9/7 */
+            switch (rm) {
+            case 0: /* fprem */
+                gen_helper_fprem(tcg_env);
                 break;
-            case MO_16:
-                gen_helper_idivw_AX(tcg_env, s->T0);
+            case 1: /* fyl2xp1 */
+                gen_helper_fyl2xp1(tcg_env);
                 break;
-            default:
-            case MO_32:
-                gen_helper_idivl_EAX(tcg_env, s->T0);
+            case 2: /* fsqrt */
+                gen_helper_fsqrt(tcg_env);
                 break;
-#ifdef TARGET_X86_64
-            case MO_64:
-                gen_helper_idivq_EAX(tcg_env, s->T0);
+            case 3: /* fsincos */
+                gen_helper_fsincos(tcg_env);
+                break;
+            case 5: /* fscale */
+                gen_helper_fscale(tcg_env);
+                break;
+            case 4: /* frndint */
+                gen_helper_frndint(tcg_env);
+                break;
+            case 6: /* fsin */
+                gen_helper_fsin(tcg_env);
+                break;
+            default:
+            case 7: /* fcos */
+                gen_helper_fcos(tcg_env);
                 break;
-#endif
             }
             break;
-        default:
-            goto unknown_op;
-        }
-        break;
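
The removed widening-multiply cases share one flag convention: cc_src holds the part of the product that must vanish, either the raw high half (mul) or the difference between the high half and the sign-extension of the low half (imul), so CF and OF are set exactly when the result overflows the destination. A standalone restatement for the signed 32-bit case (plain C, not QEMU code):

    #include <stdint.h>

    /* CC_OP_MUL* flag rule, sketched: flags are set iff the high half of
     * the widened product differs from the sign-extension of the low
     * half, i.e. iff the cc_src computed above is nonzero. */
    static int imul32_sets_cf_of(int32_t a, int32_t b)
    {
        int64_t wide = (int64_t)a * (int64_t)b;
        int32_t low  = (int32_t)wide;
        int32_t high = (int32_t)(wide >> 32);
        return high != (low < 0 ? -1 : 0);
    }
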
-
-    case 0xfe: /* GRP4 */
-    case 0xff: /* GRP5 */
-        ot = mo_b_d(b, dflag);
-
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        rm = (modrm & 7) | REX_B(s);
-        op = (modrm >> 3) & 7;
-        if (op >= 2 && b == 0xfe) {
-            goto unknown_op;
-        }
-        if (CODE64(s)) {
-            if (op == 2 || op == 4) {
-                /* operand size for jumps is 64 bit */
-                ot = MO_64;
-            } else if (op == 3 || op == 5) {
-                ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
-            } else if (op == 6) {
-                /* default push size is 64 bit */
-                ot = mo_pushpop(s, dflag);
+        case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+        case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+        case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+            {
+                int op1;
+
+                op1 = op & 7;
+                if (op >= 0x20) {
+                    gen_helper_fp_arith_STN_ST0(op1, opreg);
+                    if (op >= 0x30) {
+                        gen_helper_fpop(tcg_env);
+                    }
+                } else {
+                    gen_helper_fmov_FT0_STN(tcg_env,
+                                            tcg_constant_i32(opreg));
+                    gen_helper_fp_arith_ST0_FT0(op1);
+                }
             }
-        }
-        if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
-            if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, s->T0, s->A0);
-        } else {
-            gen_op_mov_v_reg(s, ot, s->T0, rm);
-        }
-
-        switch(op) {
-        case 0: /* inc Ev */
-            if (mod != 3)
-                opreg = OR_TMP0;
-            else
-                opreg = rm;
-            gen_inc(s, ot, opreg, 1);
             break;
-        case 1: /* dec Ev */
-            if (mod != 3)
-                opreg = OR_TMP0;
-            else
-                opreg = rm;
-            gen_inc(s, ot, opreg, -1);
+        case 0x02: /* fcom */
+        case 0x22: /* fcom2, undocumented op */
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fcom_ST0_FT0(tcg_env);
             break;
-        case 2: /* call Ev */
-            /* XXX: optimize if memory (no 'and' is necessary) */
-            if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(s->T0, s->T0);
-            }
-            gen_push_v(s, eip_next_tl(s));
-            gen_op_jmp_v(s, s->T0);
-            gen_bnd_jmp(s);
-            s->base.is_jmp = DISAS_JUMP;
+        case 0x03: /* fcomp */
+        case 0x23: /* fcomp3, undocumented op */
+        case 0x32: /* fcomp5, undocumented op */
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fcom_ST0_FT0(tcg_env);
+            gen_helper_fpop(tcg_env);
             break;
-        case 3: /* lcall Ev */
-            if (mod == 3) {
-                goto illegal_op;
-            }
-            gen_op_ld_v(s, ot, s->T1, s->A0);
-            gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, s->T0, s->A0);
-        do_lcall:
-            if (PE(s) && !VM86(s)) {
-                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                gen_helper_lcall_protected(tcg_env, s->tmp2_i32, s->T1,
-                                           tcg_constant_i32(dflag - 1),
-                                           eip_next_tl(s));
-            } else {
-                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
-                gen_helper_lcall_real(tcg_env, s->tmp2_i32, s->tmp3_i32,
-                                      tcg_constant_i32(dflag - 1),
-                                      eip_next_i32(s));
+        case 0x15: /* da/5 */
+            switch (rm) {
+            case 1: /* fucompp */
+                gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
+                gen_helper_fucom_ST0_FT0(tcg_env);
+                gen_helper_fpop(tcg_env);
+                gen_helper_fpop(tcg_env);
+                break;
+            default:
+                return false;
             }
-            s->base.is_jmp = DISAS_JUMP;
             break;
-        case 4: /* jmp Ev */
-            if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(s->T0, s->T0);
+        case 0x1c: /* db/4 */
+            switch (rm) {
+            case 0: /* feni (287 only, just do nop here) */
+                break;
+            case 1: /* fdisi (287 only, just do nop here) */
+                break;
+            case 2: /* fclex */
+                gen_helper_fclex(tcg_env);
+                update_fip = false;
+                break;
+            case 3: /* fninit */
+                gen_helper_fninit(tcg_env);
+                update_fip = false;
+                break;
+            case 4: /* fsetpm (287 only, just do nop here) */
+                break;
+            default:
+                return false;
             }
-            gen_op_jmp_v(s, s->T0);
-            gen_bnd_jmp(s);
-            s->base.is_jmp = DISAS_JUMP;
             break;
-        case 5: /* ljmp Ev */
-            if (mod == 3) {
+        case 0x1d: /* fucomi */
+            if (!(s->cpuid_features & CPUID_CMOV)) {
                 goto illegal_op;
             }
-            gen_op_ld_v(s, ot, s->T1, s->A0);
-            gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, s->T0, s->A0);
-        do_ljmp:
-            if (PE(s) && !VM86(s)) {
-                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                gen_helper_ljmp_protected(tcg_env, s->tmp2_i32, s->T1,
-                                          eip_next_tl(s));
-            } else {
-                gen_op_movl_seg_T0_vm(s, R_CS);
-                gen_op_jmp_v(s, s->T1);
+            gen_update_cc_op(s);
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fucomi_ST0_FT0(tcg_env);
+            set_cc_op(s, CC_OP_EFLAGS);
+            break;
+        case 0x1e: /* fcomi */
+            if (!(s->cpuid_features & CPUID_CMOV)) {
+                goto illegal_op;
             }
-            s->base.is_jmp = DISAS_JUMP;
+            gen_update_cc_op(s);
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fcomi_ST0_FT0(tcg_env);
+            set_cc_op(s, CC_OP_EFLAGS);
             break;
-        case 6: /* push Ev */
-            gen_push_v(s, s->T0);
+        case 0x28: /* ffree sti */
+            gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
             break;
-        default:
-            goto unknown_op;
-        }
-        break;
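
In 64-bit mode the removed GRP5 decode overrides the operand size per /reg operation before loading the operand. Restated as a standalone sketch (plain ints instead of QEMU's MemOp; `opsize16` means a 0x66 prefix is in effect):

    /* Operand size in bytes for GRP5 /op under CODE64, per the removed
     * special cases: near call/jmp are fixed at 64 bits, far call/jmp
     * honour 0x66 and REX.W, push uses the 64-bit default push size. */
    static int grp5_size64(int op, int opsize16, int rex_w)
    {
        if (op == 2 || op == 4) {              /* near call, near jmp */
            return 8;
        }
        if (op == 3 || op == 5) {              /* lcall, ljmp */
            return opsize16 ? 2 : (rex_w ? 8 : 4);
        }
        if (op == 6) {                         /* push */
            return opsize16 ? 2 : 8;
        }
        return opsize16 ? 2 : (rex_w ? 8 : 4); /* inc, dec */
    }
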
-
-    case 0x84: /* test Ev, Gv */
-    case 0x85:
-        ot = mo_b_d(b, dflag);
-
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(s, ot, s->T1, reg);
-        gen_op_testl_T0_T1_cc(s);
-        set_cc_op(s, CC_OP_LOGICB + ot);
-        break;
-
-    case 0xa8: /* test eAX, Iv */
-    case 0xa9:
-        ot = mo_b_d(b, dflag);
-        val = insn_get(env, s, ot);
-
-        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
-        tcg_gen_movi_tl(s->T1, val);
-        gen_op_testl_T0_T1_cc(s);
-        set_cc_op(s, CC_OP_LOGICB + ot);
-        break;
-
-    case 0x98: /* CWDE/CBW */
-        switch (dflag) {
-#ifdef TARGET_X86_64
-        case MO_64:
-            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
-            tcg_gen_ext32s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
+        case 0x2a: /* fst sti */
+            gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
             break;
-#endif
-        case MO_32:
-            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
-            tcg_gen_ext16s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
+        case 0x2b: /* fstp sti */
+        case 0x0b: /* fstp1 sti, undocumented op */
+        case 0x3a: /* fstp8 sti, undocumented op */
+        case 0x3b: /* fstp9 sti, undocumented op */
+            gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fpop(tcg_env);
             break;
-        case MO_16:
-            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
-            tcg_gen_ext8s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+        case 0x2c: /* fucom st(i) */
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fucom_ST0_FT0(tcg_env);
             break;
-        default:
-            g_assert_not_reached();
-        }
-        break;
-    case 0x99: /* CDQ/CWD */
-        switch (dflag) {
-#ifdef TARGET_X86_64
-        case MO_64:
-            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
-            tcg_gen_sari_tl(s->T0, s->T0, 63);
-            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
+        case 0x2d: /* fucomp st(i) */
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fucom_ST0_FT0(tcg_env);
+            gen_helper_fpop(tcg_env);
             break;
-#endif
-        case MO_32:
-            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
-            tcg_gen_ext32s_tl(s->T0, s->T0);
-            tcg_gen_sari_tl(s->T0, s->T0, 31);
-            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
+        case 0x33: /* de/3 */
+            switch (rm) {
+            case 1: /* fcompp */
+                gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
+                gen_helper_fcom_ST0_FT0(tcg_env);
+                gen_helper_fpop(tcg_env);
+                gen_helper_fpop(tcg_env);
+                break;
+            default:
+                return false;
+            }
             break;
-        case MO_16:
-            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
-            tcg_gen_ext16s_tl(s->T0, s->T0);
-            tcg_gen_sari_tl(s->T0, s->T0, 15);
-            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
+        case 0x38: /* ffreep sti, undocumented op */
+            gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fpop(tcg_env);
             break;
-        default:
-            g_assert_not_reached();
-        }
-        break;
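
The two conversion opcodes removed here differ only in direction: 0x98 sign-extends within rAX, while 0x99 broadcasts the sign bit of rAX into every bit of rDX. The 32-bit forms in plain C (a sketch, not QEMU code):

    #include <stdint.h>

    static void cwde(uint32_t *eax)                /* 0x98, 32-bit size */
    {
        *eax = (uint32_t)(int32_t)(int16_t)*eax;   /* AX -> EAX */
    }

    static void cdq(uint32_t eax, uint32_t *edx)   /* 0x99, 32-bit size */
    {
        *edx = ((int32_t)eax < 0) ? 0xffffffffu : 0u;
    }
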
-    case 0x1af: /* imul Gv, Ev */
-    case 0x69: /* imul Gv, Ev, I */
-    case 0x6b:
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        if (b == 0x69)
-            s->rip_offset = insn_const_size(ot);
-        else if (b == 0x6b)
-            s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        if (b == 0x69) {
-            val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(s->T1, val);
-        } else if (b == 0x6b) {
-            val = (int8_t)insn_get(env, s, MO_8);
-            tcg_gen_movi_tl(s->T1, val);
-        } else {
-            gen_op_mov_v_reg(s, ot, s->T1, reg);
-        }
-        switch (ot) {
-#ifdef TARGET_X86_64
-        case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
+        case 0x3c: /* df/4 */
+            switch (rm) {
+            case 0:
+                gen_helper_fnstsw(s->tmp2_i32, tcg_env);
+                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+                break;
+            default:
+                return false;
+            }
             break;
-#endif
-        case MO_32:
-            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
-            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
-                              s->tmp2_i32, s->tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
-            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
+        case 0x3d: /* fucomip */
+            if (!(s->cpuid_features & CPUID_CMOV)) {
+                goto illegal_op;
+            }
+            gen_update_cc_op(s);
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fucomi_ST0_FT0(tcg_env);
+            gen_helper_fpop(tcg_env);
+            set_cc_op(s, CC_OP_EFLAGS);
             break;
-        default:
-            tcg_gen_ext16s_tl(s->T0, s->T0);
-            tcg_gen_ext16s_tl(s->T1, s->T1);
-            /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-            tcg_gen_ext16s_tl(s->tmp0, s->T0);
-            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
-            gen_op_mov_reg_v(s, ot, reg, s->T0);
+        case 0x3e: /* fcomip */
+            if (!(s->cpuid_features & CPUID_CMOV)) {
+                goto illegal_op;
+            }
+            gen_update_cc_op(s);
+            gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+            gen_helper_fcomi_ST0_FT0(tcg_env);
+            gen_helper_fpop(tcg_env);
+            set_cc_op(s, CC_OP_EFLAGS);
             break;
+        case 0x10 ... 0x13: /* fcmovxx */
+        case 0x18 ... 0x1b:
+            {
+                int op1;
+                TCGLabel *l1;
+                static const uint8_t fcmov_cc[8] = {
+                    (JCC_B << 1),
+                    (JCC_Z << 1),
+                    (JCC_BE << 1),
+                    (JCC_P << 1),
+                };
+
+                if (!(s->cpuid_features & CPUID_CMOV)) {
+                    goto illegal_op;
+                }
+                op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
+                l1 = gen_new_label();
+                gen_jcc1_noeob(s, op1, l1);
+                gen_helper_fmov_ST0_STN(tcg_env,
+                                        tcg_constant_i32(opreg));
+                gen_set_label(l1);
+            }
+            break;
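
    /* Decoding note for the fcmov block above: bits 1:0 of op pick the
     * base predicate (B, Z, BE, P) and bit 3 distinguishes the negated
     * fcmovn* forms (0x18..0x1b).  gen_jcc1_noeob() branches *around*
     * the fmov when its condition holds, so the invert bit of op1 is
     * the complement of the opcode's negate bit:
     *
     *     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
     *
     * e.g. fcmovb (op 0x10) skips the move on "not below". */
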
+        default:
+            return false;
         }
-        set_cc_op(s, CC_OP_MULB + ot);
-        break;
+    }
+
+    if (update_fip) {
+        tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
+                       offsetof(CPUX86State, segs[R_CS].selector));
+        tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
+                         offsetof(CPUX86State, fpcs));
+        tcg_gen_st_tl(eip_cur_tl(s),
+                      tcg_env, offsetof(CPUX86State, fpip));
+    }
+    return true;
+
+ illegal_op:
+    gen_illegal_opcode(s);
+    return true;
+}
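
Unless a control instruction cleared update_fip above (fclex, fninit and similar no-state ops), the epilogue latches the x87 "last instruction pointer" that fnstenv/fnsave later expose. In plain terms, paraphrasing the two stores rather than adding code:

    /* env->fpcs = current CS selector;
     * env->fpip = offset of this FP instruction within CS; */
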
+
+static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
+{
+    CPUX86State *env = cpu_env(cpu);
+    int prefixes = s->prefix;
+    MemOp dflag = s->dflag;
+    int shift;
+    MemOp ot;
+    int modrm, reg, rm, mod, op, opreg, val;
+
+    /* now check op code */
+    switch (b) {
+        /**************************/
+        /* arith & logic */
     case 0x1c0:
     case 0x1c1: /* xadd Ev, Gv */
         ot = mo_b_d(b, dflag);
@@ -3976,375 +3283,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         break;
 
         /**************************/
-        /* push/pop */
-    case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
-        gen_push_v(s, s->T0);
-        break;
-    case 0x58 ... 0x5f: /* pop */
-        ot = gen_pop_T0(s);
-        /* NOTE: order is important for pop %sp */
-        gen_pop_update(s, ot);
-        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
-        break;
-    case 0x60: /* pusha */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_pusha(s);
-        break;
-    case 0x61: /* popa */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_popa(s);
-        break;
-    case 0x68: /* push Iv */
-    case 0x6a:
-        ot = mo_pushpop(s, dflag);
-        if (b == 0x68)
-            val = insn_get(env, s, ot);
-        else
-            val = (int8_t)insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(s->T0, val);
-        gen_push_v(s, s->T0);
-        break;
-    case 0x8f: /* pop Ev */
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        ot = gen_pop_T0(s);
-        if (mod == 3) {
-            /* NOTE: order is important for pop %sp */
-            gen_pop_update(s, ot);
-            rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(s, ot, rm, s->T0);
-        } else {
-            /* NOTE: order is important too for MMU exceptions */
-            s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
-            s->popl_esp_hack = 0;
-            gen_pop_update(s, ot);
-        }
-        break;
-    case 0xc8: /* enter */
-        {
-            int level;
-            val = x86_lduw_code(env, s);
-            level = x86_ldub_code(env, s);
-            gen_enter(s, val, level);
-        }
-        break;
-    case 0xc9: /* leave */
-        gen_leave(s);
-        break;
-    case 0x06: /* push es */
-    case 0x0e: /* push cs */
-    case 0x16: /* push ss */
-    case 0x1e: /* push ds */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_op_movl_T0_seg(s, b >> 3);
-        gen_push_v(s, s->T0);
-        break;
-    case 0x1a0: /* push fs */
-    case 0x1a8: /* push gs */
-        gen_op_movl_T0_seg(s, (b >> 3) & 7);
-        gen_push_v(s, s->T0);
-        break;
-    case 0x07: /* pop es */
-    case 0x17: /* pop ss */
-    case 0x1f: /* pop ds */
-        if (CODE64(s))
-            goto illegal_op;
-        reg = b >> 3;
-        ot = gen_pop_T0(s);
-        gen_movl_seg_T0(s, reg);
-        gen_pop_update(s, ot);
-        break;
-    case 0x1a1: /* pop fs */
-    case 0x1a9: /* pop gs */
-        ot = gen_pop_T0(s);
-        gen_movl_seg_T0(s, (b >> 3) & 7);
-        gen_pop_update(s, ot);
-        break;
-
-        /**************************/
-        /* mov */
-    case 0x88:
-    case 0x89: /* mov Gv, Ev */
-        ot = mo_b_d(b, dflag);
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-
-        /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
-        break;
-    case 0xc6:
-    case 0xc7: /* mov Ev, Iv */
-        ot = mo_b_d(b, dflag);
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        if (mod != 3) {
-            s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
-        }
-        val = insn_get(env, s, ot);
-        tcg_gen_movi_tl(s->T0, val);
-        if (mod != 3) {
-            gen_op_st_v(s, ot, s->T0, s->A0);
-        } else {
-            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
-        }
-        break;
-    case 0x8a:
-    case 0x8b: /* mov Ev, Gv */
-        ot = mo_b_d(b, dflag);
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(s, ot, reg, s->T0);
-        break;
-    case 0x8e: /* mov seg, Gv */
-        modrm = x86_ldub_code(env, s);
-        reg = (modrm >> 3) & 7;
-        if (reg >= 6 || reg == R_CS)
-            goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-        gen_movl_seg_T0(s, reg);
-        break;
-    case 0x8c: /* mov Gv, seg */
-        modrm = x86_ldub_code(env, s);
-        reg = (modrm >> 3) & 7;
-        mod = (modrm >> 6) & 3;
-        if (reg >= 6)
-            goto illegal_op;
-        gen_op_movl_T0_seg(s, reg);
-        ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
-        break;
-
-    case 0x1b6: /* movzbS Gv, Eb */
-    case 0x1b7: /* movzwS Gv, Eb */
-    case 0x1be: /* movsbS Gv, Eb */
-    case 0x1bf: /* movswS Gv, Eb */
-        {
-            MemOp d_ot;
-            MemOp s_ot;
-
-            /* d_ot is the size of destination */
-            d_ot = dflag;
-            /* ot is the size of source */
-            ot = (b & 1) + MO_8;
-            /* s_ot is the sign+size of source */
-            s_ot = b & 8 ? MO_SIGN | ot : ot;
-
-            modrm = x86_ldub_code(env, s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            mod = (modrm >> 6) & 3;
-            rm = (modrm & 7) | REX_B(s);
-
-            if (mod == 3) {
-                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
-                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
-                } else {
-                    gen_op_mov_v_reg(s, ot, s->T0, rm);
-                    switch (s_ot) {
-                    case MO_UB:
-                        tcg_gen_ext8u_tl(s->T0, s->T0);
-                        break;
-                    case MO_SB:
-                        tcg_gen_ext8s_tl(s->T0, s->T0);
-                        break;
-                    case MO_UW:
-                        tcg_gen_ext16u_tl(s->T0, s->T0);
-                        break;
-                    default:
-                    case MO_SW:
-                        tcg_gen_ext16s_tl(s->T0, s->T0);
-                        break;
-                    }
-                }
-                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
-            } else {
-                gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, s->T0, s->A0);
-                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
-            }
-        }
-        break;
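
One subtlety in the removed movzx/movsx register path: without REX, byte-register encodings 4..7 name AH/CH/DH/BH, i.e. bits 15:8 of regs 0..3, which is why the sign-extending case above becomes an 8-bit field extract at bit offset 8. Standalone equivalent (plain C, not QEMU code):

    #include <stdint.h>

    /* Matches tcg_gen_sextract_tl(dst, reg, 8, 8): sign-extend the
     * high byte of the low word, as movsx from AH/CH/DH/BH requires. */
    static int32_t movsx_high_byte(uint32_t reg)
    {
        return (int8_t)(reg >> 8);
    }
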
-
-    case 0x8d: /* lea */
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        if (mod == 3)
-            goto illegal_op;
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        {
-            AddressParts a = gen_lea_modrm_0(env, s, modrm);
-            TCGv ea = gen_lea_modrm_1(s, a, false);
-            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
-            gen_op_mov_reg_v(s, dflag, reg, s->A0);
-        }
-        break;
-
-    case 0xa0: /* mov EAX, Ov */
-    case 0xa1:
-    case 0xa2: /* mov Ov, EAX */
-    case 0xa3:
-        {
-            target_ulong offset_addr;
-
-            ot = mo_b_d(b, dflag);
-            offset_addr = insn_get_addr(env, s, s->aflag);
-            tcg_gen_movi_tl(s->A0, offset_addr);
-            gen_add_A0_ds_seg(s);
-            if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, s->T0, s->A0);
-                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
-            } else {
-                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
-                gen_op_st_v(s, ot, s->T0, s->A0);
-            }
-        }
-        break;
-    case 0xd7: /* xlat */
-        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
-        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
-        tcg_gen_add_tl(s->A0, s->A0, s->T0);
-        gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, s->T0, s->A0);
-        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
-        break;
-    case 0xb0 ... 0xb7: /* mov R, Ib */
-        val = insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(s->T0, val);
-        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
-        break;
-    case 0xb8 ... 0xbf: /* mov R, Iv */
-#ifdef TARGET_X86_64
-        if (dflag == MO_64) {
-            uint64_t tmp;
-            /* 64 bit case */
-            tmp = x86_ldq_code(env, s);
-            reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(s->T0, tmp);
-            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
-        } else
-#endif
-        {
-            ot = dflag;
-            val = insn_get(env, s, ot);
-            reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(s->T0, val);
-            gen_op_mov_reg_v(s, ot, reg, s->T0);
-        }
-        break;
-
-    case 0x91 ... 0x97: /* xchg R, EAX */
-    do_xchg_reg_eax:
-        ot = dflag;
-        reg = (b & 7) | REX_B(s);
-        rm = R_EAX;
-        goto do_xchg_reg;
-    case 0x86:
-    case 0x87: /* xchg Ev, Gv */
-        ot = mo_b_d(b, dflag);
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        mod = (modrm >> 6) & 3;
-        if (mod == 3) {
-            rm = (modrm & 7) | REX_B(s);
-        do_xchg_reg:
-            gen_op_mov_v_reg(s, ot, s->T0, reg);
-            gen_op_mov_v_reg(s, ot, s->T1, rm);
-            gen_op_mov_reg_v(s, ot, rm, s->T0);
-            gen_op_mov_reg_v(s, ot, reg, s->T1);
-        } else {
-            gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(s, ot, s->T0, reg);
-            /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
-                                   s->mem_index, ot | MO_LE);
-            gen_op_mov_reg_v(s, ot, reg, s->T1);
-        }
-        break;
-    case 0xc4: /* les Gv */
-        /* In CODE64 this is VEX3; see above.  */
-        op = R_ES;
-        goto do_lxx;
-    case 0xc5: /* lds Gv */
-        /* In CODE64 this is VEX2; see above.  */
-        op = R_DS;
-        goto do_lxx;
-    case 0x1b2: /* lss Gv */
-        op = R_SS;
-        goto do_lxx;
-    case 0x1b4: /* lfs Gv */
-        op = R_FS;
-        goto do_lxx;
-    case 0x1b5: /* lgs Gv */
-        op = R_GS;
-    do_lxx:
-        ot = dflag != MO_16 ? MO_32 : MO_16;
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        mod = (modrm >> 6) & 3;
-        if (mod == 3)
-            goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, s->T1, s->A0);
-        gen_add_A0_im(s, 1 << ot);
-        /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, s->T0, s->A0);
-        gen_movl_seg_T0(s, op);
-        /* then put the data */
-        gen_op_mov_reg_v(s, ot, reg, s->T1);
-        break;
-
-        /************************/
         /* shifts */
-    case 0xc0:
-    case 0xc1:
-        /* shift Ev,Ib */
-        shift = 2;
-    grp2:
-        {
-            ot = mo_b_d(b, dflag);
-            modrm = x86_ldub_code(env, s);
-            mod = (modrm >> 6) & 3;
-            op = (modrm >> 3) & 7;
-
-            if (mod != 3) {
-                if (shift == 2) {
-                    s->rip_offset = 1;
-                }
-                gen_lea_modrm(env, s, modrm);
-                opreg = OR_TMP0;
-            } else {
-                opreg = (modrm & 7) | REX_B(s);
-            }
-
-            /* simpler op */
-            if (shift == 0) {
-                gen_shift(s, op, ot, opreg, OR_ECX);
-            } else {
-                if (shift == 2) {
-                    shift = x86_ldub_code(env, s);
-                }
-                gen_shifti(s, op, ot, opreg, shift);
-            }
-        }
-        break;
-    case 0xd0:
-    case 0xd1:
-        /* shift Ev,1 */
-        shift = 1;
-        goto grp2;
-    case 0xd2:
-    case 0xd3:
-        /* shift Ev,cl */
-        shift = 0;
-        goto grp2;
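
    /* The three grp2 entry points above differ only in where the shift
     * count comes from: shift == 2 reads an imm8 (0xc0/0xc1), shift == 1
     * is the by-one form (0xd0/0xd1), and shift == 0 takes the count
     * from CL (0xd2/0xd3). */
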
-
     case 0x1a4: /* shld imm */
         op = 0;
         shift = 1;
@@ -4383,929 +3322,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         break;
 
         /************************/
-        /* floats */
-    case 0xd8 ... 0xdf:
-        {
-            bool update_fip = true;
-
-            if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
-                /* if CR0.EM or CR0.TS are set, generate an FPU exception */
-                /* XXX: what to do if illegal op ? */
-                gen_exception(s, EXCP07_PREX);
-                break;
-            }
-            modrm = x86_ldub_code(env, s);
-            mod = (modrm >> 6) & 3;
-            rm = modrm & 7;
-            op = ((b & 7) << 3) | ((modrm >> 3) & 7);
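
    /* The index built on the line above packs the low three bits of the
     * opcode (0xd8..0xdf) with the ModRM reg field into a 6-bit FP
     * operation number: e.g. d9 /5 gives ((1 << 3) | 5) == 0x0d, the
     * "grp d9/5" case handled below. */
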
-            if (mod != 3) {
-                /* memory op */
-                AddressParts a = gen_lea_modrm_0(env, s, modrm);
-                TCGv ea = gen_lea_modrm_1(s, a, false);
-                TCGv last_addr = tcg_temp_new();
-                bool update_fdp = true;
-
-                tcg_gen_mov_tl(last_addr, ea);
-                gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
-
-                switch (op) {
-                case 0x00 ... 0x07: /* fxxxs */
-                case 0x10 ... 0x17: /* fixxxl */
-                case 0x20 ... 0x27: /* fxxxl */
-                case 0x30 ... 0x37: /* fixxx */
-                    {
-                        int op1;
-                        op1 = op & 7;
-
-                        switch (op >> 4) {
-                        case 0:
-                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            gen_helper_flds_FT0(tcg_env, s->tmp2_i32);
-                            break;
-                        case 1:
-                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
-                            break;
-                        case 2:
-                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
-                                                s->mem_index, MO_LEUQ);
-                            gen_helper_fldl_FT0(tcg_env, s->tmp1_i64);
-                            break;
-                        case 3:
-                        default:
-                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LESW);
-                            gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
-                            break;
-                        }
-
-                        gen_helper_fp_arith_ST0_FT0(op1);
-                        if (op1 == 3) {
-                            /* fcomp needs pop */
-                            gen_helper_fpop(tcg_env);
-                        }
-                    }
-                    break;
-                case 0x08: /* flds */
-                case 0x0a: /* fsts */
-                case 0x0b: /* fstps */
-                case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
-                case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
-                case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
-                    switch (op & 7) {
-                    case 0:
-                        switch (op >> 4) {
-                        case 0:
-                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            gen_helper_flds_ST0(tcg_env, s->tmp2_i32);
-                            break;
-                        case 1:
-                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
-                            break;
-                        case 2:
-                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
-                                                s->mem_index, MO_LEUQ);
-                            gen_helper_fldl_ST0(tcg_env, s->tmp1_i64);
-                            break;
-                        case 3:
-                        default:
-                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LESW);
-                            gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
-                            break;
-                        }
-                        break;
-                    case 1:
-                        /* XXX: the corresponding CPUID bit must be tested ! */
-                        switch (op >> 4) {
-                        case 1:
-                            gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env);
-                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            break;
-                        case 2:
-                            gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env);
-                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
-                                                s->mem_index, MO_LEUQ);
-                            break;
-                        case 3:
-                        default:
-                            gen_helper_fistt_ST0(s->tmp2_i32, tcg_env);
-                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUW);
-                            break;
-                        }
-                        gen_helper_fpop(tcg_env);
-                        break;
-                    default:
-                        switch (op >> 4) {
-                        case 0:
-                            gen_helper_fsts_ST0(s->tmp2_i32, tcg_env);
-                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            break;
-                        case 1:
-                            gen_helper_fistl_ST0(s->tmp2_i32, tcg_env);
-                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUL);
-                            break;
-                        case 2:
-                            gen_helper_fstl_ST0(s->tmp1_i64, tcg_env);
-                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
-                                                s->mem_index, MO_LEUQ);
-                            break;
-                        case 3:
-                        default:
-                            gen_helper_fist_ST0(s->tmp2_i32, tcg_env);
-                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                                s->mem_index, MO_LEUW);
-                            break;
-                        }
-                        if ((op & 7) == 3) {
-                            gen_helper_fpop(tcg_env);
-                        }
-                        break;
-                    }
-                    break;
-                case 0x0c: /* fldenv mem */
-                    gen_helper_fldenv(tcg_env, s->A0,
-                                      tcg_constant_i32(dflag - 1));
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x0d: /* fldcw mem */
-                    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
-                                        s->mem_index, MO_LEUW);
-                    gen_helper_fldcw(tcg_env, s->tmp2_i32);
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x0e: /* fnstenv mem */
-                    gen_helper_fstenv(tcg_env, s->A0,
-                                      tcg_constant_i32(dflag - 1));
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x0f: /* fnstcw mem */
-                    gen_helper_fnstcw(s->tmp2_i32, tcg_env);
-                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                        s->mem_index, MO_LEUW);
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x1d: /* fldt mem */
-                    gen_helper_fldt_ST0(tcg_env, s->A0);
-                    break;
-                case 0x1f: /* fstpt mem */
-                    gen_helper_fstt_ST0(tcg_env, s->A0);
-                    gen_helper_fpop(tcg_env);
-                    break;
-                case 0x2c: /* frstor mem */
-                    gen_helper_frstor(tcg_env, s->A0,
-                                      tcg_constant_i32(dflag - 1));
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x2e: /* fnsave mem */
-                    gen_helper_fsave(tcg_env, s->A0,
-                                     tcg_constant_i32(dflag - 1));
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x2f: /* fnstsw mem */
-                    gen_helper_fnstsw(s->tmp2_i32, tcg_env);
-                    tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
-                                        s->mem_index, MO_LEUW);
-                    update_fip = update_fdp = false;
-                    break;
-                case 0x3c: /* fbld */
-                    gen_helper_fbld_ST0(tcg_env, s->A0);
-                    break;
-                case 0x3e: /* fbstp */
-                    gen_helper_fbst_ST0(tcg_env, s->A0);
-                    gen_helper_fpop(tcg_env);
-                    break;
-                case 0x3d: /* fildll */
-                    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
-                                        s->mem_index, MO_LEUQ);
-                    gen_helper_fildll_ST0(tcg_env, s->tmp1_i64);
-                    break;
-                case 0x3f: /* fistpll */
-                    gen_helper_fistll_ST0(s->tmp1_i64, tcg_env);
-                    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
-                                        s->mem_index, MO_LEUQ);
-                    gen_helper_fpop(tcg_env);
-                    break;
-                default:
-                    goto unknown_op;
-                }
-
-                if (update_fdp) {
-                    int last_seg = s->override >= 0 ? s->override : a.def_seg;
-
-                    tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
-                                   offsetof(CPUX86State,
-                                            segs[last_seg].selector));
-                    tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
-                                     offsetof(CPUX86State, fpds));
-                    tcg_gen_st_tl(last_addr, tcg_env,
-                                  offsetof(CPUX86State, fpdp));
-                }
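
    /* Companion to the FIP/FCS update at the end of the block: memory
     * operands also latch the x87 last data pointer, fpds taking the
     * selector of the effective segment and fpdp the pre-segmentation
     * offset saved in last_addr.  The control ops above (fldenv, fldcw,
     * fnstenv, fnstcw, frstor, fnsave, fnstsw) clear update_fdp so the
     * previously latched pointer survives them. */
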
-            } else {
-                /* register float ops */
-                opreg = rm;
-
-                switch (op) {
-                case 0x08: /* fld sti */
-                    gen_helper_fpush(tcg_env);
-                    gen_helper_fmov_ST0_STN(tcg_env,
-                                            tcg_constant_i32((opreg + 1) & 7));
-                    break;
-                case 0x09: /* fxchg sti */
-                case 0x29: /* fxchg4 sti, undocumented op */
-                case 0x39: /* fxchg7 sti, undocumented op */
-                    gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg));
-                    break;
-                case 0x0a: /* grp d9/2 */
-                    switch (rm) {
-                    case 0: /* fnop */
-                        /*
-                         * check exceptions (FreeBSD FPU probe)
-                         * needs to be treated as I/O because of ferr_irq
-                         */
-                        translator_io_start(&s->base);
-                        gen_helper_fwait(tcg_env);
-                        update_fip = false;
-                        break;
-                    default:
-                        goto unknown_op;
-                    }
-                    break;
-                case 0x0c: /* grp d9/4 */
-                    switch (rm) {
-                    case 0: /* fchs */
-                        gen_helper_fchs_ST0(tcg_env);
-                        break;
-                    case 1: /* fabs */
-                        gen_helper_fabs_ST0(tcg_env);
-                        break;
-                    case 4: /* ftst */
-                        gen_helper_fldz_FT0(tcg_env);
-                        gen_helper_fcom_ST0_FT0(tcg_env);
-                        break;
-                    case 5: /* fxam */
-                        gen_helper_fxam_ST0(tcg_env);
-                        break;
-                    default:
-                        goto unknown_op;
-                    }
-                    break;
-                case 0x0d: /* grp d9/5 */
-                    {
-                        switch (rm) {
-                        case 0:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fld1_ST0(tcg_env);
-                            break;
-                        case 1:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fldl2t_ST0(tcg_env);
-                            break;
-                        case 2:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fldl2e_ST0(tcg_env);
-                            break;
-                        case 3:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fldpi_ST0(tcg_env);
-                            break;
-                        case 4:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fldlg2_ST0(tcg_env);
-                            break;
-                        case 5:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fldln2_ST0(tcg_env);
-                            break;
-                        case 6:
-                            gen_helper_fpush(tcg_env);
-                            gen_helper_fldz_ST0(tcg_env);
-                            break;
-                        default:
-                            goto unknown_op;
-                        }
-                    }
-                    break;
-                case 0x0e: /* grp d9/6 */
-                    switch (rm) {
-                    case 0: /* f2xm1 */
-                        gen_helper_f2xm1(tcg_env);
-                        break;
-                    case 1: /* fyl2x */
-                        gen_helper_fyl2x(tcg_env);
-                        break;
-                    case 2: /* fptan */
-                        gen_helper_fptan(tcg_env);
-                        break;
-                    case 3: /* fpatan */
-                        gen_helper_fpatan(tcg_env);
-                        break;
-                    case 4: /* fxtract */
-                        gen_helper_fxtract(tcg_env);
-                        break;
-                    case 5: /* fprem1 */
-                        gen_helper_fprem1(tcg_env);
-                        break;
-                    case 6: /* fdecstp */
-                        gen_helper_fdecstp(tcg_env);
-                        break;
-                    default:
-                    case 7: /* fincstp */
-                        gen_helper_fincstp(tcg_env);
-                        break;
-                    }
-                    break;
-                case 0x0f: /* grp d9/7 */
-                    switch (rm) {
-                    case 0: /* fprem */
-                        gen_helper_fprem(tcg_env);
-                        break;
-                    case 1: /* fyl2xp1 */
-                        gen_helper_fyl2xp1(tcg_env);
-                        break;
-                    case 2: /* fsqrt */
-                        gen_helper_fsqrt(tcg_env);
-                        break;
-                    case 3: /* fsincos */
-                        gen_helper_fsincos(tcg_env);
-                        break;
-                    case 5: /* fscale */
-                        gen_helper_fscale(tcg_env);
-                        break;
-                    case 4: /* frndint */
-                        gen_helper_frndint(tcg_env);
-                        break;
-                    case 6: /* fsin */
-                        gen_helper_fsin(tcg_env);
-                        break;
-                    default:
-                    case 7: /* fcos */
-                        gen_helper_fcos(tcg_env);
-                        break;
-                    }
-                    break;
-                case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
-                case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
-                case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
-                    {
-                        int op1;
-
-                        op1 = op & 7;
-                        if (op >= 0x20) {
-                            gen_helper_fp_arith_STN_ST0(op1, opreg);
-                            if (op >= 0x30) {
-                                gen_helper_fpop(tcg_env);
-                            }
-                        } else {
-                            gen_helper_fmov_FT0_STN(tcg_env,
-                                                    tcg_constant_i32(opreg));
-                            gen_helper_fp_arith_ST0_FT0(op1);
-                        }
-                    }
-                    break;
-                case 0x02: /* fcom */
-                case 0x22: /* fcom2, undocumented op */
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fcom_ST0_FT0(tcg_env);
-                    break;
-                case 0x03: /* fcomp */
-                case 0x23: /* fcomp3, undocumented op */
-                case 0x32: /* fcomp5, undocumented op */
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fcom_ST0_FT0(tcg_env);
-                    gen_helper_fpop(tcg_env);
-                    break;
-                case 0x15: /* da/5 */
-                    switch (rm) {
-                    case 1: /* fucompp */
-                        gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
-                        gen_helper_fucom_ST0_FT0(tcg_env);
-                        gen_helper_fpop(tcg_env);
-                        gen_helper_fpop(tcg_env);
-                        break;
-                    default:
-                        goto unknown_op;
-                    }
-                    break;
-                case 0x1c:
-                    switch (rm) {
-                    case 0: /* feni (287 only, just do nop here) */
-                        break;
-                    case 1: /* fdisi (287 only, just do nop here) */
-                        break;
-                    case 2: /* fclex */
-                        gen_helper_fclex(tcg_env);
-                        update_fip = false;
-                        break;
-                    case 3: /* fninit */
-                        gen_helper_fninit(tcg_env);
-                        update_fip = false;
-                        break;
-                    case 4: /* fsetpm (287 only, just do nop here) */
-                        break;
-                    default:
-                        goto unknown_op;
-                    }
-                    break;
-                case 0x1d: /* fucomi */
-                    if (!(s->cpuid_features & CPUID_CMOV)) {
-                        goto illegal_op;
-                    }
-                    gen_update_cc_op(s);
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fucomi_ST0_FT0(tcg_env);
-                    set_cc_op(s, CC_OP_EFLAGS);
-                    break;
-                case 0x1e: /* fcomi */
-                    if (!(s->cpuid_features & CPUID_CMOV)) {
-                        goto illegal_op;
-                    }
-                    gen_update_cc_op(s);
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fcomi_ST0_FT0(tcg_env);
-                    set_cc_op(s, CC_OP_EFLAGS);
-                    break;
-                case 0x28: /* ffree sti */
-                    gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
-                    break;
-                case 0x2a: /* fst sti */
-                    gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
-                    break;
-                case 0x2b: /* fstp sti */
-                case 0x0b: /* fstp1 sti, undocumented op */
-                case 0x3a: /* fstp8 sti, undocumented op */
-                case 0x3b: /* fstp9 sti, undocumented op */
-                    gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fpop(tcg_env);
-                    break;
-                case 0x2c: /* fucom st(i) */
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fucom_ST0_FT0(tcg_env);
-                    break;
-                case 0x2d: /* fucomp st(i) */
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fucom_ST0_FT0(tcg_env);
-                    gen_helper_fpop(tcg_env);
-                    break;
-                case 0x33: /* de/3 */
-                    switch (rm) {
-                    case 1: /* fcompp */
-                        gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
-                        gen_helper_fcom_ST0_FT0(tcg_env);
-                        gen_helper_fpop(tcg_env);
-                        gen_helper_fpop(tcg_env);
-                        break;
-                    default:
-                        goto unknown_op;
-                    }
-                    break;
-                case 0x38: /* ffreep sti, undocumented op */
-                    gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fpop(tcg_env);
-                    break;
-                case 0x3c: /* df/4 */
-                    switch (rm) {
-                    case 0:
-                        gen_helper_fnstsw(s->tmp2_i32, tcg_env);
-                        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
-                        gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
-                        break;
-                    default:
-                        goto unknown_op;
-                    }
-                    break;
-                case 0x3d: /* fucomip */
-                    if (!(s->cpuid_features & CPUID_CMOV)) {
-                        goto illegal_op;
-                    }
-                    gen_update_cc_op(s);
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fucomi_ST0_FT0(tcg_env);
-                    gen_helper_fpop(tcg_env);
-                    set_cc_op(s, CC_OP_EFLAGS);
-                    break;
-                case 0x3e: /* fcomip */
-                    if (!(s->cpuid_features & CPUID_CMOV)) {
-                        goto illegal_op;
-                    }
-                    gen_update_cc_op(s);
-                    gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
-                    gen_helper_fcomi_ST0_FT0(tcg_env);
-                    gen_helper_fpop(tcg_env);
-                    set_cc_op(s, CC_OP_EFLAGS);
-                    break;
-                case 0x10 ... 0x13: /* fcmovxx */
-                case 0x18 ... 0x1b:
-                    {
-                        int op1;
-                        TCGLabel *l1;
-                        static const uint8_t fcmov_cc[8] = {
-                            (JCC_B << 1),
-                            (JCC_Z << 1),
-                            (JCC_BE << 1),
-                            (JCC_P << 1),
-                        };
-
-                        if (!(s->cpuid_features & CPUID_CMOV)) {
-                            goto illegal_op;
-                        }
-                        op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
-                        l1 = gen_new_label();
-                        gen_jcc1_noeob(s, op1, l1);
-                        gen_helper_fmov_ST0_STN(tcg_env,
-                                                tcg_constant_i32(opreg));
-                        gen_set_label(l1);
-                    }
-                    break;
-                default:
-                    goto unknown_op;
-                }
-            }
-
-            if (update_fip) {
-                tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
-                               offsetof(CPUX86State, segs[R_CS].selector));
-                tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
-                                 offsetof(CPUX86State, fpcs));
-                tcg_gen_st_tl(eip_cur_tl(s),
-                              tcg_env, offsetof(CPUX86State, fpip));
-            }
-        }
-        break;
-        /************************/
-        /* string ops */
-
-    case 0xa4: /* movsS */
-    case 0xa5:
-        ot = mo_b_d(b, dflag);
-        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
-            gen_repz_movs(s, ot);
-        } else {
-            gen_movs(s, ot);
-        }
-        break;
-
-    case 0xaa: /* stosS */
-    case 0xab:
-        ot = mo_b_d(b, dflag);
-        gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
-        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
-            gen_repz_stos(s, ot);
-        } else {
-            gen_stos(s, ot);
-        }
-        break;
-    case 0xac: /* lodsS */
-    case 0xad:
-        ot = mo_b_d(b, dflag);
-        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
-            gen_repz_lods(s, ot);
-        } else {
-            gen_lods(s, ot);
-        }
-        break;
-    case 0xae: /* scasS */
-    case 0xaf:
-        ot = mo_b_d(b, dflag);
-        gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
-        if (prefixes & PREFIX_REPNZ) {
-            gen_repz_scas(s, ot, 1);
-        } else if (prefixes & PREFIX_REPZ) {
-            gen_repz_scas(s, ot, 0);
-        } else {
-            gen_scas(s, ot);
-        }
-        break;
-
-    case 0xa6: /* cmpsS */
-    case 0xa7:
-        ot = mo_b_d(b, dflag);
-        if (prefixes & PREFIX_REPNZ) {
-            gen_repz_cmps(s, ot, 1);
-        } else if (prefixes & PREFIX_REPZ) {
-            gen_repz_cmps(s, ot, 0);
-        } else {
-            gen_cmps(s, ot);
-        }
-        break;
-    case 0x6c: /* insS */
-    case 0x6d:
-        ot = mo_b_d32(b, dflag);
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
-        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
-        if (!gen_check_io(s, ot, s->tmp2_i32,
-                          SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
-            break;
-        }
-        translator_io_start(&s->base);
-        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
-            gen_repz_ins(s, ot);
-        } else {
-            gen_ins(s, ot);
-        }
-        break;
-    case 0x6e: /* outsS */
-    case 0x6f:
-        ot = mo_b_d32(b, dflag);
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
-        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
-        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
-            break;
-        }
-        translator_io_start(&s->base);
-        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
-            gen_repz_outs(s, ot);
-        } else {
-            gen_outs(s, ot);
-        }
-        break;
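
Each string instruction comes in a plain and a REP-prefixed form; the
gen_repz_* expansions loop one element at a time, also stopping between
iterations so interrupts can be taken. Ignoring that, and ignoring
address-size masking and segmentation, REP MOVSB behaves like this
flat-memory sketch (all names illustrative):

    #include <stdint.h>

    static void rep_movsb(uint8_t *mem, uint64_t *rsi, uint64_t *rdi,
                          uint64_t *rcx, int df)
    {
        int64_t step = df ? -1 : 1;         /* DF picks the direction */
        while (*rcx != 0) {
            mem[*rdi] = mem[*rsi];          /* one element per iteration */
            *rsi += step;
            *rdi += step;
            (*rcx)--;
        }
    }
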
-
-        /************************/
-        /* port I/O */
-
-    case 0xe4:
-    case 0xe5:
-        ot = mo_b_d32(b, dflag);
-        val = x86_ldub_code(env, s);
-        tcg_gen_movi_i32(s->tmp2_i32, val);
-        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
-            break;
-        }
-        translator_io_start(&s->base);
-        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
-        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
-        gen_bpt_io(s, s->tmp2_i32, ot);
-        break;
-    case 0xe6:
-    case 0xe7:
-        ot = mo_b_d32(b, dflag);
-        val = x86_ldub_code(env, s);
-        tcg_gen_movi_i32(s->tmp2_i32, val);
-        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
-            break;
-        }
-        translator_io_start(&s->base);
-        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
-        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
-        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
-        gen_bpt_io(s, s->tmp2_i32, ot);
-        break;
-    case 0xec:
-    case 0xed:
-        ot = mo_b_d32(b, dflag);
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
-        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
-        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
-            break;
-        }
-        translator_io_start(&s->base);
-        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
-        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
-        gen_bpt_io(s, s->tmp2_i32, ot);
-        break;
-    case 0xee:
-    case 0xef:
-        ot = mo_b_d32(b, dflag);
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
-        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
-        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
-            break;
-        }
-        translator_io_start(&s->base);
-        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
-        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
-        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
-        gen_bpt_io(s, s->tmp2_i32, ot);
-        break;
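
All four IN/OUT forms funnel through the same helpers; they differ only
in where the port number comes from (an immediate byte for E4-E7, DX
for EC-EF) and in the SVM intercept flags passed to gen_check_io. A
trivial sketch of the port selection (function name illustrative):

    #include <stdint.h>

    static uint16_t io_port(uint8_t opcode, uint8_t imm8, uint64_t rdx)
    {
        if (opcode >= 0xe4 && opcode <= 0xe7) {
            return imm8;            /* immediate forms: imm8, zero-extended */
        }
        return (uint16_t)rdx;       /* DX forms: low 16 bits of DX */
    }
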
-
-        /************************/
-        /* control */
-    case 0xc2: /* ret im */
-        val = x86_ldsw_code(env, s);
-        ot = gen_pop_T0(s);
-        gen_stack_update(s, val + (1 << ot));
-        /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(s, s->T0);
-        gen_bnd_jmp(s);
-        s->base.is_jmp = DISAS_JUMP;
-        break;
-    case 0xc3: /* ret */
-        ot = gen_pop_T0(s);
-        gen_pop_update(s, ot);
-        /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(s, s->T0);
-        gen_bnd_jmp(s);
-        s->base.is_jmp = DISAS_JUMP;
-        break;
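
Both near returns pop the return address with a zero-extending load, as
the comments note; RET imm16 then releases imm16 extra bytes of stack.
A flat-memory sketch for 32-bit operand size (names and memory model
illustrative, segmentation ignored):

    #include <stdint.h>
    #include <string.h>

    static uint32_t near_ret(const uint8_t *mem, uint32_t *esp, uint16_t imm)
    {
        uint32_t eip;
        memcpy(&eip, mem + *esp, sizeof(eip));  /* pop return EIP */
        *esp += sizeof(eip) + imm;              /* plus optional imm16 */
        return eip;
    }
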
-    case 0xca: /* lret im */
-        val = x86_ldsw_code(env, s);
-    do_lret:
-        if (PE(s) && !VM86(s)) {
-            gen_update_cc_op(s);
-            gen_update_eip_cur(s);
-            gen_helper_lret_protected(tcg_env, tcg_constant_i32(dflag - 1),
-                                      tcg_constant_i32(val));
-        } else {
-            gen_stack_A0(s);
-            /* pop offset */
-            gen_op_ld_v(s, dflag, s->T0, s->A0);
-            /* NOTE: keeping EIP updated is not a problem in case of
-               exception */
-            gen_op_jmp_v(s, s->T0);
-            /* pop selector */
-            gen_add_A0_im(s, 1 << dflag);
-            gen_op_ld_v(s, dflag, s->T0, s->A0);
-            gen_op_movl_seg_T0_vm(s, R_CS);
-            /* add stack offset */
-            gen_stack_update(s, val + (2 << dflag));
-        }
-        s->base.is_jmp = DISAS_EOB_ONLY;
-        break;
-    case 0xcb: /* lret */
-        val = 0;
-        goto do_lret;
-    case 0xcf: /* iret */
-        gen_svm_check_intercept(s, SVM_EXIT_IRET);
-        if (!PE(s) || VM86(s)) {
-            /* real mode or vm86 mode */
-            if (!check_vm86_iopl(s)) {
-                break;
-            }
-            gen_helper_iret_real(tcg_env, tcg_constant_i32(dflag - 1));
-        } else {
-            gen_helper_iret_protected(tcg_env, tcg_constant_i32(dflag - 1),
-                                      eip_next_i32(s));
-        }
-        set_cc_op(s, CC_OP_EFLAGS);
-        s->base.is_jmp = DISAS_EOB_ONLY;
-        break;
-    case 0xe8: /* call im */
-        {
-            int diff = (dflag != MO_16
-                        ? (int32_t)insn_get(env, s, MO_32)
-                        : (int16_t)insn_get(env, s, MO_16));
-            gen_push_v(s, eip_next_tl(s));
-            gen_bnd_jmp(s);
-            gen_jmp_rel(s, dflag, diff, 0);
-        }
-        break;
-    case 0x9a: /* lcall im */
-        {
-            unsigned int selector, offset;
-
-            if (CODE64(s))
-                goto illegal_op;
-            ot = dflag;
-            offset = insn_get(env, s, ot);
-            selector = insn_get(env, s, MO_16);
-
-            tcg_gen_movi_tl(s->T0, selector);
-            tcg_gen_movi_tl(s->T1, offset);
-        }
-        goto do_lcall;
-    case 0xe9: /* jmp im */
-        {
-            int diff = (dflag != MO_16
-                        ? (int32_t)insn_get(env, s, MO_32)
-                        : (int16_t)insn_get(env, s, MO_16));
-            gen_bnd_jmp(s);
-            gen_jmp_rel(s, dflag, diff, 0);
-        }
-        break;
-    case 0xea: /* ljmp im */
-        {
-            unsigned int selector, offset;
-
-            if (CODE64(s))
-                goto illegal_op;
-            ot = dflag;
-            offset = insn_get(env, s, ot);
-            selector = insn_get(env, s, MO_16);
-
-            tcg_gen_movi_tl(s->T0, selector);
-            tcg_gen_movi_tl(s->T1, offset);
-        }
-        goto do_ljmp;
-    case 0xeb: /* jmp Jb */
-        {
-            int diff = (int8_t)insn_get(env, s, MO_8);
-            gen_jmp_rel(s, dflag, diff, 0);
-        }
-        break;
-    case 0x70 ... 0x7f: /* jcc Jb */
-        {
-            int diff = (int8_t)insn_get(env, s, MO_8);
-            gen_bnd_jmp(s);
-            gen_jcc(s, b, diff);
-        }
-        break;
-    case 0x180 ... 0x18f: /* jcc Jv */
-        {
-            int diff = (dflag != MO_16
-                        ? (int32_t)insn_get(env, s, MO_32)
-                        : (int16_t)insn_get(env, s, MO_16));
-            gen_bnd_jmp(s);
-            gen_jcc(s, b, diff);
-        }
-        break;
-
-    case 0x190 ... 0x19f: /* setcc Gv */
-        modrm = x86_ldub_code(env, s);
-        gen_setcc1(s, b, s->T0);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
-        break;
-    case 0x140 ... 0x14f: /* cmov Gv, Ev */
-        if (!(s->cpuid_features & CPUID_CMOV)) {
-            goto illegal_op;
-        }
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_cmovcc1(s, b ^ 1, s->T0, cpu_regs[reg]);
-        gen_op_mov_reg_v(s, ot, reg, s->T0);
-        break;
-
-        /************************/
-        /* flags */
-    case 0x9c: /* pushf */
-        gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
-        if (check_vm86_iopl(s)) {
-            gen_update_cc_op(s);
-            gen_helper_read_eflags(s->T0, tcg_env);
-            gen_push_v(s, s->T0);
-        }
-        break;
-    case 0x9d: /* popf */
-        gen_svm_check_intercept(s, SVM_EXIT_POPF);
-        if (check_vm86_iopl(s)) {
-            int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
-
-            if (CPL(s) == 0) {
-                mask |= IF_MASK | IOPL_MASK;
-            } else if (CPL(s) <= IOPL(s)) {
-                mask |= IF_MASK;
-            }
-            if (dflag == MO_16) {
-                mask &= 0xffff;
-            }
-
-            ot = gen_pop_T0(s);
-            gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask));
-            gen_pop_update(s, ot);
-            set_cc_op(s, CC_OP_EFLAGS);
-            /* abort translation because TF/AC flag may change */
-            s->base.is_jmp = DISAS_EOB_NEXT;
-        }
-        break;
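
The mask computed for POPF follows the architectural rules: only CPL 0
may change IF and IOPL, CPL <= IOPL may still change IF, and the 16-bit
form never touches the upper flags. The same logic as host C (the flag
constants are the standard EFLAGS bit values):

    #define TF_MASK   0x00000100
    #define IF_MASK   0x00000200
    #define IOPL_MASK 0x00003000
    #define NT_MASK   0x00004000
    #define AC_MASK   0x00040000
    #define ID_MASK   0x00200000

    static unsigned popf_mask(int cpl, int iopl, int op16)
    {
        unsigned mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
        if (cpl == 0) {
            mask |= IF_MASK | IOPL_MASK;
        } else if (cpl <= iopl) {
            mask |= IF_MASK;
        }
        return op16 ? (mask & 0xffff) : mask;
    }
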
-    case 0x9e: /* sahf */
-        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
-            goto illegal_op;
-        tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8);
-        gen_compute_eflags(s);
-        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
-        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
-        break;
-    case 0x9f: /* lahf */
-        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
-            goto illegal_op;
-        gen_compute_eflags(s);
-        /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
-        tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
-        break;
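
LAHF loads AH from the low flags byte with bit 1 forced on (hence the
`ori ... 0x02` above); SAHF writes SF/ZF/AF/PF/CF back and leaves every
other flag, including OF, alone. As plain C (CC_* are the standard flag
bit values):

    #include <stdint.h>

    #define CC_C 0x01
    #define CC_P 0x04
    #define CC_A 0x10
    #define CC_Z 0x40
    #define CC_S 0x80
    #define CC_SZAPC (CC_S | CC_Z | CC_A | CC_P | CC_C)

    static uint8_t lahf(uint32_t eflags)
    {
        return (eflags & CC_SZAPC) | 0x02;  /* bit 1 always reads as 1 */
    }

    static uint32_t sahf(uint32_t eflags, uint8_t ah)
    {
        return (eflags & ~(uint32_t)CC_SZAPC) | (ah & CC_SZAPC);
    }
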
-    case 0xf5: /* cmc */
-        gen_compute_eflags(s);
-        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
-        break;
-    case 0xf8: /* clc */
-        gen_compute_eflags(s);
-        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
-        break;
-    case 0xf9: /* stc */
-        gen_compute_eflags(s);
-        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
-        break;
-    case 0xfc: /* cld */
-        tcg_gen_movi_i32(s->tmp2_i32, 1);
-        tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df));
-        break;
-    case 0xfd: /* std */
-        tcg_gen_movi_i32(s->tmp2_i32, -1);
-        tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df));
-        break;
-
-        /************************/
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
         ot = dflag;
@@ -5494,188 +3510,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         }
         gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
-        /************************/
-        /* bcd */
-    case 0x27: /* daa */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_update_cc_op(s);
-        gen_helper_daa(tcg_env);
-        set_cc_op(s, CC_OP_EFLAGS);
-        break;
-    case 0x2f: /* das */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_update_cc_op(s);
-        gen_helper_das(tcg_env);
-        set_cc_op(s, CC_OP_EFLAGS);
-        break;
-    case 0x37: /* aaa */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_update_cc_op(s);
-        gen_helper_aaa(tcg_env);
-        set_cc_op(s, CC_OP_EFLAGS);
-        break;
-    case 0x3f: /* aas */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_update_cc_op(s);
-        gen_helper_aas(tcg_env);
-        set_cc_op(s, CC_OP_EFLAGS);
-        break;
-    case 0xd4: /* aam */
-        if (CODE64(s))
-            goto illegal_op;
-        val = x86_ldub_code(env, s);
-        if (val == 0) {
-            gen_exception(s, EXCP00_DIVZ);
-        } else {
-            gen_helper_aam(tcg_env, tcg_constant_i32(val));
-            set_cc_op(s, CC_OP_LOGICB);
-        }
-        break;
-    case 0xd5: /* aad */
-        if (CODE64(s))
-            goto illegal_op;
-        val = x86_ldub_code(env, s);
-        gen_helper_aad(tcg_env, tcg_constant_i32(val));
-        set_cc_op(s, CC_OP_LOGICB);
-        break;
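
AAM and AAD are plain base-`imm` divmod/recombine on AL and AH (the
immediate is 10 unless an assembler emits otherwise), and AAM's
division is why a zero immediate raises #DE above. Behaviourally:

    #include <stdint.h>

    static void aam(uint8_t *al, uint8_t *ah, uint8_t base) /* base != 0 */
    {
        *ah = *al / base;
        *al = *al % base;
    }

    static void aad(uint8_t *al, uint8_t *ah, uint8_t base)
    {
        *al = (uint8_t)(*al + *ah * base);
        *ah = 0;
    }
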
-        /************************/
-        /* misc */
-    case 0x90: /* nop */
-        /* XXX: correct lock test for all insn */
-        if (prefixes & PREFIX_LOCK) {
-            goto illegal_op;
-        }
-        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
-        if (REX_B(s)) {
-            goto do_xchg_reg_eax;
-        }
-        if (prefixes & PREFIX_REPZ) {
-            gen_update_cc_op(s);
-            gen_update_eip_cur(s);
-            gen_helper_pause(tcg_env, cur_insn_len_i32(s));
-            s->base.is_jmp = DISAS_NORETURN;
-        }
-        break;
-    case 0x9b: /* fwait */
-        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
-            (HF_MP_MASK | HF_TS_MASK)) {
-            gen_exception(s, EXCP07_PREX);
-        } else {
-            /* needs to be treated as I/O because of ferr_irq */
-            translator_io_start(&s->base);
-            gen_helper_fwait(tcg_env);
-        }
-        break;
-    case 0xcc: /* int3 */
-        gen_interrupt(s, EXCP03_INT3);
-        break;
-    case 0xcd: /* int N */
-        val = x86_ldub_code(env, s);
-        if (check_vm86_iopl(s)) {
-            gen_interrupt(s, val);
-        }
-        break;
-    case 0xce: /* into */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_update_cc_op(s);
-        gen_update_eip_cur(s);
-        gen_helper_into(tcg_env, cur_insn_len_i32(s));
-        break;
-#ifdef WANT_ICEBP
-    case 0xf1: /* icebp (undocumented, exits to external debugger) */
-        gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
-        gen_debug(s);
-        break;
-#endif
-    case 0xfa: /* cli */
-        if (check_iopl(s)) {
-            gen_reset_eflags(s, IF_MASK);
-        }
-        break;
-    case 0xfb: /* sti */
-        if (check_iopl(s)) {
-            gen_set_eflags(s, IF_MASK);
-            /* interrupts are recognized only after the insn following sti */
-            gen_update_eip_next(s);
-            gen_eob_inhibit_irq(s, true);
-        }
-        break;
-    case 0x62: /* bound */
-        if (CODE64(s))
-            goto illegal_op;
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        reg = (modrm >> 3) & 7;
-        mod = (modrm >> 6) & 3;
-        if (mod == 3)
-            goto illegal_op;
-        gen_op_mov_v_reg(s, ot, s->T0, reg);
-        gen_lea_modrm(env, s, modrm);
-        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-        if (ot == MO_16) {
-            gen_helper_boundw(tcg_env, s->A0, s->tmp2_i32);
-        } else {
-            gen_helper_boundl(tcg_env, s->A0, s->tmp2_i32);
-        }
-        break;
-    case 0x1c8 ... 0x1cf: /* bswap reg */
-        reg = (b & 7) | REX_B(s);
-#ifdef TARGET_X86_64
-        if (dflag == MO_64) {
-            tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
-            break;
-        }
-#endif
-        tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
-        break;
-    case 0xd6: /* salc */
-        if (CODE64(s))
-            goto illegal_op;
-        gen_compute_eflags_c(s, s->T0);
-        tcg_gen_neg_tl(s->T0, s->T0);
-        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
-        break;
-    case 0xe0: /* loopnz */
-    case 0xe1: /* loopz */
-    case 0xe2: /* loop */
-    case 0xe3: /* jecxz */
-        {
-            TCGLabel *l1, *l2;
-            int diff = (int8_t)insn_get(env, s, MO_8);
-
-            l1 = gen_new_label();
-            l2 = gen_new_label();
-            gen_update_cc_op(s);
-            b &= 3;
-            switch(b) {
-            case 0: /* loopnz */
-            case 1: /* loopz */
-                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
-                gen_op_jz_ecx(s, l2);
-                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
-                break;
-            case 2: /* loop */
-                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
-                gen_op_jnz_ecx(s, l1);
-                break;
-            default:
-            case 3: /* jcxz */
-                gen_op_jz_ecx(s, l1);
-                break;
-            }
-
-            gen_set_label(l2);
-            gen_jmp_rel_csize(s, 0, 1);
-
-            gen_set_label(l1);
-            gen_jmp_rel(s, dflag, diff, 0);
-        }
-        break;
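
All four opcodes branch on (E)CX: LOOP/LOOPZ/LOOPNZ decrement first,
JECXZ only tests. Ignoring the address-size masking that s->aflag
handles above, the taken/not-taken decision is (illustrative model):

    #include <stdbool.h>
    #include <stdint.h>

    /* b = low two bits of the opcode (0xe0..0xe3); returns whether the
     * relative branch is taken. */
    static bool loop_taken(int b, uint64_t *ecx, bool zf)
    {
        switch (b & 3) {
        case 0:  return --*ecx != 0 && !zf;  /* loopnz */
        case 1:  return --*ecx != 0 && zf;   /* loopz  */
        case 2:  return --*ecx != 0;         /* loop   */
        default: return *ecx == 0;           /* jecxz: no decrement */
        }
    }
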
     case 0x130: /* wrmsr */
     case 0x132: /* rdmsr */
         if (check_cpl0(s)) {
@@ -5736,7 +3570,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         /* TF handling for the syscall insn is different: the TF bit is
            checked after the syscall insn completes, so #DB is not raised
            after entering CPL0 when TF is set in FMASK.  */
-        gen_eob_worker(s, false, true);
+        gen_eob_syscall(s);
         break;
     case 0x107: /* sysret */
         /* For Intel SYSRET is only valid in long mode */
@@ -5755,7 +3589,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
                checked after the sysret insn completes. This allows #DB to be
               generated "as if" the syscall insn in userspace had just
                completed.  */
-            gen_eob_worker(s, false, true);
+            gen_eob_syscall(s);
         }
         break;
     case 0x1a2: /* cpuid */
@@ -5763,14 +3597,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         gen_update_eip_cur(s);
         gen_helper_cpuid(tcg_env);
         break;
-    case 0xf4: /* hlt */
-        if (check_cpl0(s)) {
-            gen_update_cc_op(s);
-            gen_update_eip_cur(s);
-            gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
-            s->base.is_jmp = DISAS_NORETURN;
-        }
-        break;
     case 0x100:
         modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
@@ -6175,72 +4001,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
             /* nothing to do */
         }
         break;
-    case 0x63: /* arpl or movslS (x86_64) */
-#ifdef TARGET_X86_64
-        if (CODE64(s)) {
-            int d_ot;
-            /* d_ot is the size of destination */
-            d_ot = dflag;
-
-            modrm = x86_ldub_code(env, s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            mod = (modrm >> 6) & 3;
-            rm = (modrm & 7) | REX_B(s);
-
-            if (mod == 3) {
-                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
-                /* sign extend */
-                if (d_ot == MO_64) {
-                    tcg_gen_ext32s_tl(s->T0, s->T0);
-                }
-                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
-            } else {
-                gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
-                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
-            }
-        } else
-#endif
-        {
-            TCGLabel *label1;
-            TCGv t0, t1, t2;
-
-            if (!PE(s) || VM86(s))
-                goto illegal_op;
-            t0 = tcg_temp_new();
-            t1 = tcg_temp_new();
-            t2 = tcg_temp_new();
-            ot = MO_16;
-            modrm = x86_ldub_code(env, s);
-            reg = (modrm >> 3) & 7;
-            mod = (modrm >> 6) & 3;
-            rm = modrm & 7;
-            if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, ot, t0, s->A0);
-            } else {
-                gen_op_mov_v_reg(s, ot, t0, rm);
-            }
-            gen_op_mov_v_reg(s, ot, t1, reg);
-            tcg_gen_andi_tl(s->tmp0, t0, 3);
-            tcg_gen_andi_tl(t1, t1, 3);
-            tcg_gen_movi_tl(t2, 0);
-            label1 = gen_new_label();
-            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
-            tcg_gen_andi_tl(t0, t0, ~3);
-            tcg_gen_or_tl(t0, t0, t1);
-            tcg_gen_movi_tl(t2, CC_Z);
-            gen_set_label(label1);
-            if (mod != 3) {
-                gen_op_st_v(s, ot, t0, s->A0);
-            } else {
-                gen_op_mov_reg_v(s, ot, rm, t0);
-            }
-            gen_compute_eflags(s);
-            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
-            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
-        }
-        break;
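
The compare-and-patch sequence above is ARPL's architectural behaviour:
if the destination selector's RPL (bits 1:0) is below the source's,
raise it and set ZF, otherwise clear ZF. Equivalently (illustrative
helper, not QEMU code):

    #include <stdbool.h>
    #include <stdint.h>

    static bool arpl(uint16_t *dst, uint16_t src) /* returns new ZF */
    {
        if ((*dst & 3) < (src & 3)) {
            *dst = (*dst & ~3) | (src & 3);
            return true;    /* RPL adjusted */
        }
        return false;       /* selector unchanged */
    }
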
     case 0x102: /* lar */
     case 0x103: /* lsl */
         {
@@ -6567,18 +4327,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         }
         break;
     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
-    case 0x1c3: /* MOVNTI reg, mem */
-        if (!(s->cpuid_features & CPUID_SSE2))
-            goto illegal_op;
-        ot = mo_64_32(dflag);
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        if (mod == 3)
-            goto illegal_op;
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
-        break;
     case 0x1ae:
         modrm = x86_ldub_code(env, s);
         switch (modrm) {
@@ -6821,28 +4569,21 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
 
         set_cc_op(s, CC_OP_POPCNT);
         break;
-    case 0x10e ... 0x117:
-    case 0x128 ... 0x12f:
-    case 0x138 ... 0x13a:
-    case 0x150 ... 0x179:
-    case 0x17c ... 0x17f:
-    case 0x1c2:
-    case 0x1c4 ... 0x1c6:
-    case 0x1d0 ... 0x1fe:
-        disas_insn_new(s, cpu, b);
-        break;
     default:
-        goto unknown_op;
+        g_assert_not_reached();
     }
-    return true;
+    return;
  illegal_op:
     gen_illegal_opcode(s);
-    return true;
+    return;
  unknown_op:
     gen_unknown_opcode(env, s);
-    return true;
 }
 
+#include "decode-new.h"
+#include "emit.c.inc"
+#include "decode-new.c.inc"
+
 void tcg_x86_init(void)
 {
     static const char reg_names[CPU_NB_REGS][4] = {
@@ -6964,7 +4705,6 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 
     dc->cc_op = CC_OP_DYNAMIC;
     dc->cc_op_dirty = false;
-    dc->popl_esp_hack = 0;
     /* select memory access functions */
     dc->mem_index = cpu_mmu_index(cpu, false);
     dc->cpuid_features = env->features[FEAT_1_EDX];
@@ -7016,6 +4756,9 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
 {
     DisasContext *dc = container_of(dcbase, DisasContext, base);
+    bool orig_cc_op_dirty = dc->cc_op_dirty;
+    CCOp orig_cc_op = dc->cc_op;
+    target_ulong orig_pc_save = dc->pc_save;
 
 #ifdef TARGET_VSYSCALL_PAGE
     /*
@@ -7028,23 +4771,51 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
     }
 #endif
 
-    if (disas_insn(dc, cpu)) {
-        target_ulong pc_next = dc->pc;
-        dc->base.pc_next = pc_next;
+    switch (sigsetjmp(dc->jmpbuf, 0)) {
+    case 0:
+        disas_insn(dc, cpu);
+        break;
+    case 1:
+        gen_exception_gpf(dc);
+        break;
+    case 2:
+        /* Restore state that may affect the next instruction. */
+        dc->pc = dc->base.pc_next;
+        /*
+         * TODO: these saves/restores can be removed once the table-based
+         * decoder is complete; the insn will then be decoded in full
+         * before any code generation that might affect these variables.
+         */
+        dc->cc_op_dirty = orig_cc_op_dirty;
+        dc->cc_op = orig_cc_op;
+        dc->pc_save = orig_pc_save;
+        /* END TODO */
+        dc->base.num_insns--;
+        tcg_remove_ops_after(dc->prev_insn_end);
+        dc->base.insn_start = dc->prev_insn_start;
+        dc->base.is_jmp = DISAS_TOO_MANY;
+        return;
+    default:
+        g_assert_not_reached();
+    }
 
-        if (dc->base.is_jmp == DISAS_NEXT) {
-            if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
-                /*
-                 * If single step mode, we generate only one instruction and
-                 * generate an exception.
-                 * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
-                 * the flag and abort the translation to give the irqs a
-                 * chance to happen.
-                 */
-                dc->base.is_jmp = DISAS_EOB_NEXT;
-            } else if (!is_same_page(&dc->base, pc_next)) {
-                dc->base.is_jmp = DISAS_TOO_MANY;
-            }
+    /*
+     * Instruction decoding completed (possibly with #GP if the
+     * 15-byte boundary was exceeded).
+     */
+    dc->base.pc_next = dc->pc;
+    if (dc->base.is_jmp == DISAS_NEXT) {
+        if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
+            /*
+             * In single-step mode, we generate only one instruction and
+             * then raise an exception.
+             * If irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+             * the flag and abort the translation to give the irqs a
+             * chance to happen.
+             */
+            dc->base.is_jmp = DISAS_EOB_NEXT;
+        } else if (!is_same_page(&dc->base, dc->base.pc_next)) {
+            dc->base.is_jmp = DISAS_TOO_MANY;
         }
     }
 }
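
The sigsetjmp protocol above gives the decoder a way to bail out
halfway through an instruction: a siglongjmp with value 1 reports the
15-byte-limit #GP, while value 2 discards the partially emitted ops and
ends the TB so the insn is retried at the start of the next one.
Stripped of the QEMU specifics, the shape of the pattern is (all names
illustrative):

    #include <setjmp.h>

    struct ctx {
        sigjmp_buf jmpbuf;
        /* ... decoder state captured before decoding starts ... */
    };

    static void decode_one(struct ctx *c)
    {
        /* may call siglongjmp(c->jmpbuf, 1) on an over-long insn, or
         * siglongjmp(c->jmpbuf, 2) when the fetch cannot complete here */
    }

    static void translate_one(struct ctx *c)
    {
        switch (sigsetjmp(c->jmpbuf, 0)) {
        case 0:
            decode_one(c);          /* normal path */
            break;
        case 1:
            /* raise #GP for the guest */
            break;
        case 2:
            /* restore captured state, drop the emitted ops, and finish
             * the TB so the insn restarts in a new one */
            break;
        }
    }
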
@@ -7070,7 +4841,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
     case DISAS_EOB_INHIBIT_IRQ:
         gen_update_cc_op(dc);
         gen_update_eip_cur(dc);
-        gen_eob_inhibit_irq(dc, true);
+        gen_eob_inhibit_irq(dc);
         break;
     case DISAS_JUMP:
         gen_jr(dc);