summary refs log tree commit diff stats
path: root/target/i386
diff options
context:
space:
mode:
Diffstat (limited to 'target/i386')
-rw-r--r--target/i386/cc_helper.c3
-rw-r--r--target/i386/cpu.h1
-rw-r--r--target/i386/helper.h2
-rw-r--r--target/i386/int_helper.c11
-rw-r--r--target/i386/ops_sse.h26
-rw-r--r--target/i386/ops_sse_header.h1
-rw-r--r--target/i386/translate.c89
7 files changed, 52 insertions, 81 deletions
diff --git a/target/i386/cc_helper.c b/target/i386/cc_helper.c
index 83af223c9f..c9c90e10db 100644
--- a/target/i386/cc_helper.c
+++ b/target/i386/cc_helper.c
@@ -105,6 +105,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
         return src1;
     case CC_OP_CLR:
         return CC_Z | CC_P;
+    case CC_OP_POPCNT:
+        return src1 ? 0 : CC_Z;
 
     case CC_OP_MULB:
         return compute_all_mulb(dst, src1);
@@ -232,6 +234,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_LOGICL:
     case CC_OP_LOGICQ:
     case CC_OP_CLR:
+    case CC_OP_POPCNT:
         return 0;
 
     case CC_OP_EFLAGS:
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index a7f2f6099d..a04e46b166 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -777,6 +777,7 @@ typedef enum {
     CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
 
     CC_OP_CLR, /* Z set, all other flags clear.  */
+    CC_OP_POPCNT, /* Z via CC_SRC, all other flags clear.  */
 
     CC_OP_NB,
 } CCOp;
diff --git a/target/i386/helper.h b/target/i386/helper.h
index bd9b2cf677..4c1aafffd6 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -202,8 +202,6 @@ DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
 DEF_HELPER_FLAGS_2(rdpkru, TCG_CALL_NO_WG, i64, env, i32)
 DEF_HELPER_FLAGS_3(wrpkru, TCG_CALL_NO_WG, void, env, i32, i64)
 
-DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
diff --git a/target/i386/int_helper.c b/target/i386/int_helper.c
index 9e873ac150..4dc5c65991 100644
--- a/target/i386/int_helper.c
+++ b/target/i386/int_helper.c
@@ -417,17 +417,6 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
 # define clztl  clz64
 #endif
 
-/* bit operations */
-target_ulong helper_ctz(target_ulong t0)
-{
-    return ctztl(t0);
-}
-
-target_ulong helper_clz(target_ulong t0)
-{
-    return clztl(t0);
-}
-
 target_ulong helper_pdep(target_ulong src, target_ulong mask)
 {
     target_ulong dest = 0;
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 7a98f53864..16509d0a74 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -2157,32 +2157,6 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
     return crc;
 }
 
-#define POPMASK(i)     ((target_ulong) -1 / ((1LL << (1 << i)) + 1))
-#define POPCOUNT(n, i) ((n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i)))
-target_ulong helper_popcnt(CPUX86State *env, target_ulong n, uint32_t type)
-{
-    CC_SRC = n ? 0 : CC_Z;
-
-    n = POPCOUNT(n, 0);
-    n = POPCOUNT(n, 1);
-    n = POPCOUNT(n, 2);
-    n = POPCOUNT(n, 3);
-    if (type == 1) {
-        return n & 0xff;
-    }
-
-    n = POPCOUNT(n, 4);
-#ifndef TARGET_X86_64
-    return n;
-#else
-    if (type == 2) {
-        return n & 0xff;
-    }
-
-    return POPCOUNT(n, 5);
-#endif
-}
-
 void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
                                     uint32_t ctrl)
 {
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 64c5857cf4..094aafc573 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -333,7 +333,6 @@ DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_4(glue(pcmpistrm, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_3(crc32, tl, i32, tl, i32)
-DEF_HELPER_3(popcnt, tl, env, tl, i32)
 #endif
 
 /* AES-NI op helpers */
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 59e11fcd1f..5f5e60dab1 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -222,6 +222,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_CLR] = 0,
+    [CC_OP_POPCNT] = USES_CC_SRC,
 };
 
 static void set_cc_op(DisasContext *s, CCOp op)
@@ -383,8 +384,7 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
 static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
 {
     if (ot == MO_8 && byte_reg_is_xH(reg)) {
-        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
-        tcg_gen_ext8u_tl(t0, t0);
+        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
     } else {
         tcg_gen_mov_tl(t0, cpu_regs[reg]);
     }
@@ -758,6 +758,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
     case CC_OP_CLR:
+    case CC_OP_POPCNT:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 
     case CC_OP_INCB ... CC_OP_INCQ:
@@ -825,6 +826,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_S };
     case CC_OP_CLR:
+    case CC_OP_POPCNT:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
     default:
         {
@@ -844,6 +846,7 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                              .mask = -1, .no_setcond = true };
     case CC_OP_CLR:
+    case CC_OP_POPCNT:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
     default:
         gen_compute_eflags(s);
@@ -867,6 +870,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
                              .mask = CC_Z };
     case CC_OP_CLR:
         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
+    case CC_OP_POPCNT:
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
+                             .mask = -1 };
     default:
         {
             TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
@@ -3768,8 +3774,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                     /* Extract the LEN into a mask.  Lengths larger than
                        operand size get all ones.  */
-                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
-                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
+                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
                     tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
                                        cpu_A0, bound);
                     tcg_temp_free(bound);
@@ -3920,9 +3925,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                             gen_compute_eflags(s);
                         }
                         carry_in = cpu_tmp0;
-                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
-                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
-                        tcg_gen_andi_tl(carry_in, carry_in, 1);
+                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
+                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                     }
 
                     switch (ot) {
@@ -5447,21 +5451,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(ot, cpu_T0, rm);
-                switch (s_ot) {
-                case MO_UB:
-                    tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
-                    break;
-                case MO_SB:
-                    tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
-                    break;
-                case MO_UW:
-                    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
-                    break;
-                default:
-                case MO_SW:
-                    tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-                    break;
+                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
+                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
+                } else {
+                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    switch (s_ot) {
+                    case MO_UB:
+                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                        break;
+                    case MO_SB:
+                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                        break;
+                    case MO_UW:
+                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                        break;
+                    default:
+                    case MO_SW:
+                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                        break;
+                    }
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
@@ -6806,21 +6814,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
             int size = 8 << ot;
+            /* For lzcnt/tzcnt, C bit is defined related to the input. */
             tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
             if (b & 1) {
                 /* For lzcnt, reduce the target_ulong result by the
                    number of zeros that we expect to find at the top.  */
-                gen_helper_clz(cpu_T0, cpu_T0);
+                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
                 tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
             } else {
-                /* For tzcnt, a zero input must return the operand size:
-                   force all bits outside the operand size to 1.  */
-                target_ulong mask = (target_ulong)-2 << (size - 1);
-                tcg_gen_ori_tl(cpu_T0, cpu_T0, mask);
-                gen_helper_ctz(cpu_T0, cpu_T0);
-            }
-            /* For lzcnt/tzcnt, C and Z bits are defined and are
-               related to the result.  */
+                /* For tzcnt, a zero input must return the operand size.  */
+                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
+            }
+            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
             gen_op_update1_cc();
             set_cc_op(s, CC_OP_BMILGB + ot);
         } else {
@@ -6828,20 +6833,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                to the input and not the result.  */
             tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
             set_cc_op(s, CC_OP_LOGICB + ot);
+
+            /* ??? The manual says that the output is undefined when the
+               input is zero, but real hardware leaves it unchanged, and
+               real programs appear to depend on that.  Accomplish this
+               by passing the output as the value to return upon zero.  */
             if (b & 1) {
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
-                gen_helper_clz(cpu_T0, cpu_T0);
+                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
                 tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
             } else {
-                gen_helper_ctz(cpu_T0, cpu_T0);
+                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
             }
-            /* ??? The manual says that the output is undefined when the
-               input is zero, but real hardware leaves it unchanged, and
-               real programs appear to depend on that.  */
-            tcg_gen_movi_tl(cpu_tmp0, 0);
-            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0,
-                               cpu_regs[reg], cpu_T0);
         }
         gen_op_mov_reg_v(ot, reg, cpu_T0);
         break;
@@ -8207,10 +8212,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot));
+        gen_extu(ot, cpu_T0);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
         gen_op_mov_reg_v(ot, reg, cpu_T0);
 
-        set_cc_op(s, CC_OP_EFLAGS);
+        set_cc_op(s, CC_OP_POPCNT);
         break;
     case 0x10e ... 0x10f:
         /* 3DNow! instructions, ignore prefixes */