diff options
Diffstat (limited to 'target/i386')
| -rw-r--r-- | target/i386/cc_helper.c | 3 | ||||
| -rw-r--r-- | target/i386/cpu.h | 1 | ||||
| -rw-r--r-- | target/i386/helper.h | 2 | ||||
| -rw-r--r-- | target/i386/int_helper.c | 11 | ||||
| -rw-r--r-- | target/i386/ops_sse.h | 26 | ||||
| -rw-r--r-- | target/i386/ops_sse_header.h | 1 | ||||
| -rw-r--r-- | target/i386/translate.c | 89 |
7 files changed, 52 insertions, 81 deletions
diff --git a/target/i386/cc_helper.c b/target/i386/cc_helper.c index 83af223c9f..c9c90e10db 100644 --- a/target/i386/cc_helper.c +++ b/target/i386/cc_helper.c @@ -105,6 +105,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, return src1; case CC_OP_CLR: return CC_Z | CC_P; + case CC_OP_POPCNT: + return src1 ? 0 : CC_Z; case CC_OP_MULB: return compute_all_mulb(dst, src1); @@ -232,6 +234,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_LOGICL: case CC_OP_LOGICQ: case CC_OP_CLR: + case CC_OP_POPCNT: return 0; case CC_OP_EFLAGS: diff --git a/target/i386/cpu.h b/target/i386/cpu.h index a7f2f6099d..a04e46b166 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -777,6 +777,7 @@ typedef enum { CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ CC_OP_CLR, /* Z set, all other flags clear. */ + CC_OP_POPCNT, /* Z via CC_SRC, all other flags clear. */ CC_OP_NB, } CCOp; diff --git a/target/i386/helper.h b/target/i386/helper.h index bd9b2cf677..4c1aafffd6 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -202,8 +202,6 @@ DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64) DEF_HELPER_FLAGS_2(rdpkru, TCG_CALL_NO_WG, i64, env, i32) DEF_HELPER_FLAGS_3(wrpkru, TCG_CALL_NO_WG, void, env, i32, i64) -DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/i386/int_helper.c b/target/i386/int_helper.c index 9e873ac150..4dc5c65991 100644 --- a/target/i386/int_helper.c +++ b/target/i386/int_helper.c @@ -417,17 +417,6 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0) # define clztl clz64 #endif -/* bit operations */ -target_ulong helper_ctz(target_ulong t0) -{ - return ctztl(t0); -} - -target_ulong helper_clz(target_ulong t0) -{ - return clztl(t0); -} - target_ulong helper_pdep(target_ulong src, target_ulong mask) { target_ulong dest = 0; diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 7a98f53864..16509d0a74 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -2157,32 +2157,6 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len) return crc; } -#define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1)) -#define POPCOUNT(n, i) ((n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))) -target_ulong helper_popcnt(CPUX86State *env, target_ulong n, uint32_t type) -{ - CC_SRC = n ? 0 : CC_Z; - - n = POPCOUNT(n, 0); - n = POPCOUNT(n, 1); - n = POPCOUNT(n, 2); - n = POPCOUNT(n, 3); - if (type == 1) { - return n & 0xff; - } - - n = POPCOUNT(n, 4); -#ifndef TARGET_X86_64 - return n; -#else - if (type == 2) { - return n & 0xff; - } - - return POPCOUNT(n, 5); -#endif -} - void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t ctrl) { diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 64c5857cf4..094aafc573 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -333,7 +333,6 @@ DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpistrm, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_3(crc32, tl, i32, tl, i32) -DEF_HELPER_3(popcnt, tl, env, tl, i32) #endif /* AES-NI op helpers */ diff --git a/target/i386/translate.c b/target/i386/translate.c index 59e11fcd1f..5f5e60dab1 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -222,6 +222,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, [CC_OP_CLR] = 0, + [CC_OP_POPCNT] = USES_CC_SRC, }; static void set_cc_op(DisasContext *s, CCOp op) @@ -383,8 +384,7 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0) static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg) { if (ot == MO_8 && byte_reg_is_xH(reg)) { - tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8); - tcg_gen_ext8u_tl(t0, t0); + tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8); } else { tcg_gen_mov_tl(t0, cpu_regs[reg]); } @@ -758,6 +758,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) case CC_OP_LOGICB ... CC_OP_LOGICQ: case CC_OP_CLR: + case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; case CC_OP_INCB ... CC_OP_INCQ: @@ -825,6 +826,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_S }; case CC_OP_CLR: + case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: { @@ -844,6 +846,7 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, .mask = -1, .no_setcond = true }; case CC_OP_CLR: + case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: gen_compute_eflags(s); @@ -867,6 +870,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) .mask = CC_Z }; case CC_OP_CLR: return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 }; + case CC_OP_POPCNT: + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src, + .mask = -1 }; default: { TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3; @@ -3768,8 +3774,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* Extract the LEN into a mask. Lengths larger than operand size get all ones. */ - tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8); - tcg_gen_ext8u_tl(cpu_A0, cpu_A0); + tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8); tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound, cpu_A0, bound); tcg_temp_free(bound); @@ -3920,9 +3925,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_compute_eflags(s); } carry_in = cpu_tmp0; - tcg_gen_shri_tl(carry_in, cpu_cc_src, - ctz32(b == 0x1f6 ? CC_C : CC_O)); - tcg_gen_andi_tl(carry_in, carry_in, 1); + tcg_gen_extract_tl(carry_in, cpu_cc_src, + ctz32(b == 0x1f6 ? CC_C : CC_O), 1); } switch (ot) { @@ -5447,21 +5451,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); if (mod == 3) { - gen_op_mov_v_reg(ot, cpu_T0, rm); - switch (s_ot) { - case MO_UB: - tcg_gen_ext8u_tl(cpu_T0, cpu_T0); - break; - case MO_SB: - tcg_gen_ext8s_tl(cpu_T0, cpu_T0); - break; - case MO_UW: - tcg_gen_ext16u_tl(cpu_T0, cpu_T0); - break; - default: - case MO_SW: - tcg_gen_ext16s_tl(cpu_T0, cpu_T0); - break; + if (s_ot == MO_SB && byte_reg_is_xH(rm)) { + tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8); + } else { + gen_op_mov_v_reg(ot, cpu_T0, rm); + switch (s_ot) { + case MO_UB: + tcg_gen_ext8u_tl(cpu_T0, cpu_T0); + break; + case MO_SB: + tcg_gen_ext8s_tl(cpu_T0, cpu_T0); + break; + case MO_UW: + tcg_gen_ext16u_tl(cpu_T0, cpu_T0); + break; + default: + case MO_SW: + tcg_gen_ext16s_tl(cpu_T0, cpu_T0); + break; + } } gen_op_mov_reg_v(d_ot, reg, cpu_T0); } else { @@ -6806,21 +6814,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, ? s->cpuid_ext3_features & CPUID_EXT3_ABM : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { int size = 8 << ot; + /* For lzcnt/tzcnt, C bit is defined related to the input. */ tcg_gen_mov_tl(cpu_cc_src, cpu_T0); if (b & 1) { /* For lzcnt, reduce the target_ulong result by the number of zeros that we expect to find at the top. */ - gen_helper_clz(cpu_T0, cpu_T0); + tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS); tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size); } else { - /* For tzcnt, a zero input must return the operand size: - force all bits outside the operand size to 1. */ - target_ulong mask = (target_ulong)-2 << (size - 1); - tcg_gen_ori_tl(cpu_T0, cpu_T0, mask); - gen_helper_ctz(cpu_T0, cpu_T0); - } - /* For lzcnt/tzcnt, C and Z bits are defined and are - related to the result. */ + /* For tzcnt, a zero input must return the operand size. */ + tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size); + } + /* For lzcnt/tzcnt, Z bit is defined related to the result. */ gen_op_update1_cc(); set_cc_op(s, CC_OP_BMILGB + ot); } else { @@ -6828,20 +6833,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, to the input and not the result. */ tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); set_cc_op(s, CC_OP_LOGICB + ot); + + /* ??? The manual says that the output is undefined when the + input is zero, but real hardware leaves it unchanged, and + real programs appear to depend on that. Accomplish this + by passing the output as the value to return upon zero. */ if (b & 1) { /* For bsr, return the bit index of the first 1 bit, not the count of leading zeros. */ - gen_helper_clz(cpu_T0, cpu_T0); + tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1); + tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1); tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1); } else { - gen_helper_ctz(cpu_T0, cpu_T0); + tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]); } - /* ??? The manual says that the output is undefined when the - input is zero, but real hardware leaves it unchanged, and - real programs appear to depend on that. */ - tcg_gen_movi_tl(cpu_tmp0, 0); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0, - cpu_regs[reg], cpu_T0); } gen_op_mov_reg_v(ot, reg, cpu_T0); break; @@ -8207,10 +8212,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot)); + gen_extu(ot, cpu_T0); + tcg_gen_mov_tl(cpu_cc_src, cpu_T0); + tcg_gen_ctpop_tl(cpu_T0, cpu_T0); gen_op_mov_reg_v(ot, reg, cpu_T0); - set_cc_op(s, CC_OP_EFLAGS); + set_cc_op(s, CC_OP_POPCNT); break; case 0x10e ... 0x10f: /* 3DNow! instructions, ignore prefixes */ |