diff options
Diffstat (limited to 'target/i386/tcg')
| -rw-r--r-- | target/i386/tcg/cc_helper.c | 6 | ||||
| -rw-r--r-- | target/i386/tcg/decode-new.c.inc | 152 | ||||
| -rw-r--r-- | target/i386/tcg/decode-new.h | 29 | ||||
| -rw-r--r-- | target/i386/tcg/emit.c.inc | 220 | ||||
| -rw-r--r-- | target/i386/tcg/excp_helper.c | 7 | ||||
| -rw-r--r-- | target/i386/tcg/fpu_helper.c | 10 | ||||
| -rw-r--r-- | target/i386/tcg/helper-tcg.h | 3 | ||||
| -rw-r--r-- | target/i386/tcg/int_helper.c | 8 | ||||
| -rw-r--r-- | target/i386/tcg/misc_helper.c | 4 | ||||
| -rw-r--r-- | target/i386/tcg/seg_helper.c | 8 | ||||
| -rw-r--r-- | target/i386/tcg/sysemu/fpu_helper.c | 6 | ||||
| -rw-r--r-- | target/i386/tcg/sysemu/misc_helper.c | 4 | ||||
| -rw-r--r-- | target/i386/tcg/tcg-cpu.c | 11 | ||||
| -rw-r--r-- | target/i386/tcg/translate.c | 183 |
14 files changed, 446 insertions, 205 deletions
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c index c310bd842f..f76e9cb8cf 100644 --- a/target/i386/tcg/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -220,9 +220,9 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, } } -uint32_t cpu_cc_compute_all(CPUX86State *env, int op) +uint32_t cpu_cc_compute_all(CPUX86State *env) { - return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op); + return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); } target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, @@ -335,7 +335,7 @@ target_ulong helper_read_eflags(CPUX86State *env) { uint32_t eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); eflags |= (env->df & DF_MASK); eflags |= env->eflags & ~(VM_MASK | RF_MASK); return eflags; diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 2bdbb1bba0..426c459412 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -26,6 +26,13 @@ * size (X86_SIZE_*) codes used in the manual. There are a few differences * though. * + * Operand sizes + * ------------- + * + * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64 + * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the + * "v" or "z" sizes. The decoder simply makes them separate operand sizes. + * * Vector operands * --------------- * @@ -44,6 +51,11 @@ * if the difference is expressed via prefixes. Individual instructions * are separated by prefix in the generator functions. * + * There is a custom size "xh" used to address half of a SSE/AVX operand. + * This points to a 64-bit operand for SSE operations, 128-bit operand + * for 256-bit AVX operands, etc. It is used for conversion operations + * such as VCVTPH2PS or VCVTSS2SD. + * * There are a couple cases in which instructions (e.g. MOVD) write the * whole XMM or MM register but are established incorrectly in the manual * as "d" or "q". These have to be fixed for the decoder to work correctly. @@ -139,10 +151,13 @@ #define cpuid(feat) .cpuid = X86_FEAT_##feat, #define xchg .special = X86_SPECIAL_Locked, +#define lock .special = X86_SPECIAL_HasLock, #define mmx .special = X86_SPECIAL_MMX, -#define zext0 .special = X86_SPECIAL_ZExtOp0, -#define zext2 .special = X86_SPECIAL_ZExtOp2, +#define op0_Rd .special = X86_SPECIAL_Op0_Rd, +#define op2_Ry .special = X86_SPECIAL_Op2_Ry, #define avx_movx .special = X86_SPECIAL_AVXExtMov, +#define sextT0 .special = X86_SPECIAL_SExtT0, +#define zextT0 .special = X86_SPECIAL_ZExtT0, #define vex1 .vex_class = 1, #define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar, @@ -523,6 +538,28 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = { [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), [0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), + + /* + * REG selects srcdest2 operand, VEX.vvvv selects src3. VEX class not found + * in manual, assumed to be 13 from the VEX.L0 constraint. + */ + [0xe0] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe1] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe2] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe3] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe4] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe5] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe6] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe7] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + + [0xe8] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe9] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xea] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xeb] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xec] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xed] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xee] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xef] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), }; /* five rows for no prefix, 66, F3, F2, 66+F2 */ @@ -558,8 +595,8 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { [5] = { X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)), {}, - X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)), - X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)), + X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), + X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), {}, }, [6] = { @@ -570,10 +607,10 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { {}, }, [7] = { - X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)), - X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)), - X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)), + X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), {}, }, }; @@ -619,13 +656,13 @@ static const X86OpEntry opcodes_0F3A[256] = { [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66), [0x06] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), - [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) zext0 p_66), - [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) zext0 p_66), + [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), + [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66), [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66), [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,xh, V,x, I,b, vex11 chk(W0) cpuid(F16C) p_66), - [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) zext2 p_66), + [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) op2_Ry p_66), [0x21] = X86_OP_GROUP0(VINSERTPS), [0x22] = X86_OP_ENTRY4(PINSR, V,dq, H,dq, E,y, vex5 cpuid(SSE41) p_66), @@ -1091,10 +1128,6 @@ static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod { int modrm = get_modrm(s, env); if ((modrm >> 6) == 3) { - if (s->prefix & PREFIX_LOCK) { - decode->e.gen = gen_illegal; - return 0xff; - } op->n = (modrm & 7); if (type != X86_TYPE_Q && type != X86_TYPE_N) { op->n |= REX_B(s); @@ -1201,6 +1234,8 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, case X86_TYPE_None: /* Implicit or absent */ case X86_TYPE_A: /* Implicit */ case X86_TYPE_F: /* EFLAGS/RFLAGS */ + case X86_TYPE_X: /* string source */ + case X86_TYPE_Y: /* string destination */ break; case X86_TYPE_B: /* VEX.vvvv selects a GPR */ @@ -1316,43 +1351,15 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, } case X86_TYPE_I: /* Immediate */ - op->unit = X86_OP_IMM; - decode->immediate = insn_get_signed(env, s, op->ot); - break; - case X86_TYPE_J: /* Relative offset for a jump */ op->unit = X86_OP_IMM; decode->immediate = insn_get_signed(env, s, op->ot); - decode->immediate += s->pc - s->cs_base; - if (s->dflag == MO_16) { - decode->immediate &= 0xffff; - } else if (!CODE64(s)) { - decode->immediate &= 0xffffffffu; - } break; case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bit register */ op->n = insn_get(env, s, op->ot) >> 4; break; - case X86_TYPE_X: /* string source */ - op->n = -1; - decode->mem = (AddressParts) { - .def_seg = R_DS, - .base = R_ESI, - .index = -1, - }; - break; - - case X86_TYPE_Y: /* string destination */ - op->n = -1; - decode->mem = (AddressParts) { - .def_seg = R_ES, - .base = R_EDI, - .index = -1, - }; - break; - case X86_TYPE_2op: *op = decode->op[0]; break; @@ -1518,6 +1525,9 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2); case X86_FEAT_SHA_NI: return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI); + + case X86_FEAT_CMPCCXADD: + return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD); } g_assert_not_reached(); } @@ -1677,6 +1687,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) bool first = true; X86DecodedInsn decode; X86DecodeFunc decode_func = decode_root; + uint8_t cc_live; s->has_modrm = false; @@ -1830,6 +1841,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } memset(&decode, 0, sizeof(decode)); + decode.cc_op = -1; decode.b = b; if (!decode_insn(s, env, decode_func, &decode)) { goto illegal_op; @@ -1869,19 +1881,22 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) if (decode.op[0].has_ea) { s->prefix |= PREFIX_LOCK; } + decode.e.special = X86_SPECIAL_HasLock; + /* fallthrough */ + case X86_SPECIAL_HasLock: break; - case X86_SPECIAL_ZExtOp0: + case X86_SPECIAL_Op0_Rd: assert(decode.op[0].unit == X86_OP_INT); if (!decode.op[0].has_ea) { decode.op[0].ot = MO_32; } break; - case X86_SPECIAL_ZExtOp2: + case X86_SPECIAL_Op2_Ry: assert(decode.op[2].unit == X86_OP_INT); if (!decode.op[2].has_ea) { - decode.op[2].ot = MO_32; + decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag; } break; @@ -1893,10 +1908,22 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } break; + case X86_SPECIAL_SExtT0: + case X86_SPECIAL_ZExtT0: + /* Handled in gen_load. */ + assert(decode.op[1].unit == X86_OP_INT); + break; + default: break; } + if (s->prefix & PREFIX_LOCK) { + if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) { + goto illegal_op; + } + } + if (!validate_vex(s, &decode)) { return; } @@ -1940,9 +1967,6 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); } if (s->prefix & PREFIX_LOCK) { - if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) { - goto illegal_op; - } gen_load(s, &decode, 2, s->T1); decode.e.gen(s, env, &decode); } else { @@ -1956,6 +1980,38 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) decode.e.gen(s, env, &decode); gen_writeback(s, &decode, 0, s->T0); } + + /* + * Write back flags after last memory access. Some newer ALU instructions, as + * well as SSE instructions, write flags in the gen_* function, but that can + * cause incorrect tracking of CC_OP for instructions that write to both memory + * and flags. + */ + if (decode.cc_op != -1) { + if (decode.cc_dst) { + tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst); + } + if (decode.cc_src) { + tcg_gen_mov_tl(cpu_cc_src, decode.cc_src); + } + if (decode.cc_src2) { + tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2); + } + if (decode.cc_op == CC_OP_DYNAMIC) { + tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic); + } + set_cc_op(s, decode.cc_op); + cc_live = cc_op_live[decode.cc_op]; + } else { + cc_live = 0; + } + if (decode.cc_op != CC_OP_DYNAMIC) { + assert(!decode.cc_op_dynamic); + assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST)); + assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC)); + assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2)); + } + return; gp_fault: gen_exception_gpf(s); diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index e6c904a319..15e6bfef4b 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -104,6 +104,7 @@ typedef enum X86CPUIDFeature { X86_FEAT_AVX2, X86_FEAT_BMI1, X86_FEAT_BMI2, + X86_FEAT_CMPCCXADD, X86_FEAT_F16C, X86_FEAT_FMA, X86_FEAT_MOVBE, @@ -158,15 +159,27 @@ typedef enum X86InsnCheck { typedef enum X86InsnSpecial { X86_SPECIAL_None, + /* Accepts LOCK prefix; LOCKed operations do not load or writeback operand 0 */ + X86_SPECIAL_HasLock, + /* Always locked if it has a memory operand (XCHG) */ X86_SPECIAL_Locked, /* - * Register operand 0/2 is zero extended to 32 bits. Rd/Mb or Rd/Mw - * in the manual. + * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits + * (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA + * does not trigger 16-bit writeback and, as a side effect, high-byte + * registers are never used. + */ + X86_SPECIAL_Op0_Rd, + + /* + * Ry/Mb in the manual (PINSRB). However, the high bits are never used by + * the instruction in either the register or memory cases; the *real* effect + * of this modifier is that high-byte registers are never used, even without + * a REX prefix. Therefore, PINSRW does not need it despite having Ry/Mw. */ - X86_SPECIAL_ZExtOp0, - X86_SPECIAL_ZExtOp2, + X86_SPECIAL_Op2_Ry, /* * Register operand 2 is extended to full width, while a memory operand @@ -179,6 +192,10 @@ typedef enum X86InsnSpecial { * become P/P/Q/N, and size "x" becomes "q". */ X86_SPECIAL_MMX, + + /* When loaded into s->T0, register operand 1 is zero/sign extended. */ + X86_SPECIAL_SExtT0, + X86_SPECIAL_ZExtT0, } X86InsnSpecial; /* @@ -267,6 +284,10 @@ struct X86DecodedInsn { target_ulong immediate; AddressParts mem; + TCGv cc_dst, cc_src, cc_src2; + TCGv_i32 cc_op_dynamic; + int8_t cc_op; + uint8_t b; }; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 82da5488d4..6bcf88ecd7 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -55,11 +55,6 @@ static void gen_NM_exception(DisasContext *s) gen_exception(s, EXCP07_PREX); } -static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) -{ - gen_illegal_opcode(s); -} - static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) { TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); @@ -237,9 +232,30 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) break; case X86_OP_INT: if (op->has_ea) { - gen_op_ld_v(s, op->ot, v, s->A0); + if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { + gen_op_ld_v(s, op->ot | MO_SIGN, v, s->A0); + } else { + gen_op_ld_v(s, op->ot, v, s->A0); + } + + } else if (op->ot == MO_8 && byte_reg_is_xH(s, op->n)) { + if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { + tcg_gen_sextract_tl(v, cpu_regs[op->n - 4], 8, 8); + } else { + tcg_gen_extract_tl(v, cpu_regs[op->n - 4], 8, 8); + } + + } else if (op->ot < MO_TL && v == s->T0 && + (decode->e.special == X86_SPECIAL_SExtT0 || + decode->e.special == X86_SPECIAL_ZExtT0)) { + if (decode->e.special == X86_SPECIAL_SExtT0) { + tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot | MO_SIGN); + } else { + tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot); + } + } else { - gen_op_mov_v_reg(s, op->ot, v, op->n); + tcg_gen_mov_tl(v, cpu_regs[op->n]); } break; case X86_OP_IMM: @@ -323,6 +339,19 @@ static inline int vector_len(DisasContext *s, X86DecodedInsn *decode) return s->vex_l ? 32 : 16; } +static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + decode->cc_dst = s->T0; + decode->cc_op = op; +} + +static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + decode->cc_src = s->T1; + decode->cc_dst = s->T0; + decode->cc_op = op; +} + static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs) { MemOp ot = decode->op[0].ot; @@ -1011,6 +1040,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod VSIB_AVX(VPGATHERD, vpgatherd) VSIB_AVX(VPGATHERQ, vpgatherq) +/* ADCX/ADOX do not have memory operands and can use set_cc_op. */ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) { int opposite_cc_op; @@ -1073,8 +1103,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) MemOp ot = decode->op[0].ot; tcg_gen_andc_tl(s->T0, s->T1, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1089,9 +1118,6 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) * Shifts larger than operand size get zeros. */ tcg_gen_ext8u_tl(s->A0, s->T1); - if (TARGET_LONG_BITS == 64 && ot == MO_32) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } tcg_gen_shr_tl(s->T0, s->T0, s->A0); tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); @@ -1105,10 +1131,10 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); tcg_gen_andc_tl(s->T0, s->T0, s->T1); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } +/* BLSI do not have memory operands and can use set_cc_op. */ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1120,6 +1146,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) set_cc_op(s, CC_OP_BMILGB + ot); } +/* BLSMSK do not have memory operands and can use set_cc_op. */ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1131,6 +1158,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode set_cc_op(s, CC_OP_BMILGB + ot); } +/* BLSR do not have memory operands and can use set_cc_op. */ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1151,18 +1179,119 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_ext8u_tl(s->T1, s->T1); + tcg_gen_shl_tl(s->A0, mone, s->T1); + tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); /* * Note that since we're using BMILG (in order to get O * cleared) we need to store the inverse into C. */ - tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); + tcg_gen_setcond_tl(TCG_COND_LEU, s->T1, s->T1, bound); + prepare_update2_cc(decode, s, CC_OP_BMILGB + ot); +} + +static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *label_top = gen_new_label(); + TCGLabel *label_bottom = gen_new_label(); + TCGv oldv = tcg_temp_new(); + TCGv newv = tcg_temp_new(); + TCGv cmpv = tcg_temp_new(); + TCGCond cond; + + TCGv cmp_lhs, cmp_rhs; + MemOp ot, ot_full; + + int jcc_op = (decode->b >> 1) & 7; + static const TCGCond cond_table[8] = { + [JCC_O] = TCG_COND_LT, /* test sign bit by comparing against 0 */ + [JCC_B] = TCG_COND_LTU, + [JCC_Z] = TCG_COND_EQ, + [JCC_BE] = TCG_COND_LEU, + [JCC_S] = TCG_COND_LT, /* test sign bit by comparing against 0 */ + [JCC_P] = TCG_COND_EQ, /* even parity - tests low bit of popcount */ + [JCC_L] = TCG_COND_LT, + [JCC_LE] = TCG_COND_LE, + }; - tcg_gen_shl_tl(s->A0, mone, s->T1); - tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); + cond = cond_table[jcc_op]; + if (decode->b & 1) { + cond = tcg_invert_cond(cond); + } - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_BMILGB + ot); + ot = decode->op[0].ot; + ot_full = ot | MO_LE; + if (jcc_op >= JCC_S) { + /* + * Sign-extend values before subtracting for S, P (zero/sign extension + * does not matter there) L, LE and their inverses. + */ + ot_full |= MO_SIGN; + } + + /* + * cmpv will be moved to cc_src *after* cpu_regs[] is written back, so use + * tcg_gen_ext_tl instead of gen_ext_tl. + */ + tcg_gen_ext_tl(cmpv, cpu_regs[decode->op[1].n], ot_full); + + /* + * Cmpxchg loop starts here. + * - s->T1: addition operand (from decoder) + * - s->A0: dest address (from decoder) + * - s->cc_srcT: memory operand (lhs for comparison) + * - cmpv: rhs for comparison + */ + gen_set_label(label_top); + gen_op_ld_v(s, ot_full, s->cc_srcT, s->A0); + tcg_gen_sub_tl(s->T0, s->cc_srcT, cmpv); + + /* Compute the comparison result by hand, to avoid clobbering cc_*. */ + switch (jcc_op) { + case JCC_O: + /* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */ + tcg_gen_xor_tl(newv, s->cc_srcT, s->T0); + tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv); + tcg_gen_and_tl(s->tmp0, s->tmp0, newv); + tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + case JCC_P: + tcg_gen_ext8u_tl(s->tmp0, s->T0); + tcg_gen_ctpop_tl(s->tmp0, s->tmp0); + tcg_gen_andi_tl(s->tmp0, s->tmp0, 1); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + case JCC_S: + tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + default: + cmp_lhs = s->cc_srcT, cmp_rhs = cmpv; + break; + } + + /* Compute new value: if condition does not hold, just store back s->cc_srcT */ + tcg_gen_add_tl(newv, s->cc_srcT, s->T1); + tcg_gen_movcond_tl(cond, newv, cmp_lhs, cmp_rhs, newv, s->cc_srcT); + tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, s->cc_srcT, newv, s->mem_index, ot_full); + + /* Exit unconditionally if cmpxchg succeeded. */ + tcg_gen_brcond_tl(TCG_COND_EQ, oldv, s->cc_srcT, label_bottom); + + /* Try again if there was actually a store to make. */ + tcg_gen_brcond_tl(cond, cmp_lhs, cmp_rhs, label_top); + gen_set_label(label_bottom); + + /* Store old value to registers only after a successful store. */ + gen_writeback(s, decode, 1, s->cc_srcT); + + decode->cc_dst = s->T0; + decode->cc_src = cmpv; + decode->cc_op = CC_OP_SUBB + ot; } static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1242,9 +1371,7 @@ static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); - gen_extu(s->aflag, s->A0); - gen_add_A0_ds_seg(s); + gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override); if (s->prefix & PREFIX_DATA) { gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0); @@ -1355,7 +1482,8 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) /* low part of result in VEX.vvvv, high in MODRM */ switch (ot) { - default: + case MO_32: +#ifdef TARGET_X86_64 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, @@ -1363,13 +1491,15 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); break; -#ifdef TARGET_X86_64 + case MO_64: - tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); - break; #endif - } + tcg_gen_mulu2_tl(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); + break; + default: + g_assert_not_reached(); + } } static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1432,19 +1562,11 @@ static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - MemOp ot = decode->op[1].ot; - if (ot < MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } gen_helper_pdep(s->T0, s->T0, s->T1); } static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - MemOp ot = decode->op[1].ot; - if (ot < MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } gen_helper_pext(s->T0, s->T0, s->T1); } @@ -1772,14 +1894,24 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - int b = decode->immediate; + int mask = ot == MO_64 ? 63 : 31; + int b = decode->immediate & mask; - if (ot == MO_64) { - tcg_gen_rotri_tl(s->T0, s->T0, b & 63); - } else { + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); + tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b); tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + break; + + case MO_64: +#endif + tcg_gen_rotri_tl(s->T0, s->T0, b); + break; + + default: + g_assert_not_reached(); } } @@ -1790,9 +1922,6 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) mask = ot == MO_64 ? 63 : 31; tcg_gen_andi_tl(s->T1, s->T1, mask); - if (ot != MO_64) { - tcg_gen_ext32s_tl(s->T0, s->T0); - } tcg_gen_sar_tl(s->T0, s->T0, s->T1); } @@ -1867,9 +1996,6 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) mask = ot == MO_64 ? 63 : 31; tcg_gen_andi_tl(s->T1, s->T1, mask); - if (ot != MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } tcg_gen_shr_tl(s->T0, s->T0, s->T1); } diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c index 7c3c8dc7fe..65e37ae2a0 100644 --- a/target/i386/tcg/excp_helper.c +++ b/target/i386/tcg/excp_helper.c @@ -28,7 +28,7 @@ G_NORETURN void helper_raise_interrupt(CPUX86State *env, int intno, int next_eip_addend) { - raise_interrupt(env, intno, 1, 0, next_eip_addend); + raise_interrupt(env, intno, next_eip_addend); } G_NORETURN void helper_raise_exception(CPUX86State *env, int exception_index) @@ -112,10 +112,9 @@ void raise_interrupt2(CPUX86State *env, int intno, /* shortcuts to generate exceptions */ -G_NORETURN void raise_interrupt(CPUX86State *env, int intno, int is_int, - int error_code, int next_eip_addend) +G_NORETURN void raise_interrupt(CPUX86State *env, int intno, int next_eip_addend) { - raise_interrupt2(env, intno, is_int, error_code, next_eip_addend, 0); + raise_interrupt2(env, intno, 1, 0, next_eip_addend, 0); } G_NORETURN void raise_exception_err(CPUX86State *env, int exception_index, diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 4430d3d380..4b965a5d6c 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -484,9 +484,8 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); - eflags = cpu_cc_compute_all(env, CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; + eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); + CC_SRC = eflags | fcomi_ccval[ret + 1]; merge_exception_flags(env, old_flags); } @@ -497,9 +496,8 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); - eflags = cpu_cc_compute_all(env, CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; + eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); + CC_SRC = eflags | fcomi_ccval[ret + 1]; merge_exception_flags(env, old_flags); } diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index cd1723389a..ce34b737bb 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -65,8 +65,7 @@ G_NORETURN void raise_exception_err(CPUX86State *env, int exception_index, int error_code); G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index, int error_code, uintptr_t retaddr); -G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int is_int, - int error_code, int next_eip_addend); +G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int next_eip_addend); G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr, MMUAccessType access_type, uintptr_t retaddr); diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index 05418f181f..ab85dc5540 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -190,7 +190,7 @@ void helper_aaa(CPUX86State *env) int al, ah, af; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; ah = (env->regs[R_EAX] >> 8) & 0xff; @@ -214,7 +214,7 @@ void helper_aas(CPUX86State *env) int al, ah, af; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; ah = (env->regs[R_EAX] >> 8) & 0xff; @@ -237,7 +237,7 @@ void helper_daa(CPUX86State *env) int old_al, al, af, cf; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); cf = eflags & CC_C; af = eflags & CC_A; old_al = al = env->regs[R_EAX] & 0xff; @@ -264,7 +264,7 @@ void helper_das(CPUX86State *env) int al, al1, af, cf; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); cf = eflags & CC_C; af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c index babff06186..b0f0f7b893 100644 --- a/target/i386/tcg/misc_helper.c +++ b/target/i386/tcg/misc_helper.c @@ -41,9 +41,9 @@ void helper_into(CPUX86State *env, int next_eip_addend) { int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if (eflags & CC_O) { - raise_interrupt(env, EXCP04_INTO, 1, 0, next_eip_addend); + raise_interrupt(env, EXCP04_INTO, next_eip_addend); } } diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index eb29a1fd4e..34ccabd8ce 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -2230,7 +2230,7 @@ target_ulong helper_lsl(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2277,7 +2277,7 @@ target_ulong helper_lar(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2326,7 +2326,7 @@ void helper_verr(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2364,7 +2364,7 @@ void helper_verw(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } diff --git a/target/i386/tcg/sysemu/fpu_helper.c b/target/i386/tcg/sysemu/fpu_helper.c index 93506cdd94..e0305ba234 100644 --- a/target/i386/tcg/sysemu/fpu_helper.c +++ b/target/i386/tcg/sysemu/fpu_helper.c @@ -32,9 +32,9 @@ void x86_register_ferr_irq(qemu_irq irq) void fpu_check_raise_ferr_irq(CPUX86State *env) { if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) { - qemu_mutex_lock_iothread(); + bql_lock(); qemu_irq_raise(ferr_irq); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } } @@ -49,7 +49,7 @@ void cpu_set_ignne(void) { CPUX86State *env = &X86_CPU(first_cpu)->env; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); env->hflags2 |= HF2_IGNNE_MASK; /* diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index e1528b7f80..1ddfc9fe09 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -118,9 +118,9 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) break; case 8: if (!(env->hflags2 & HF2_VINTR_MASK)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(env_archcpu(env)->apic_state, t0); - qemu_mutex_unlock_iothread(); + bql_unlock(); } env->int_ctl = (env->int_ctl & ~V_TPR_MASK) | (t0 & V_TPR_MASK); diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 2c6a12c835..6e881e9e27 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -52,7 +52,12 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs, /* The instruction pointer is always up to date with CF_PCREL. */ if (!(tb_cflags(tb) & CF_PCREL)) { CPUX86State *env = cpu_env(cs); - env->eip = tb->pc - tb->cs_base; + + if (tb->flags & HF_CS64_MASK) { + env->eip = tb->pc; + } else { + env->eip = (uint32_t)(tb->pc - tb->cs_base); + } } } @@ -66,8 +71,10 @@ static void x86_restore_state_to_opc(CPUState *cs, if (tb_cflags(tb) & CF_PCREL) { env->eip = (env->eip & TARGET_PAGE_MASK) | data[0]; + } else if (tb->flags & HF_CS64_MASK) { + env->eip = data[0]; } else { - env->eip = data[0] - tb->cs_base; + env->eip = (uint32_t)(data[0] - tb->cs_base); } if (cc_op != CC_OP_DYNAMIC) { env->cc_op = cc_op; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 587d88692a..e1eb82a5c6 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -122,6 +122,7 @@ typedef struct DisasContext { int cpuid_ext3_features; int cpuid_7_0_ebx_features; int cpuid_7_0_ecx_features; + int cpuid_7_1_eax_features; int cpuid_xsave_features; /* TCG local temps */ @@ -522,9 +523,9 @@ void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val) gen_op_mov_reg_v(s, size, reg, s->tmp0); } -static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg) +static inline void gen_op_add_reg(DisasContext *s, MemOp size, int reg, TCGv val) { - tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0); + tcg_gen_add_tl(s->tmp0, cpu_regs[reg], val); gen_op_mov_reg_v(s, size, reg, s->tmp0); } @@ -552,8 +553,10 @@ static void gen_update_eip_cur(DisasContext *s) assert(s->pc_save != -1); if (tb_cflags(s->base.tb) & CF_PCREL) { tcg_gen_addi_tl(cpu_eip, cpu_eip, s->base.pc_next - s->pc_save); + } else if (CODE64(s)) { + tcg_gen_movi_tl(cpu_eip, s->base.pc_next); } else { - tcg_gen_movi_tl(cpu_eip, s->base.pc_next - s->cs_base); + tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->base.pc_next - s->cs_base)); } s->pc_save = s->base.pc_next; } @@ -563,8 +566,10 @@ static void gen_update_eip_next(DisasContext *s) assert(s->pc_save != -1); if (tb_cflags(s->base.tb) & CF_PCREL) { tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); + } else if (CODE64(s)) { + tcg_gen_movi_tl(cpu_eip, s->base.pc_next); } else { - tcg_gen_movi_tl(cpu_eip, s->pc - s->cs_base); + tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->base.pc_next - s->cs_base)); } s->pc_save = s->pc; } @@ -610,8 +615,10 @@ static TCGv eip_next_tl(DisasContext *s) TCGv ret = tcg_temp_new(); tcg_gen_addi_tl(ret, cpu_eip, s->pc - s->pc_save); return ret; + } else if (CODE64(s)) { + return tcg_constant_tl(s->pc); } else { - return tcg_constant_tl(s->pc - s->cs_base); + return tcg_constant_tl((uint32_t)(s->pc - s->cs_base)); } } @@ -622,22 +629,24 @@ static TCGv eip_cur_tl(DisasContext *s) TCGv ret = tcg_temp_new(); tcg_gen_addi_tl(ret, cpu_eip, s->base.pc_next - s->pc_save); return ret; + } else if (CODE64(s)) { + return tcg_constant_tl(s->base.pc_next); } else { - return tcg_constant_tl(s->base.pc_next - s->cs_base); + return tcg_constant_tl((uint32_t)(s->base.pc_next - s->cs_base)); } } -/* Compute SEG:REG into A0. SEG is selected from the override segment +/* Compute SEG:REG into DEST. SEG is selected from the override segment (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to indicate no override. */ -static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, - int def_seg, int ovr_seg) +static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0, + int def_seg, int ovr_seg) { switch (aflag) { #ifdef TARGET_X86_64 case MO_64: if (ovr_seg < 0) { - tcg_gen_mov_tl(s->A0, a0); + tcg_gen_mov_tl(dest, a0); return; } break; @@ -648,14 +657,14 @@ static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, ovr_seg = def_seg; } if (ovr_seg < 0) { - tcg_gen_ext32u_tl(s->A0, a0); + tcg_gen_ext32u_tl(dest, a0); return; } break; case MO_16: /* 16 bit address */ - tcg_gen_ext16u_tl(s->A0, a0); - a0 = s->A0; + tcg_gen_ext16u_tl(dest, a0); + a0 = dest; if (ovr_seg < 0) { if (ADDSEG(s)) { ovr_seg = def_seg; @@ -672,17 +681,23 @@ static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, TCGv seg = cpu_seg_base[ovr_seg]; if (aflag == MO_64) { - tcg_gen_add_tl(s->A0, a0, seg); + tcg_gen_add_tl(dest, a0, seg); } else if (CODE64(s)) { - tcg_gen_ext32u_tl(s->A0, a0); - tcg_gen_add_tl(s->A0, s->A0, seg); + tcg_gen_ext32u_tl(dest, a0); + tcg_gen_add_tl(dest, dest, seg); } else { - tcg_gen_add_tl(s->A0, a0, seg); - tcg_gen_ext32u_tl(s->A0, s->A0); + tcg_gen_add_tl(dest, a0, seg); + tcg_gen_ext32u_tl(dest, dest); } } } +static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, + int def_seg, int ovr_seg) +{ + gen_lea_v_seg_dest(s, aflag, s->A0, a0, def_seg, ovr_seg); +} + static inline void gen_string_movl_A0_ESI(DisasContext *s) { gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override); @@ -693,10 +708,12 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s) gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1); } -static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot) +static inline TCGv gen_compute_Dshift(DisasContext *s, MemOp ot) { - tcg_gen_ld32s_tl(s->T0, tcg_env, offsetof(CPUX86State, df)); - tcg_gen_shli_tl(s->T0, s->T0, ot); + TCGv dshift = tcg_temp_new(); + tcg_gen_ld32s_tl(dshift, tcg_env, offsetof(CPUX86State, df)); + tcg_gen_shli_tl(dshift, dshift, ot); + return dshift; }; static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) @@ -704,6 +721,9 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) if (size == MO_TL) { return src; } + if (!dst) { + dst = tcg_temp_new(); + } tcg_gen_ext_tl(dst, src, size | (sign ? MO_SIGN : 0)); return dst; } @@ -720,9 +740,9 @@ static void gen_exts(MemOp ot, TCGv reg) static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1) { - tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); - gen_extu(s->aflag, s->tmp0); - tcg_gen_brcondi_tl(cond, s->tmp0, 0, label1); + TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false); + + tcg_gen_brcondi_tl(cond, tmp, 0, label1); } static inline void gen_op_jz_ecx(DisasContext *s, TCGLabel *label1) @@ -804,13 +824,16 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port, static void gen_movs(DisasContext *s, MemOp ot) { + TCGv dshift; + gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); gen_string_movl_A0_EDI(s); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + + dshift = gen_compute_Dshift(s, ot); + gen_op_add_reg(s, s->aflag, R_ESI, dshift); + gen_op_add_reg(s, s->aflag, R_EDI, dshift); } static void gen_op_update1_cc(DisasContext *s) @@ -843,22 +866,22 @@ static void gen_op_update_neg_cc(DisasContext *s) tcg_gen_movi_tl(s->cc_srcT, 0); } -/* compute all eflags to cc_src */ -static void gen_compute_eflags(DisasContext *s) +/* compute all eflags to reg */ +static void gen_mov_eflags(DisasContext *s, TCGv reg) { - TCGv zero, dst, src1, src2; + TCGv dst, src1, src2; + TCGv_i32 cc_op; int live, dead; if (s->cc_op == CC_OP_EFLAGS) { + tcg_gen_mov_tl(reg, cpu_cc_src); return; } if (s->cc_op == CC_OP_CLR) { - tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P); - set_cc_op(s, CC_OP_EFLAGS); + tcg_gen_movi_tl(reg, CC_Z | CC_P); return; } - zero = NULL; dst = cpu_cc_dst; src1 = cpu_cc_src; src2 = cpu_cc_src2; @@ -867,7 +890,7 @@ static void gen_compute_eflags(DisasContext *s) live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2); if (dead) { - zero = tcg_constant_tl(0); + TCGv zero = tcg_constant_tl(0); if (dead & USES_CC_DST) { dst = zero; } @@ -879,8 +902,18 @@ static void gen_compute_eflags(DisasContext *s) } } - gen_update_cc_op(s); - gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op); + if (s->cc_op != CC_OP_DYNAMIC) { + cc_op = tcg_constant_i32(s->cc_op); + } else { + cc_op = cpu_cc_op; + } + gen_helper_cc_compute_all(reg, dst, src1, src2, cc_op); +} + +/* compute all eflags to cc_src */ +static void gen_compute_eflags(DisasContext *s) +{ + gen_mov_eflags(s, cpu_cc_src); set_cc_op(s, CC_OP_EFLAGS); } @@ -1012,6 +1045,9 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) case CC_OP_CLR: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + case CC_OP_MULB ... CC_OP_MULQ: + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = -1 }; default: gen_compute_eflags(s); return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, @@ -1118,10 +1154,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S }; + .mask = CC_O }; break; default: case JCC_LE: @@ -1129,10 +1164,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S | CC_Z }; + .mask = CC_O | CC_Z }; break; } break; @@ -1231,11 +1265,9 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s) static void gen_stos(DisasContext *s, MemOp ot) { - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); gen_string_movl_A0_EDI(s); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); } static void gen_lods(DisasContext *s, MemOp ot) @@ -1243,28 +1275,33 @@ static void gen_lods(DisasContext *s, MemOp ot) gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); gen_op_mov_reg_v(s, ot, R_EAX, s->T0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); } static void gen_scas(DisasContext *s, MemOp ot) { gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); - gen_op(s, OP_CMPL, ot, R_EAX); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + tcg_gen_mov_tl(cpu_cc_src, s->T1); + tcg_gen_mov_tl(s->cc_srcT, s->T0); + tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1); + set_cc_op(s, CC_OP_SUBB + ot); + + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); } static void gen_cmps(DisasContext *s, MemOp ot) { + TCGv dshift; + gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); gen_string_movl_A0_ESI(s); gen_op(s, OP_CMPL, ot, OR_TMP0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + + dshift = gen_compute_Dshift(s, ot); + gen_op_add_reg(s, s->aflag, R_ESI, dshift); + gen_op_add_reg(s, s->aflag, R_EDI, dshift); } static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) @@ -1292,8 +1329,7 @@ static void gen_ins(DisasContext *s, MemOp ot) tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); gen_helper_in_func(ot, s->T0, s->tmp2_i32); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); gen_bpt_io(s, s->tmp2_i32, ot); } @@ -1306,8 +1342,7 @@ static void gen_outs(DisasContext *s, MemOp ot) tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); gen_bpt_io(s, s->tmp2_i32, ot); } @@ -2466,14 +2501,10 @@ static void gen_jcc(DisasContext *s, int b, int diff) gen_jmp_rel(s, s->dflag, diff, 0); } -static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b, - int modrm, int reg) +static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src) { - CCPrepare cc; + CCPrepare cc = gen_prepare_cc(s, b, s->T1); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - - cc = gen_prepare_cc(s, b, s->T1); if (cc.mask != -1) { TCGv t0 = tcg_temp_new(); tcg_gen_andi_tl(t0, cc.reg, cc.mask); @@ -2483,9 +2514,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b, cc.reg2 = tcg_constant_tl(cc.imm); } - tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2, - s->T0, cpu_regs[reg]); - gen_op_mov_reg_v(s, ot, reg, s->T0); + tcg_gen_movcond_tl(cc.cond, dest, cc.reg, cc.reg2, src, dest); } static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg) @@ -2552,7 +2581,7 @@ static void gen_push_v(DisasContext *s, TCGv val) if (!CODE64(s)) { if (ADDSEG(s)) { - new_esp = s->tmp4; + new_esp = tcg_temp_new(); tcg_gen_mov_tl(new_esp, s->A0); } gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); @@ -2567,8 +2596,8 @@ static MemOp gen_pop_T0(DisasContext *s) { MemOp d_ot = mo_pushpop(s, s->dflag); - gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1); - gen_op_ld_v(s, d_ot, s->T0, s->A0); + gen_lea_v_seg_dest(s, mo_stacksize(s), s->T0, cpu_regs[R_ESP], R_SS, -1); + gen_op_ld_v(s, d_ot, s->T0, s->T0); return d_ot; } @@ -2837,6 +2866,10 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) } } new_eip &= mask; + new_pc = new_eip + s->cs_base; + if (!CODE64(s)) { + new_pc = (uint32_t)new_pc; + } gen_update_cc_op(s); set_cc_op(s, CC_OP_DYNAMIC); @@ -2854,8 +2887,7 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) } } - if (use_goto_tb && - translator_use_goto_tb(&s->base, new_eip + s->cs_base)) { + if (use_goto_tb && translator_use_goto_tb(&s->base, new_pc)) { /* jump to same page: we can use a direct jump */ tcg_gen_goto_tb(tb_num); if (!(tb_cflags(s->base.tb) & CF_PCREL)) { @@ -4171,7 +4203,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]); tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]); tcg_gen_add_tl(s->A0, s->A0, s->T0); - gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); gen_op_ld_v(s, MO_8, s->T0, s->A0); gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); @@ -4919,6 +4950,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) case 0xaa: /* stosS */ case 0xab: ot = mo_b_d(b, dflag); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_stos(s, ot); } else { @@ -4937,6 +4969,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) case 0xae: /* scasS */ case 0xaf: ot = mo_b_d(b, dflag); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); if (prefixes & PREFIX_REPNZ) { gen_repz_scas(s, ot, 1); } else if (prefixes & PREFIX_REPZ) { @@ -5190,7 +5223,9 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) ot = dflag; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_cmovcc1(env, s, ot, b, modrm, reg); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_cmovcc1(s, b ^ 1, s->T0, cpu_regs[reg]); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; /************************/ @@ -5823,7 +5858,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_update_cc_op(s); gen_update_eip_cur(s); tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); - gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); gen_helper_monitor(tcg_env, s->A0); break; @@ -6930,6 +6964,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX]; dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX]; dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX]; + dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX]; dc->cpuid_xsave_features = env->features[FEAT_XSAVE]; dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) || (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); |