diff options
Diffstat (limited to 'tcg/i386')
| -rw-r--r-- | tcg/i386/tcg-target.inc.c | 36 |
1 files changed, 27 insertions, 9 deletions
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 6f8cdca756..cf7536bd52 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -686,6 +686,18 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } +static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Given the strength of x86 memory ordering, we only need care for + store-load ordering. Experimentally, "lock orl $0,0(%esp)" is + faster than "mfence", so don't bother with the sse insn. */ + if (a0 & TCG_MO_ST_LD) { + tcg_out8(s, 0xf0); + tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0); + tcg_out8(s, 0); + } +} + static inline void tcg_out_push(TCGContext *s, int reg) { tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); @@ -1202,7 +1214,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, TCGType ttype = TCG_TYPE_I32; TCGType tlbtype = TCG_TYPE_I32; int trexw = 0, hrexw = 0, tlbrexw = 0; - int a_bits = get_alignment_bits(opc); + unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; target_ulong tlb_mask; if (TCG_TARGET_REG_BITS == 64) { @@ -1220,17 +1235,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } tcg_out_mov(s, tlbtype, r0, addrlo); - if (a_bits >= 0) { - /* A byte access or an alignment check required */ + /* If the required alignment is at least as large as the access, simply + copy the address and mask. For lesser alignments, check that we don't + cross pages for the complete access. */ + if (a_bits >= s_bits) { tcg_out_mov(s, ttype, r1, addrlo); - tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. */ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, - (1 << (opc & MO_SIZE)) - 1); - tlb_mask = TARGET_PAGE_MASK; + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask); } + tlb_mask = TARGET_PAGE_MASK | a_mask; tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); @@ -2130,6 +2143,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_mb: + tcg_out_mb(s, args[0]); + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2195,6 +2211,8 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_mb, { } }, + #if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, |