Diffstat (limited to 'tcg/ppc/tcg-target.c.inc')
-rw-r--r--   tcg/ppc/tcg-target.c.inc   533
1 file changed, 365 insertions, 168 deletions
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 5c873b2161..856c3b18f5 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -83,7 +83,7 @@
 #define TCG_VEC_TMP2    TCG_REG_V1
 
 #define TCG_REG_TB     TCG_REG_R31
-#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)
+#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
 
 /* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
 #define SZP  ((int)sizeof(void *))
@@ -101,11 +101,13 @@
 #define ALL_GENERAL_REGS  0xffffffffu
 #define ALL_VECTOR_REGS   0xffffffff00000000ull
 
+#ifndef R_PPC64_PCREL34
+#define R_PPC64_PCREL34  132
+#endif
+
 #define have_isel  (cpuinfo & CPUINFO_ISEL)
 
-#ifndef CONFIG_SOFTMMU
-#define TCG_GUEST_BASE_REG 30
-#endif
+#define TCG_GUEST_BASE_REG  TCG_REG_R30
 
 #ifdef CONFIG_DEBUG_TCG
 static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
@@ -215,13 +217,19 @@ static const int tcg_target_callee_save_regs[] = {
     TCG_REG_R31
 };
 
+/* For PPC, we use TB+4 instead of TB as the base. */
+static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
+{
+    return tcg_tbrel_diff(s, target) - 4;
+}
+
 static inline bool in_range_b(tcg_target_long target)
 {
     return target == sextract64(target, 0, 26);
 }
 
 static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
-                              const tcg_insn_unit *target)
+                               const tcg_insn_unit *target)
 {
     ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
     tcg_debug_assert(in_range_b(disp));
@@ -241,7 +249,7 @@ static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
 }
 
 static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
-                              const tcg_insn_unit *target)
+                               const tcg_insn_unit *target)
 {
     ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
     tcg_debug_assert(disp == (int16_t) disp);
@@ -260,6 +268,19 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
     return false;
 }
 
+static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
+
+    if (disp == sextract64(disp, 0, 34)) {
+        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
+        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
+        return true;
+    }
+    return false;
+}
+
 /* test if a constant matches the constraint */
 static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 {
@@ -323,6 +344,15 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 #define STDX   XO31(149)
 #define STQ    XO62(  2)
 
+#define PLWA   OPCD( 41)
+#define PLD    OPCD( 57)
+#define PLXSD  OPCD( 42)
+#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */
+
+#define PSTD   OPCD( 61)
+#define PSTXSD OPCD( 46)
+#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */
+
 #define ADDIC  OPCD( 12)
 #define ADDI   OPCD( 14)
 #define ADDIS  OPCD( 15)
@@ -356,6 +386,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
 #define CRNAND XO19(225)
 #define CROR   XO19(449)
 #define CRNOR  XO19( 33)
+#define ADDPCIS XO19(  2)
 
 #define EXTSB  XO31(954)
 #define EXTSH  XO31(922)
@@ -680,6 +711,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
         return reloc_pc14(code_ptr, target);
     case R_PPC_REL24:
         return reloc_pc24(code_ptr, target);
+    case R_PPC64_PCREL34:
+        return reloc_pc34(code_ptr, target);
     case R_PPC_ADDR16:
         /*
          * We are (slightly) abusing this relocation type.  In particular,
@@ -712,6 +745,52 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     return true;
 }
 
+/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
+static bool tcg_out_need_prefix_align(TCGContext *s)
+{
+    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
+}
+
+static void tcg_out_prefix_align(TCGContext *s)
+{
+    if (tcg_out_need_prefix_align(s)) {
+        tcg_out32(s, NOP);
+    }
+}
+
+static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
+{
+    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
+}
+
+/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
+static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
+                          unsigned ra, tcg_target_long imm, bool r)
+{
+    tcg_insn_unit p, i;
+
+    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
+    i = opc | TAI(rt, ra, imm);
+
+    tcg_out_prefix_align(s);
+    tcg_out32(s, p);
+    tcg_out32(s, i);
+}
+
+/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
+static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
+                          unsigned ra, tcg_target_long imm, bool r)
+{
+    tcg_insn_unit p, i;
+
+    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
+    i = opc | TAI(rt, ra, imm);
+
+    tcg_out_prefix_align(s);
+    tcg_out32(s, p);
+    tcg_out32(s, i);
+}
+
 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset);
 
@@ -853,6 +932,19 @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
     tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
 }
 
+static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
+{
+    uint32_t d0, d1, d2;
+
+    tcg_debug_assert((imm & 0xffff) == 0);
+    tcg_debug_assert(imm == (int32_t)imm);
+
+    d2 = extract32(imm, 16, 1);
+    d1 = extract32(imm, 17, 5);
+    d0 = extract32(imm, 22, 10);
+    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
+}
+
 static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
 {
     TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
@@ -991,12 +1083,31 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     }
 
     /* Load addresses within the TB with one insn. */
-    tb_diff = tcg_tbrel_diff(s, (void *)arg);
+    tb_diff = ppc_tbrel_diff(s, (void *)arg);
     if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
         tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
         return;
     }
 
+    /*
+     * Load values up to 34 bits, and pc-relative addresses,
+     * with one prefixed insn.
+     */
+    if (have_isa_3_10) {
+        if (arg == sextract64(arg, 0, 34)) {
+            /* pli ret,value = paddi ret,0,value,0 */
+            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
+            return;
+        }
+
+        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
+        if (tmp == sextract64(tmp, 0, 34)) {
+            /* pla ret,value = paddi ret,0,value,1 */
+            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
+            return;
+        }
+    }
+
     /* Load 32-bit immediates with two insns.  Note that we've already
        eliminated bare ADDIS, so we know both insns are required.  */
     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
@@ -1035,6 +1146,19 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
         return;
     }
 
+    /* Load addresses within 2GB with 2 insns. */
+    if (have_isa_3_00) {
+        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
+        int16_t lo = hi;
+
+        hi -= lo;
+        if (hi == (int32_t)hi) {
+            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
+            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
+            return;
+        }
+    }
+
     /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
     if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
         tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
@@ -1044,10 +1168,21 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     /* Use the constant pool, if possible. */
     if (!in_prologue && USE_REG_TB) {
         new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
-                       tcg_tbrel_diff(s, NULL));
+                       ppc_tbrel_diff(s, NULL));
         tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
         return;
     }
+    if (have_isa_3_10) {
+        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
+        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
+        return;
+    }
+    if (have_isa_3_00) {
+        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
+        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
+        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
+        return;
+    }
 
     tmp = arg >> 31 >> 1;
     tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
@@ -1104,7 +1239,20 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
      */
     if (USE_REG_TB) {
         rel = R_PPC_ADDR16;
-        add = tcg_tbrel_diff(s, NULL);
+        add = ppc_tbrel_diff(s, NULL);
+    } else if (have_isa_3_10) {
+        if (type == TCG_TYPE_V64) {
+            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
+            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
+        } else {
+            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
+            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
+        }
+        return;
+    } else if (have_isa_3_00) {
+        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
+        rel = R_PPC_REL14;
+        add = 0;
     } else {
         rel = R_PPC_ADDR32;
         add = 0;
@@ -1131,6 +1279,8 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
     if (USE_REG_TB) {
         tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
         load_insn |= RA(TCG_REG_TB);
+    } else if (have_isa_3_00) {
+        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
     } else {
         tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
         tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
@@ -1322,6 +1472,49 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
         break;
     }
 
+    /* For unaligned or large offsets, use the prefixed form. */
+    if (have_isa_3_10
+        && (offset != (int16_t)offset || (offset & align))
+        && offset == sextract64(offset, 0, 34)) {
+        /*
+         * Note that the MLS:D insns retain their un-prefixed opcode,
+         * while the 8LS:D insns use a different opcode space.
+         */
+        switch (opi) {
+        case LBZ:
+        case LHZ:
+        case LHA:
+        case LWZ:
+        case STB:
+        case STH:
+        case STW:
+        case ADDI:
+            tcg_out_mls_d(s, opi, rt, base, offset, 0);
+            return;
+        case LWA:
+            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
+            return;
+        case LD:
+            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
+            return;
+        case STD:
+            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
+            return;
+        case LXSD:
+            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
+            return;
+        case STXSD:
+            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
+            return;
+        case LXV:
+            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
+            return;
+        case STXV:
+            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
+            return;
+        }
+    }
+
     /* For unaligned, or very large offsets, use the indexed form. */
     if (offset & align || offset != (int32_t)offset || opi == 0) {
         if (rs == base) {
@@ -2122,152 +2315,158 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                    s_bits == MO_128);
     a_bits = h->aa.align;
 
-#ifdef CONFIG_SOFTMMU
-    int mem_index = get_mmuidx(oi);
-    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
-                        : offsetof(CPUTLBEntry, addr_write);
-    int fast_off = tlb_mask_table_ofs(s, mem_index);
-    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
-    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
-
-    ldst = new_ldst_label(s);
-    ldst->is_ld = is_ld;
-    ldst->oi = oi;
-    ldst->addrlo_reg = addrlo;
-    ldst->addrhi_reg = addrhi;
-
-    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
-
-    /* Extract the page index, shifted into place for tlb index. */
-    if (TCG_TARGET_REG_BITS == 32) {
-        tcg_out_shri32(s, TCG_REG_R0, addrlo,
-                       s->page_bits - CPU_TLB_ENTRY_BITS);
-    } else {
-        tcg_out_shri64(s, TCG_REG_R0, addrlo,
-                       s->page_bits - CPU_TLB_ENTRY_BITS);
-    }
-    tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
+    if (tcg_use_softmmu) {
+        int mem_index = get_mmuidx(oi);
+        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+                            : offsetof(CPUTLBEntry, addr_write);
+        int fast_off = tlb_mask_table_ofs(s, mem_index);
+        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
 
-    /*
-     * Load the (low part) TLB comparator into TMP2.
-     * For 64-bit host, always load the entire 64-bit slot for simplicity.
-     * We will ignore the high bits with tcg_out_cmp(..., addr_type).
-     */
-    if (TCG_TARGET_REG_BITS == 64) {
-        if (cmp_off == 0) {
-            tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
+        ldst = new_ldst_label(s);
+        ldst->is_ld = is_ld;
+        ldst->oi = oi;
+        ldst->addrlo_reg = addrlo;
+        ldst->addrhi_reg = addrhi;
+
+        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
+
+        /* Extract the page index, shifted into place for tlb index. */
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_shri32(s, TCG_REG_R0, addrlo,
+                           s->page_bits - CPU_TLB_ENTRY_BITS);
         } else {
-            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
-            tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
+            tcg_out_shri64(s, TCG_REG_R0, addrlo,
+                           s->page_bits - CPU_TLB_ENTRY_BITS);
         }
-    } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
-        tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
-    } else {
-        tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
-                   cmp_off + 4 * HOST_BIG_ENDIAN);
-    }
+        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
 
-    /*
-     * Load the TLB addend for use on the fast path.
-     * Do this asap to minimize any load use delay.
-     */
-    if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
-                   offsetof(CPUTLBEntry, addend));
-    }
-
-    /* Clear the non-page, non-alignment bits from the address in R0. */
-    if (TCG_TARGET_REG_BITS == 32) {
         /*
-         * We don't support unaligned accesses on 32-bits.
-         * Preserve the bottom bits and thus trigger a comparison
-         * failure on unaligned accesses.
+         * Load the (low part) TLB comparator into TMP2.
+         * For 64-bit host, always load the entire 64-bit slot for simplicity.
+         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
          */
-        if (a_bits < s_bits) {
-            a_bits = s_bits;
+        if (TCG_TARGET_REG_BITS == 64) {
+            if (cmp_off == 0) {
+                tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
+                                        TCG_REG_TMP1, TCG_REG_TMP2));
+            } else {
+                tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
+                                       TCG_REG_TMP1, TCG_REG_TMP2));
+                tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
+                           TCG_REG_TMP1, cmp_off);
+            }
+        } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
+            tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
+                                     TCG_REG_TMP1, TCG_REG_TMP2));
+        } else {
+            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
+            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
+                       cmp_off + 4 * HOST_BIG_ENDIAN);
         }
-        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
-                    (32 - a_bits) & 31, 31 - s->page_bits);
-    } else {
-        TCGReg t = addrlo;
 
         /*
-         * If the access is unaligned, we need to make sure we fail if we
-         * cross a page boundary.  The trick is to add the access size-1
-         * to the address before masking the low bits.  That will make the
-         * address overflow to the next page if we cross a page boundary,
-         * which will then force a mismatch of the TLB compare.
+         * Load the TLB addend for use on the fast path.
+         * Do this asap to minimize any load use delay.
          */
-        if (a_bits < s_bits) {
-            unsigned a_mask = (1 << a_bits) - 1;
-            unsigned s_mask = (1 << s_bits) - 1;
-            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
-            t = TCG_REG_R0;
+        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
+            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
+                       offsetof(CPUTLBEntry, addend));
         }
 
-        /* Mask the address for the requested alignment. */
-        if (addr_type == TCG_TYPE_I32) {
-            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
+        /* Clear the non-page, non-alignment bits from the address in R0. */
+        if (TCG_TARGET_REG_BITS == 32) {
+            /*
+             * We don't support unaligned accesses on 32-bits.
+             * Preserve the bottom bits and thus trigger a comparison
+             * failure on unaligned accesses.
+             */
+            if (a_bits < s_bits) {
+                a_bits = s_bits;
+            }
+            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
                         (32 - a_bits) & 31, 31 - s->page_bits);
-        } else if (a_bits == 0) {
-            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
         } else {
-            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
-                        64 - s->page_bits, s->page_bits - a_bits);
-            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
-        }
-    }
+            TCGReg t = addrlo;
+
+            /*
+             * If the access is unaligned, we need to make sure we fail if we
+             * cross a page boundary.  The trick is to add the access size-1
+             * to the address before masking the low bits.  That will make the
+             * address overflow to the next page if we cross a page boundary,
+             * which will then force a mismatch of the TLB compare.
+             */
+            if (a_bits < s_bits) {
+                unsigned a_mask = (1 << a_bits) - 1;
+                unsigned s_mask = (1 << s_bits) - 1;
+                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
+                t = TCG_REG_R0;
+            }
 
-    if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
-        /* Low part comparison into cr7. */
-        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
-                    0, 7, TCG_TYPE_I32);
+            /* Mask the address for the requested alignment. */
+            if (addr_type == TCG_TYPE_I32) {
+                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
+                            (32 - a_bits) & 31, 31 - s->page_bits);
+            } else if (a_bits == 0) {
+                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
+            } else {
+                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
+                            64 - s->page_bits, s->page_bits - a_bits);
+                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
+            }
+        }
 
-        /* Load the high part TLB comparator into TMP2. */
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
-                   cmp_off + 4 * !HOST_BIG_ENDIAN);
+        if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
+            /* Low part comparison into cr7. */
+            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
+                        0, 7, TCG_TYPE_I32);
 
-        /* Load addend, deferred for this case. */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
-                   offsetof(CPUTLBEntry, addend));
+            /* Load the high part TLB comparator into TMP2. */
+            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
+                       cmp_off + 4 * !HOST_BIG_ENDIAN);
 
-        /* High part comparison into cr6. */
-        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
+            /* Load addend, deferred for this case. */
+            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
+                       offsetof(CPUTLBEntry, addend));
 
-        /* Combine comparisons into cr7. */
-        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
-    } else {
-        /* Full comparison into cr7. */
-        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type);
-    }
+            /* High part comparison into cr6. */
+            tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
+                        0, 6, TCG_TYPE_I32);
 
-    /* Load a pointer into the current opcode w/conditional branch-link. */
-    ldst->label_ptr[0] = s->code_ptr;
-    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+            /* Combine comparisons into cr7. */
+            tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+        } else {
+            /* Full comparison into cr7. */
+            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
+                        0, 7, addr_type);
+        }
 
-    h->base = TCG_REG_TMP1;
-#else
-    if (a_bits) {
-        ldst = new_ldst_label(s);
-        ldst->is_ld = is_ld;
-        ldst->oi = oi;
-        ldst->addrlo_reg = addrlo;
-        ldst->addrhi_reg = addrhi;
+        /* Load a pointer into the current opcode w/conditional branch-link. */
+        ldst->label_ptr[0] = s->code_ptr;
+        tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
 
-        /* We are expecting a_bits to max out at 7, much lower than ANDI. */
-        tcg_debug_assert(a_bits < 16);
-        tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
+        h->base = TCG_REG_TMP1;
+    } else {
+        if (a_bits) {
+            ldst = new_ldst_label(s);
+            ldst->is_ld = is_ld;
+            ldst->oi = oi;
+            ldst->addrlo_reg = addrlo;
+            ldst->addrhi_reg = addrhi;
+
+            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
+            tcg_debug_assert(a_bits < 16);
+            tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
+
+            ldst->label_ptr[0] = s->code_ptr;
+            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
+        }
 
-        ldst->label_ptr[0] = s->code_ptr;
-        tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
+        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
     }
 
-    h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
-#endif
-
     if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
         /* Zero-extend the guest address for use in the host address. */
         tcg_out_ext32u(s, TCG_REG_R0, addrlo);
@@ -2500,18 +2699,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     }
     tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
 
-#ifndef CONFIG_SOFTMMU
-    if (guest_base) {
+    if (!tcg_use_softmmu && guest_base) {
         tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
-#endif
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
     tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
-    if (USE_REG_TB) {
-        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
-    }
     tcg_out32(s, BCCTR | BO_ALWAYS);
 
     /* Epilogue */
@@ -2529,7 +2723,17 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_out_tb_start(TCGContext *s)
 {
-    /* nothing to do */
+    /* Load TCG_REG_TB. */
+    if (USE_REG_TB) {
+        if (have_isa_3_00) {
+            /* lnia REG_TB */
+            tcg_out_addpcis(s, TCG_REG_TB, 0);
+        } else {
+            /* bcl 20,31,$+4 (preferred form for getting nia) */
+            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
+            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
+        }
+    }
 }
 
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
@@ -2541,33 +2745,33 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
 static void tcg_out_goto_tb(TCGContext *s, int which)
 {
     uintptr_t ptr = get_jmp_target_addr(s, which);
+    int16_t lo;
 
-    if (USE_REG_TB) {
-        ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
-        tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
-
-        /* TODO: Use direct branches when possible. */
-        set_jmp_insn_offset(s, which);
-        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
-
-        tcg_out32(s, BCCTR | BO_ALWAYS);
+    /* Direct branch will be patched by tb_target_set_jmp_target. */
+    set_jmp_insn_offset(s, which);
+    tcg_out32(s, NOP);
 
-        /* For the unlinked case, need to reset TCG_REG_TB. */
-        set_jmp_reset_offset(s, which);
-        tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
-                         -tcg_current_code_size(s));
+    /* When branch is out of range, fall through to indirect. */
+    if (USE_REG_TB) {
+        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
+        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
+    } else if (have_isa_3_10) {
+        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
+        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
+    } else if (have_isa_3_00) {
+        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
+        lo = offset;
+        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
     } else {
-        /* Direct branch will be patched by tb_target_set_jmp_target. */
-        set_jmp_insn_offset(s, which);
-        tcg_out32(s, NOP);
-
-        /* When branch is out of range, fall through to indirect. */
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
-        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
-        tcg_out32(s, BCCTR | BO_ALWAYS);
-        set_jmp_reset_offset(s, which);
+        lo = ptr;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
     }
+
+    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+    tcg_out32(s, BCCTR | BO_ALWAYS);
+    set_jmp_reset_offset(s, which);
 }
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
@@ -2577,10 +2781,6 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
     intptr_t diff = addr - jmp_rx;
     tcg_insn_unit insn;
 
-    if (USE_REG_TB) {
-        return;
-    }
-
     if (in_range_b(diff)) {
         insn = B | (diff & 0x3fffffc);
     } else {
@@ -2600,9 +2800,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     switch (opc) {
     case INDEX_op_goto_ptr:
         tcg_out32(s, MTSPR | RS(args[0]) | CTR);
-        if (USE_REG_TB) {
-            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
-        }
         tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
         tcg_out32(s, BCCTR | BO_ALWAYS);
         break;
@@ -3645,7 +3842,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                   tcgv_vec_arg(t1), tcgv_vec_arg(t2));
         vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                   tcgv_vec_arg(v0), tcgv_vec_arg(t1));
-	break;
+        break;
     case MO_32:
         tcg_debug_assert(!have_isa_2_07);
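
Editor's note: as an illustration of the prefixed-instruction encoding this series relies on, below is a minimal standalone C sketch (not part of the commit) of how the MLS:D form used by tcg_out_mls_d() splits a 34-bit immediate across the prefix and suffix words for paddi (the pli/pla cases in tcg_out_movi_int). The OPCD/TAI helpers are redefined locally, purely for this example, to mirror the macros the backend uses.

    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-ins mirroring the macros used in tcg-target.c.inc. */
    #define OPCD(op)          ((uint32_t)(op) << 26)
    #define TAI(rt, ra, imm)  (((uint32_t)(rt) << 21) | ((uint32_t)(ra) << 16) \
                               | ((uint32_t)(imm) & 0xffff))
    #define ADDI              OPCD(14)

    /* Encode paddi rt,ra,imm,r (pli/pla) as a two-word prefixed instruction. */
    static void encode_paddi(uint32_t out[2], unsigned rt, unsigned ra,
                             int64_t imm, int r)
    {
        /* Type 10 (MLS:D) prefix: primary opcode 1, R bit, and imm[33:16]. */
        out[0] = OPCD(1) | (2u << 24) | ((uint32_t)r << 20)
               | (uint32_t)((imm >> 16) & 0x3ffff);
        /* Suffix keeps the un-prefixed ADDI opcode and carries imm[15:0]. */
        out[1] = ADDI | TAI(rt, ra, imm);
    }

    int main(void)
    {
        uint32_t insn[2];

        encode_paddi(insn, 4, 0, 0x12345678, 0);    /* pli r4, 0x12345678 */
        printf("%08x %08x\n", insn[0], insn[1]);    /* prints: 06001234 38805678 */
        return 0;
    }

As in the patch, the two words must not straddle a 64-byte boundary, which is why tcg_out_mls_d() and tcg_out_8ls_d() call tcg_out_prefix_align() before emitting the pair, and why reloc_pc34() patches the displacement into both words.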