diff options
Diffstat (limited to 'tcg/sparc/tcg-target.c.inc')
| -rw-r--r-- | tcg/sparc/tcg-target.c.inc | 348 |
1 files changed, 296 insertions, 52 deletions
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 0c062c60eb..72d9552fd0 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -211,6 +211,7 @@ static const int tcg_target_call_oarg_regs[] = { #define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) #define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDCC (INSN_OP(2) | INSN_OP3(0x11)) #define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) #define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) #define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) @@ -323,15 +324,26 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, switch (type) { case R_SPARC_WDISP16: - assert(check_fit_ptr(pcrel >> 2, 16)); + if (!check_fit_ptr(pcrel >> 2, 16)) { + return false; + } insn &= ~INSN_OFF16(-1); insn |= INSN_OFF16(pcrel); break; case R_SPARC_WDISP19: - assert(check_fit_ptr(pcrel >> 2, 19)); + if (!check_fit_ptr(pcrel >> 2, 19)) { + return false; + } insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(pcrel); break; + case R_SPARC_13: + if (!check_fit_ptr(value, 13)) { + return false; + } + insn &= ~INSN_IMM13(-1); + insn |= INSN_IMM13(value); + break; default: g_assert_not_reached(); } @@ -413,15 +425,31 @@ static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); } +static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (check_fit_i32(arg, 13)) { + /* A 13-bit constant sign-extended to 64-bits. */ + tcg_out_movi_imm13(s, ret, arg); + } else { + /* A 32-bit constant zero-extended to 64 bits. */ + tcg_out_sethi(s, ret, arg); + if (arg & 0x3ff) { + tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); + } + } +} + static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg, bool in_prologue) + tcg_target_long arg, bool in_prologue, + TCGReg scratch) { tcg_target_long hi, lo = (int32_t)arg; tcg_target_long test, lsb; - /* Make sure we test 32-bit constants for imm13 properly. */ - if (type == TCG_TYPE_I32) { - arg = lo; + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { + tcg_out_movi_imm32(s, ret, arg); + return; } /* A 13-bit constant sign-extended to 64-bits. */ @@ -439,15 +467,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } } - /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { - tcg_out_sethi(s, ret, arg); - if (arg & 0x3ff) { - tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); - } - return; - } - /* A 32-bit constant sign-extended to 64-bits. */ if (arg == lo) { tcg_out_sethi(s, ret, ~arg); @@ -455,38 +474,47 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } - /* A 21-bit constant, shifted. */ + /* A 32-bit constant, shifted. */ lsb = ctz64(arg); test = (tcg_target_long)arg >> lsb; - if (check_fit_tl(test, 13)) { - tcg_out_movi_imm13(s, ret, test); - tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); - return; - } else if (lsb > 10 && test == extract64(test, 0, 21)) { + if (lsb > 10 && test == extract64(test, 0, 21)) { tcg_out_sethi(s, ret, test << 10); tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX); return; + } else if (test == (uint32_t)test || test == (int32_t)test) { + tcg_out_movi_int(s, TCG_TYPE_I64, ret, test, in_prologue, scratch); + tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); + return; + } + + /* Use the constant pool, if possible. */ + if (!in_prologue && USE_REG_TB) { + new_pool_label(s, arg, R_SPARC_13, s->code_ptr, + tcg_tbrel_diff(s, NULL)); + tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); + return; } /* A 64-bit constant decomposed into 2 32-bit pieces. */ if (check_fit_i32(lo, 13)) { hi = (arg - lo) >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_movi_imm32(s, ret, hi); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); } else { hi = arg >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); + tcg_out_movi_imm32(s, ret, hi); + tcg_out_movi_imm32(s, scratch, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); + tcg_out_arith(s, ret, ret, scratch, ARITH_OR); } } static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { - tcg_out_movi_int(s, type, ret, arg, false); + tcg_debug_assert(ret != TCG_REG_T2); + tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2); } static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, @@ -795,7 +823,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, if (use_vis3_instructions && !is_sub) { /* Note that ADDXC doesn't accept immediates. */ if (bhconst && bh != 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); + tcg_out_movi_imm13(s, TCG_REG_T2, bh); bh = TCG_REG_T2; } tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); @@ -811,9 +839,13 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); } } else { - /* Otherwise adjust BH as if there is carry into T2 ... */ + /* + * Otherwise adjust BH as if there is carry into T2. + * Note that constant BH is constrained to 11 bits for the MOVCC, + * so the adjustment fits 12 bits. + */ if (bhconst) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); + tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); } else { tcg_out_arithi(s, TCG_REG_T2, bh, 1, is_sub ? ARITH_SUB : ARITH_ADD); @@ -827,6 +859,19 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); } +static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, + bool in_prologue, bool tail_call) +{ + uintptr_t desti = (uintptr_t)dest; + + /* Be careful not to clobber %o7 for a tail call. */ + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, + desti & ~0xfff, in_prologue, + tail_call ? TCG_REG_G2 : TCG_REG_O7); + tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7, + TCG_REG_T1, desti & 0xfff, JMPL); +} + static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, bool in_prologue) { @@ -835,10 +880,7 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, if (disp == (int32_t)disp) { tcg_out32(s, CALL | (uint32_t)disp >> 2); } else { - uintptr_t desti = (uintptr_t)dest; - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, - desti & ~0xfff, in_prologue); - tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); + tcg_out_jmpl_const(s, dest, in_prologue, false); } } @@ -929,11 +971,10 @@ static void build_trampolines(TCGContext *s) /* Set the retaddr operand. */ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); /* Tail call. */ - tcg_out_call_nodelay(s, qemu_ld_helpers[i], true); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) { @@ -975,14 +1016,46 @@ static void build_trampolines(TCGContext *s) if (ra >= TCG_REG_O6) { tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, TCG_TARGET_CALL_STACK_OFFSET); - ra = TCG_REG_G1; + } else { + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); } - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. */ - tcg_out_call_nodelay(s, qemu_st_helpers[i], true); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); + } +} +#else +static const tcg_insn_unit *qemu_unalign_ld_trampoline; +static const tcg_insn_unit *qemu_unalign_st_trampoline; + +static void build_trampolines(TCGContext *s) +{ + for (int ld = 0; ld < 2; ++ld) { + void *helper; + + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + + if (ld) { + helper = helper_unaligned_ld; + qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } else { + helper = helper_unaligned_st; + qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } + + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + } + + /* Tail call. */ + tcg_out_jmpl_const(s, helper, true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } } #endif @@ -1013,7 +1086,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) #ifndef CONFIG_SOFTMMU if (guest_base != 0) { - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, + guest_base, true, TCG_REG_T1); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif @@ -1034,9 +1108,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_movi_imm13(s, TCG_REG_O0, 0); -#ifdef CONFIG_SOFTMMU build_trampolines(s); -#endif } static void tcg_out_nop_fill(tcg_insn_unit *p, int count) @@ -1121,18 +1193,22 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = LDUB, [MO_SB] = LDSB, + [MO_UB | MO_LE] = LDUB, + [MO_SB | MO_LE] = LDSB, [MO_BEUW] = LDUH, [MO_BESW] = LDSH, [MO_BEUL] = LDUW, [MO_BESL] = LDSW, [MO_BEUQ] = LDX, + [MO_BESQ] = LDX, [MO_LEUW] = LDUH_LE, [MO_LESW] = LDSH_LE, [MO_LEUL] = LDUW_LE, [MO_LESL] = LDSW_LE, [MO_LEUQ] = LDX_LE, + [MO_LESQ] = LDX_LE, }; static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = { @@ -1151,11 +1227,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi, bool is_64) { MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; const tcg_insn_unit *func; - tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_read)); @@ -1219,13 +1296,99 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + if (SPARC64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight load in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. + */ + TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); + tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, arg_low, addr); + } else { + /* Underalignment: load by pieces of minimum alignment. */ + int ld_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. + */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)]; + tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); + ld_opc = qemu_ld_opc[a_bits | MO_BE]; + for (i = a_size; i < s_size; i += a_size) { + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); + tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } else if (a_bits == 0) { + ld_opc = LDUB; + tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); + for (i = a_size; i < s_size; i += a_size) { + if ((memop & MO_SIGN) && i == s_size - a_size) { + ld_opc = LDSB; + } + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); + tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } else { + ld_opc = qemu_ld_opc[a_bits | MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + if ((memop & MO_SIGN) && i == s_size - a_size) { + ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN]; + } + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc); + tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #endif /* CONFIG_SOFTMMU */ } @@ -1233,11 +1396,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi) { MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; const tcg_insn_unit *func; - tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_write)); @@ -1274,13 +1438,93 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + if (SPARC64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight store in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. + */ + TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); + tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, arg_low, addr); + } else { + /* Underalignment: store by pieces of minimum alignment. */ + int st_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. + */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + st_opc = qemu_st_opc[a_bits | MO_BE]; + for (i = 0; i < s_size; i += a_size) { + TCGReg d = data; + int shift = (s_size - a_size - i) * 8; + if (shift) { + d = TCG_REG_T2; + tcg_out_arithi(s, d, data, shift, SHIFT_SRLX); + } + tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc); + } + } else if (a_bits == 0) { + tcg_out_ldst(s, data, TCG_REG_T1, 0, STB); + for (i = 1; i < s_size; i++) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB); + } + } else { + /* Note that ST*A with immediate asi must use indexed address. */ + st_opc = qemu_st_opc[a_bits + MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #endif /* CONFIG_SOFTMMU */ } |