Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README                    |  22
-rw-r--r--  tcg/aarch64/tcg-target.c.inc  | 125
-rw-r--r--  tcg/aarch64/tcg-target.h      |   2
-rw-r--r--  tcg/arm/tcg-target.c.inc      | 295
-rw-r--r--  tcg/arm/tcg-target.h          |   2
-rw-r--r--  tcg/i386/tcg-target.c.inc     |  20
-rw-r--r--  tcg/mips/tcg-target.c.inc     | 102
-rw-r--r--  tcg/optimize.c                |  56
-rw-r--r--  tcg/ppc/tcg-target.c.inc      | 228
-rw-r--r--  tcg/riscv/tcg-target.c.inc    |  64
-rw-r--r--  tcg/s390/tcg-target.c.inc     |  34
-rw-r--r--  tcg/tcg-op-gvec.c             | 122
-rw-r--r--  tcg/tcg-op.c                  | 145
-rw-r--r--  tcg/tcg.c                     |  28
-rw-r--r--  tcg/tci.c                     |   3
-rw-r--r--  tcg/tci/tcg-target.c.inc      |  23
16 files changed, 790 insertions, 481 deletions
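
The patch below replaces the old implicit zero-extension contract of the
bswap16/bswap32 opcodes with an explicit flags operand (TCG_BSWAP_IZ,
TCG_BSWAP_OZ, TCG_BSWAP_OS), documented in the tcg/README hunk. As a quick
orientation before reading the diff, here is a minimal C sketch of the
documented semantics for the 16-bit case. This is illustrative code, not
QEMU code: the function name bswap16_flags is hypothetical, and the flag
values are assumed to match tcg/tcg.h; only the extension behaviour mirrors
the documented opcode semantics.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed to match the definitions in tcg/tcg.h. */
#define TCG_BSWAP_IZ  1   /* input is known zero-extended from bit 15 */
#define TCG_BSWAP_OZ  2   /* output must be zero-extended from bit 15 */
#define TCG_BSWAP_OS  4   /* output must be sign-extended from bit 15 */

/* Hypothetical reference model of "bswap16_i64 t0, t1, flags". */
static uint64_t bswap16_flags(uint64_t t1, int flags)
{
    /*
     * Swap the two low bytes of t1.  This model masks both bytes
     * explicitly, so TCG_BSWAP_IZ is informational here; a real backend
     * uses it to skip exactly this masking work.
     */
    uint64_t swapped = ((t1 & 0xff) << 8) | ((t1 >> 8) & 0xff);

    if (flags & TCG_BSWAP_OS) {
        return (uint64_t)(int16_t)swapped;   /* sign-extend from bit 15 */
    }
    if (flags & TCG_BSWAP_OZ) {
        return (uint16_t)swapped;            /* zero-extend from bit 15 */
    }
    /*
     * Neither OZ nor OS: bits above 15 may hold any value.  This C model
     * happens to produce zeros there; a backend need not.
     */
    return swapped;
}

int main(void)
{
    printf("%016" PRIx64 "\n", bswap16_flags(0x80ff, TCG_BSWAP_OZ)); /* 000000000000ff80 */
    printf("%016" PRIx64 "\n", bswap16_flags(0x80ff, TCG_BSWAP_OS)); /* ffffffffffffff80 */
    return 0;
}

In the backends below, TCG_BSWAP_IZ lets a target skip the input masking
when the value is already clean: for example, the aarch64 hunk emits an
extra zero-extension only when TCG_BSWAP_OZ is requested without
TCG_BSWAP_IZ, and the optimize.c hunk sets TCG_BSWAP_IZ automatically when
constant propagation proves the high bits are zero.
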
diff --git a/tcg/README b/tcg/README index 8510d823e3..c2e7762a37 100644 --- a/tcg/README +++ b/tcg/README @@ -295,19 +295,25 @@ ext32u_i64 t0, t1 8, 16 or 32 bit sign/zero extension (both operands must have the same type) -* bswap16_i32/i64 t0, t1 +* bswap16_i32/i64 t0, t1, flags -16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order -bytes are set to zero. +16 bit byte swap on the low bits of a 32/64 bit input. +If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15. +If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15. +If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15. +If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of +t0 above bit 15 may contain any value. -* bswap32_i32/i64 t0, t1 +* bswap32_i64 t0, t1, flags -32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that -the four high order bytes are set to zero. +32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, +except they apply from bit 31 instead of bit 15. -* bswap64_i64 t0, t1 +* bswap32_i32 t0, t1, flags +* bswap64_i64 t0, t1, flags -64 bit byte swap +32/64 bit byte swap. The flags are ignored, but still present +for consistency with the other bswap opcodes. * discard_i32/i64 t0 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 27cde314a9..5924977b42 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -475,9 +475,7 @@ typedef enum { /* Data-processing (1 source) instructions. */ I3507_CLZ = 0x5ac01000, I3507_RBIT = 0x5ac00000, - I3507_REV16 = 0x5ac00400, - I3507_REV32 = 0x5ac00800, - I3507_REV64 = 0x5ac00c00, + I3507_REV = 0x5ac00000, /* + size << 10 */ /* Data-processing (2 source) instructions. */ I3508_LSLV = 0x1ac02000, @@ -1417,19 +1415,11 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, } } -static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); -} - -static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); -} - -static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) +static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, + TCGReg rd, TCGReg rn) { - tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); + /* REV, REV16, REV32 */ + tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); } static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, @@ -1557,28 +1547,34 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * TCGMemOpIdx oi, uintptr_t ra) */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, +static void * const qemu_ld_helpers[4] = { + [MO_8] = helper_ret_ldub_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_lduw_mmu, + [MO_32] = helper_be_ldul_mmu, + [MO_64] = helper_be_ldq_mmu, +#else + [MO_16] = helper_le_lduw_mmu, + [MO_32] = helper_le_ldul_mmu, + [MO_64] = helper_le_ldq_mmu, +#endif }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, TCGMemOpIdx oi, * uintptr_t ra) */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - 
[MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, +static void * const qemu_st_helpers[4] = { + [MO_8] = helper_ret_stb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_stw_mmu, + [MO_32] = helper_be_stl_mmu, + [MO_64] = helper_be_stq_mmu, +#else + [MO_16] = helper_le_stw_mmu, + [MO_32] = helper_le_stl_mmu, + [MO_64] = helper_le_stq_mmu, +#endif }; static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) @@ -1602,7 +1598,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); if (opc & MO_SIGN) { tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); } else { @@ -1628,7 +1624,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, lb->raddr); return true; } @@ -1724,7 +1720,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - const MemOp bswap = memop & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((memop & MO_BSWAP) == 0); switch (memop & MO_SSIZE) { case MO_UB: @@ -1736,40 +1733,19 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, break; case MO_UW: tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev16(s, data_r, data_r); - } break; case MO_SW: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - tcg_out_rev16(s, data_r, data_r); - tcg_out_sxt(s, ext, MO_16, data_r, data_r); - } else { - tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), - data_r, addr_r, otype, off_r); - } + tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), + data_r, addr_r, otype, off_r); break; case MO_UL: tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev32(s, data_r, data_r); - } break; case MO_SL: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - tcg_out_rev32(s, data_r, data_r); - tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); - } else { - tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); - } + tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); break; case MO_Q: tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev64(s, data_r, data_r); - } break; default: tcg_abort(); @@ -1780,31 +1756,20 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - const MemOp bswap = memop & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. 
*/ + tcg_debug_assert((memop & MO_BSWAP) == 0); switch (memop & MO_SIZE) { case MO_8: tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); break; case MO_16: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev16(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); break; case MO_32: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev32(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); break; case MO_64: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev64(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); break; default: @@ -2184,15 +2149,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bswap64_i64: - tcg_out_rev64(s, a0, a1); + tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); break; case INDEX_op_bswap32_i64: + tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0); + } + break; case INDEX_op_bswap32_i32: - tcg_out_rev32(s, a0, a1); + tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); break; case INDEX_op_bswap16_i64: case INDEX_op_bswap16_i32: - tcg_out_rev16(s, a0, a1); + tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); + if (a2 & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + tcg_out_sxt(s, ext, MO_16, a0, a0); + } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_uxt(s, MO_16, a0, a0); + } break; case INDEX_op_ext8s_i64: diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index ef55f7c185..551baf8da3 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -148,7 +148,7 @@ typedef enum { #define TCG_TARGET_HAS_cmpsel_vec 0 #define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 5157143246..7a761a602e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1013,50 +1013,71 @@ static inline void tcg_out_ext16u(TCGContext *s, int cond, } } -static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) +static void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn, int flags) { if (use_armv6_instructions) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); - } -} + if (flags & TCG_BSWAP_OS) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); + return; + } -static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { /* rev16 */ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); + if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); + } + return; } -} -/* swap the 
two low bytes assuming that the two high input bytes and the - two high output bit can hold any value. */ -static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { + if (flags == 0) { + /* + * For stores, no input or output extension: + * rn = xxAB + * lsr tmp, rn, #8 tmp = 0xxA + * and tmp, tmp, #0xff tmp = 000A + * orr rd, tmp, rn, lsl #8 rd = xABA + */ tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); tcg_out_dat_reg(s, cond, ARITH_ORR, rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); + return; + } + + /* + * Byte swap, leaving the result at the top of the register. + * We will then shift down, zero or sign-extending. + */ + if (flags & TCG_BSWAP_IZ) { + /* + * rn = 00AB + * ror tmp, rn, #8 tmp = B00A + * orr tmp, tmp, tmp, lsl #16 tmp = BA00 + */ + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP, + SHIFT_IMM_LSL(16)); + } else { + /* + * rn = xxAB + * and tmp, rn, #0xff00 tmp = 00A0 + * lsl tmp, tmp, #8 tmp = 0A00 + * orr tmp, tmp, rn, lsl #24 tmp = BA00 + */ + tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24)); } + tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP, + (flags & TCG_BSWAP_OS + ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8))); } static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) @@ -1372,34 +1393,38 @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn, /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ -static void * const qemu_ld_helpers[16] = { +static void * const qemu_ld_helpers[8] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, - - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LESL] = helper_le_ldul_mmu, - - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BESL] = helper_be_ldul_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_UW] = helper_be_lduw_mmu, + [MO_UL] = helper_be_ldul_mmu, + [MO_Q] = helper_be_ldq_mmu, + [MO_SW] = helper_be_ldsw_mmu, + [MO_SL] = helper_be_ldul_mmu, +#else + [MO_UW] = helper_le_lduw_mmu, + [MO_UL] = helper_le_ldul_mmu, + [MO_Q] = helper_le_ldq_mmu, + [MO_SW] = helper_le_ldsw_mmu, + [MO_SL] = helper_le_ldul_mmu, +#endif }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, int mmu_idx, uintptr_t ra) */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, +static void * const qemu_st_helpers[4] = { + [MO_8] = helper_ret_stb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_stw_mmu, + [MO_32] = helper_be_stl_mmu, + [MO_64] = helper_be_stq_mmu, +#else + [MO_16] = helper_le_stw_mmu, + [MO_32] = helper_le_stl_mmu, + [MO_64] = helper_le_stq_mmu, +#endif }; /* Helper routines for marshalling helper function 
arguments into @@ -1604,9 +1629,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) icache usage. For pre-armv6, use the signed helpers since we do not have a single insn sign-extend. */ if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; + func = qemu_ld_helpers[opc & MO_SIZE]; } else { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; + func = qemu_ld_helpers[opc & MO_SSIZE]; if (opc & MO_SIGN) { opc = MO_UL; } @@ -1684,7 +1709,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); return true; } #endif /* SOFTMMU */ @@ -1693,7 +1718,8 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addend) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SSIZE) { case MO_UB: @@ -1704,49 +1730,30 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, break; case MO_UW: tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } break; case MO_SW: - if (bswap) { - tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); - } + tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); break; case MO_UL: - default: tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } break; case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); - } else if (dl != addend) { - tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); - tcg_out_ld32_12(s, COND_AL, dh, addend, 4); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, - addend, addrlo, SHIFT_IMM_LSL(0)); - tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); - tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); + } else if (datalo != addend) { + tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); + tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, + addend, addrlo, SHIFT_IMM_LSL(0)); + tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0); + tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4); } break; + default: + g_assert_not_reached(); } } @@ -1754,7 +1761,8 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. 
*/ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SSIZE) { case MO_UB: @@ -1765,47 +1773,28 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, break; case MO_UW: tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } break; case MO_SW: - if (bswap) { - tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); - } + tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); break; case MO_UL: - default: tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } break; case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); - } else if (dl == addrlo) { - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - } else { - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); + } else if (datalo == addrlo) { + tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4); + tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); + } else { + tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); + tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4); } break; + default: + g_assert_not_reached(); } } @@ -1854,44 +1843,31 @@ static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addend) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SIZE) { case MO_8: tcg_out_st8_r(s, cond, datalo, addrlo, addend); break; case MO_16: - if (bswap) { - tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); - tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st16_r(s, cond, datalo, addrlo, addend); - } + tcg_out_st16_r(s, cond, datalo, addrlo, addend); break; case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st32_r(s, cond, datalo, addrlo, addend); - } + tcg_out_st32_r(s, cond, datalo, addrlo, addend); break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. 
*/ - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); - tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); tcg_out_st32_12(s, cond, datahi, addend, 4); } break; + default: + g_assert_not_reached(); } } @@ -1899,44 +1875,31 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SIZE) { case MO_8: tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); break; case MO_16: - if (bswap) { - tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); - } + tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); break; case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); - } + tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); } break; + default: + g_assert_not_reached(); } } @@ -2245,7 +2208,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, COND_AL, args[0], args[1]); + tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]); break; case INDEX_op_bswap32_i32: tcg_out_bswap32(s, COND_AL, args[0], args[1]); diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 57fd0c0c74..95fcef33bc 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -174,7 +174,7 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_cmpsel_vec 0 #define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 /* not defined -- call should be eliminated at compile time */ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 34113388ef..98d924b91a 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2421,10 +2421,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; OP_32_64(bswap16): - tcg_out_rolw_8(s, a0); + if (a2 & TCG_BSWAP_OS) { + /* Output must be sign-extended. 
*/ + if (rexw) { + tcg_out_bswap64(s, a0); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); + } else { + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SAR, a0, 16); + } + } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SHR, a0, 16); + } else { + tcg_out_rolw_8(s, a0); + } break; OP_32_64(bswap32): tcg_out_bswap32(s, a0); + if (rexw && (a2 & TCG_BSWAP_OS)) { + tcg_out_ext32s(s, a0, a0); + } break; OP_32_64(neg): diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 5944448b2a..bf0eb84e2d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -540,39 +540,37 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) +static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags) { + /* ret and arg can't be register tmp0 */ + tcg_debug_assert(ret != TCG_TMP0); + tcg_debug_assert(arg != TCG_TMP0); + + /* With arg = abcd: */ if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */ + if (flags & TCG_BSWAP_OS) { + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */ + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */ } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + return; } -} -static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */ + if (!(flags & TCG_BSWAP_IZ)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */ + } + if (flags & TCG_BSWAP_OS) { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */ + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */ } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */ + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */ } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */ } static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) @@ -581,27 +579,20 @@ static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) tcg_debug_assert(ok); } -static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) +static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags) { if (use_mips32r2_instructions) { tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0); + } } else { - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot -- never omit the insn, like tcg_out_mov might. 
*/ - tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); - tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); - } -} - -static void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); - tcg_out_dsrl(s, ret, ret, 32); - } else { - tcg_out_bswap_subr(s, bswap32u_addr); + if (flags & TCG_BSWAP_OZ) { + tcg_out_bswap_subr(s, bswap32u_addr); + } else { + tcg_out_bswap_subr(s, bswap32_addr); + } /* delay slot -- never omit the insn, like tcg_out_mov might. */ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); @@ -1367,14 +1358,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, break; case MO_UW | MO_BSWAP: tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, lo, TCG_TMP1); + tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ); break; case MO_UW: tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); break; case MO_SW | MO_BSWAP: tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16s(s, lo, TCG_TMP1); + tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS); break; case MO_SW: tcg_out_opc_imm(s, OPC_LH, lo, base, 0); @@ -1383,7 +1374,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, if (TCG_TARGET_REG_BITS == 64 && is_64) { if (use_mips32r2_instructions) { tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); - tcg_out_bswap32u(s, lo, lo); + tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ); } else { tcg_out_bswap_subr(s, bswap32u_addr); /* delay slot */ @@ -1396,7 +1387,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, case MO_SL | MO_BSWAP: if (use_mips32r2_instructions) { tcg_out_opc_imm(s, OPC_LW, lo, base, 0); - tcg_out_bswap32(s, lo, lo); + tcg_out_bswap32(s, lo, lo, 0); } else { tcg_out_bswap_subr(s, bswap32_addr); /* delay slot */ @@ -1514,8 +1505,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, break; case MO_16 | MO_BSWAP: - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff); - tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + tcg_out_bswap16(s, TCG_TMP1, lo, 0); lo = TCG_TMP1; /* FALLTHRU */ case MO_16: @@ -1523,7 +1513,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, break; case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP3, lo); + tcg_out_bswap32(s, TCG_TMP3, lo, 0); lo = TCG_TMP3; /* FALLTHRU */ case MO_32: @@ -1542,9 +1532,9 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0); tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4); } else { - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi); + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0); - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo); + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? 
hi : lo, 0); tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4); } break; @@ -1933,10 +1923,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_not_i64: i1 = OPC_NOR; goto do_unary; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - i1 = OPC_WSBH; - goto do_unary; case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: i1 = OPC_SEB; @@ -1948,11 +1934,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); break; + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + tcg_out_bswap16(s, a0, a1, a2); + break; case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, a0, a1); + tcg_out_bswap32(s, a0, a1, 0); break; case INDEX_op_bswap32_i64: - tcg_out_bswap32u(s, a0, a1); + tcg_out_bswap32(s, a0, a1, a2); break; case INDEX_op_bswap64_i64: tcg_out_bswap64(s, a0, a1); diff --git a/tcg/optimize.c b/tcg/optimize.c index 211a4209a0..9876ac52a8 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -355,10 +355,12 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) return (uint16_t)x; CASE_OP_32_64(bswap16): - return bswap16(x); + x = bswap16(x); + return y & TCG_BSWAP_OS ? (int16_t)x : x; CASE_OP_32_64(bswap32): - return bswap32(x); + x = bswap32(x); + return y & TCG_BSWAP_OS ? (int32_t)x : x; case INDEX_op_bswap64_i64: return bswap64(x); @@ -1029,6 +1031,42 @@ void tcg_optimize(TCGContext *s) } break; + CASE_OP_32_64(bswap16): + mask = arg_info(op->args[1])->mask; + if (mask <= 0xffff) { + op->args[2] |= TCG_BSWAP_IZ; + } + mask = bswap16(mask); + switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { + case TCG_BSWAP_OZ: + break; + case TCG_BSWAP_OS: + mask = (int16_t)mask; + break; + default: /* undefined high bits */ + mask |= MAKE_64BIT_MASK(16, 48); + break; + } + break; + + case INDEX_op_bswap32_i64: + mask = arg_info(op->args[1])->mask; + if (mask <= 0xffffffffu) { + op->args[2] |= TCG_BSWAP_IZ; + } + mask = bswap32(mask); + switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { + case TCG_BSWAP_OZ: + break; + case TCG_BSWAP_OS: + mask = (int32_t)mask; + break; + default: /* undefined high bits */ + mask |= MAKE_64BIT_MASK(32, 32); + break; + } + break; + default: break; } @@ -1135,9 +1173,6 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16s): CASE_OP_32_64(ext16u): CASE_OP_32_64(ctpop): - CASE_OP_32_64(bswap16): - CASE_OP_32_64(bswap32): - case INDEX_op_bswap64_i64: case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: @@ -1151,6 +1186,17 @@ void tcg_optimize(TCGContext *s) } goto do_default; + CASE_OP_32_64(bswap16): + CASE_OP_32_64(bswap32): + case INDEX_op_bswap64_i64: + if (arg_is_const(op->args[1])) { + tmp = do_constant_folding(opc, arg_info(op->args[1])->val, + op->args[2]); + tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp); + break; + } + goto do_default; + CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 795701442b..e0f4665213 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -413,6 +413,10 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define SRAD XO31(794) #define SRADI XO31(413<<1) +#define BRH XO31(219) +#define BRW XO31(155) +#define BRD XO31(187) + #define TW XO31( 4) #define TRAP (TW | TO(31)) @@ -738,6 +742,26 @@ static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); } +static inline void tcg_out_ext8s(TCGContext *s, TCGReg 
dst, TCGReg src) +{ + tcg_out32(s, EXTSB | RA(dst) | RS(src)); +} + +static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, EXTSH | RA(dst) | RS(src)); +} + +static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, ANDI | SAI(src, dst, 0xffff)); +} + +static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, EXTSW | RA(dst) | RS(src)); +} + static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) { tcg_out_rld(s, RLDICL, dst, src, 0, 32); @@ -753,6 +777,12 @@ static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); } +static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31)); +} + static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) { tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); @@ -763,6 +793,116 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); } +static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2)); +} + +static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRH | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, dst, dst); + } + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ + tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); + /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ + tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRW | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, dst, dst); + } + return; + } + + /* + * Stolen from gcc's builtin_bswap32. + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ + tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); + /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); + /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) +{ + TCGReg t0 = dst == src ? TCG_REG_R0 : dst; + TCGReg t1 = dst == src ? 
dst : TCG_REG_R0; + + if (have_isa_3_10) { + tcg_out32(s, BRD | RA(dst) | RS(src)); + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = abcdefgh + */ + /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ + tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); + /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); + /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); + + /* t0 = rol64(t0, 32) = hgfe0000 */ + tcg_out_rld(s, RLDICL, t0, t0, 32, 0); + /* t1 = rol64(src, 32) = efghabcd */ + tcg_out_rld(s, RLDICL, t1, src, 32, 0); + + /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); + /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); + /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); + + tcg_out_mov(s, TCG_TYPE_REG, dst, t0); +} + /* Emit a move into ret of arg, if it can be done in one insn. */ static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) { @@ -2322,7 +2462,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int c; switch (opc) { case INDEX_op_exit_tb: @@ -2390,7 +2529,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); + tcg_out_ext8s(s, args[0], args[0]); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: @@ -2587,8 +2726,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_sar_i32: if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31)); + tcg_out_sari32(s, args[0], args[1], args[2]); } else { tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); } @@ -2676,8 +2814,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_sar_i64: if (const_args[2]) { - int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); - tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + tcg_out_sari64(s, args[0], args[1], args[2]); } else { tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); } @@ -2728,18 +2865,15 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: - c = EXTSB; - goto gen_ext; + tcg_out_ext8s(s, args[0], args[1]); + break; case INDEX_op_ext16s_i32: case INDEX_op_ext16s_i64: - c = EXTSH; - goto gen_ext; + tcg_out_ext16s(s, args[0], args[1]); + break; case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: - c = EXTSW; - goto gen_ext; - gen_ext: - tcg_out32(s, c | RS(args[1]) | RA(args[0])); + tcg_out_ext32s(s, args[0], args[1]); break; case INDEX_op_extu_i32_i64: tcg_out_ext32u(s, args[0], args[1]); @@ -2759,72 +2893,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1]; - /* a1 = abcd */ - if (a0 != a1) { - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ - tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); - } else { - /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = a0 | r0 # 00dc */ - tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); - } + tcg_out_bswap16(s, args[0], args[1], args[2]); break; - case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, args[0], args[1], 0); + break; case INDEX_op_bswap32_i64: - /* Stolen from gcc's builtin_bswap32 */ - a1 = args[1]; - a0 = args[0] == a1 ? 
TCG_REG_R0 : args[0]; - - /* a1 = args[1] # abcd */ - /* a0 = rotate_left (a1, 8) # bcda */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - if (a0 == TCG_REG_R0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } + tcg_out_bswap32(s, args[0], args[1], args[2]); break; - case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; - if (a0 == a1) { - a0 = TCG_REG_R0; - a2 = a1; - } - - /* a1 = # abcd efgh */ - /* a0 = rl32(a1, 8) # 0000 fghe */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - /* a0 = rl64(a0, 32) # hgfe 0000 */ - /* a2 = rl64(a1, 32) # efgh abcd */ - tcg_out_rld(s, RLDICL, a0, a0, 32, 0); - tcg_out_rld(s, RLDICL, a2, a1, 32, 0); - - /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); - /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); - /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); - - if (a0 == 0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } + tcg_out_bswap64(s, args[0], args[1]); break; case INDEX_op_deposit_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index da7eecafc5..c16f96b401 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -852,37 +852,43 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * TCGMemOpIdx oi, uintptr_t ra) */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, +static void * const qemu_ld_helpers[8] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_UW] = helper_be_lduw_mmu, + [MO_SW] = helper_be_ldsw_mmu, + [MO_UL] = helper_be_ldul_mmu, #if TCG_TARGET_REG_BITS == 64 - [MO_LESL] = helper_le_ldsl_mmu, + [MO_SL] = helper_be_ldsl_mmu, #endif - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, + [MO_Q] = helper_be_ldq_mmu, +#else + [MO_UW] = helper_le_lduw_mmu, + [MO_SW] = helper_le_ldsw_mmu, + [MO_UL] = helper_le_ldul_mmu, #if TCG_TARGET_REG_BITS == 64 - [MO_BESL] = helper_be_ldsl_mmu, + [MO_SL] = helper_le_ldsl_mmu, +#endif + [MO_Q] = helper_le_ldq_mmu, #endif - [MO_BEQ] = helper_be_ldq_mmu, }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, TCGMemOpIdx oi, * uintptr_t ra) */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, +static void * const qemu_st_helpers[4] = { + [MO_8] = helper_ret_stb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_stw_mmu, + [MO_32] = helper_be_stl_mmu, + [MO_64] = helper_be_stq_mmu, +#else + [MO_16] = helper_le_stw_mmu, + [MO_32] = 
helper_le_stl_mmu, + [MO_64] = helper_le_stq_mmu, +#endif }; /* We don't support oversize guests */ @@ -997,7 +1003,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]); tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); tcg_out_goto(s, l->raddr); @@ -1042,7 +1048,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, l->raddr); return true; @@ -1052,10 +1058,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, bool is_64) { - const MemOp bswap = opc & MO_BSWAP; - - /* We don't yet handle byteswapping, assert */ - g_assert(!bswap); + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & (MO_SSIZE)) { case MO_UB: @@ -1139,10 +1143,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { - const MemOp bswap = opc & MO_BSWAP; - - /* We don't yet handle byteswapping, assert */ - g_assert(!bswap); + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & (MO_SSIZE)) { case MO_8: diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc index 5fe073f09a..b82cf19f09 100644 --- a/tcg/s390/tcg-target.c.inc +++ b/tcg/s390/tcg-target.c.inc @@ -1951,15 +1951,37 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); break; - OP_32_64(bswap16): - /* The TCG bswap definition requires bits 0-47 already be zero. - Thus we don't need the G-type insns to implement bswap16_i64. 
*/ - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); + case INDEX_op_bswap16_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_insn(s, RRE, LRVR, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16); + } else { + tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16); + } + break; + case INDEX_op_bswap16_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_insn(s, RRE, LRVGR, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48); + } else { + tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48); + } break; - OP_32_64(bswap32): + + case INDEX_op_bswap32_i32: tcg_out_insn(s, RRE, LRVR, args[0], args[1]); break; + case INDEX_op_bswap32_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_insn(s, RRE, LRVR, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tgen_ext32s(s, a0, a0); + } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tgen_ext32u(s, a0, a0); + } + break; case INDEX_op_add2_i32: if (const_args[4]) { diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 515db120cc..ffe55e908f 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -1736,12 +1736,45 @@ void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_addv_mask(d, a, b, m); } +void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80)); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + + tcg_gen_andc_i32(t1, a, m); + tcg_gen_andc_i32(t2, b, m); + tcg_gen_xor_i32(t3, a, b); + tcg_gen_add_i32(d, t1, t2); + tcg_gen_and_i32(t3, t3, m); + tcg_gen_xor_i32(d, d, t3); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); +} + void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000)); gen_addv_mask(d, a, b, m); } +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, a, ~0xffff); + tcg_gen_add_i32(t2, a, b); + tcg_gen_add_i32(t1, t1, b); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1886,12 +1919,45 @@ void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_subv_mask(d, a, b, m); } +void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80)); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + + tcg_gen_or_i32(t1, a, m); + tcg_gen_andc_i32(t2, b, m); + tcg_gen_eqv_i32(t3, a, b); + tcg_gen_sub_i32(d, t1, t2); + tcg_gen_and_i32(t3, t3, m); + tcg_gen_xor_i32(d, d, t3); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); +} + void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000)); gen_subv_mask(d, a, b, m); } +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, b, ~0xffff); + tcg_gen_sub_i32(t2, a, b); + tcg_gen_sub_i32(t1, a, t1); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) 
{ TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2612,6 +2678,20 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + +void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_16, 0xffff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { @@ -2663,6 +2743,20 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + +void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_16, 0xffff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { @@ -2728,6 +2822,34 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_temp_free_i64(s); } +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_8, 0x80 >> c); + uint32_t c_mask = dup_const(MO_8, 0xff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + +void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_16, 0x8000 >> c); + uint32_t c_mask = dup_const(MO_16, 0xffff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index dcc2ed0bbc..44d711c0fc 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1001,26 +1001,42 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) } } -/* Note: we assume the two high bytes are set to zero */ -void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) { + /* Only one extension flag may be present. 
*/ + tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); + if (TCG_TARGET_HAS_bswap16_i32) { - tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg); + tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags); } else { TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); - tcg_gen_ext8u_i32(t0, arg); - tcg_gen_shli_i32(t0, t0, 8); - tcg_gen_shri_i32(ret, arg, 8); - tcg_gen_or_i32(ret, ret, t0); + tcg_gen_shri_i32(t0, arg, 8); + if (!(flags & TCG_BSWAP_IZ)) { + tcg_gen_ext8u_i32(t0, t0); + } + + if (flags & TCG_BSWAP_OS) { + tcg_gen_shli_i32(t1, arg, 24); + tcg_gen_sari_i32(t1, t1, 16); + } else if (flags & TCG_BSWAP_OZ) { + tcg_gen_ext8u_i32(t1, arg); + tcg_gen_shli_i32(t1, t1, 8); + } else { + tcg_gen_shli_i32(t1, arg, 8); + } + + tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { if (TCG_TARGET_HAS_bswap32_i32) { - tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); + tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); } else { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1654,49 +1670,79 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) } } -/* Note: we assume the six high bytes are set to zero */ -void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { + /* Only one extension flag may be present. */ + tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); + if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags); + if (flags & TCG_BSWAP_OS) { + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } else if (TCG_TARGET_HAS_bswap16_i64) { - tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg); + tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t0, arg, 8); + if (!(flags & TCG_BSWAP_IZ)) { + tcg_gen_ext8u_i64(t0, t0); + } - tcg_gen_ext8u_i64(t0, arg); - tcg_gen_shli_i64(t0, t0, 8); - tcg_gen_shri_i64(ret, arg, 8); - tcg_gen_or_i64(ret, ret, t0); + if (flags & TCG_BSWAP_OS) { + tcg_gen_shli_i64(t1, arg, 56); + tcg_gen_sari_i64(t1, t1, 48); + } else if (flags & TCG_BSWAP_OZ) { + tcg_gen_ext8u_i64(t1, arg); + tcg_gen_shli_i64(t1, t1, 8); + } else { + tcg_gen_shli_i64(t1, arg, 8); + } + + tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); } } -/* Note: we assume the four high bytes are set to zero */ -void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { + /* Only one extension flag may be present. 
*/ + tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); + if (TCG_TARGET_REG_BITS == 32) { tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (flags & TCG_BSWAP_OS) { + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } else if (TCG_TARGET_HAS_bswap32_i64) { - tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); + tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff); - /* arg = ....abcd */ - tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */ - tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */ - tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */ - tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */ - tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */ - - tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */ - tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */ - tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ - tcg_gen_or_i64(ret, t0, t1); /* ret = ....dcba */ + /* arg = xxxxabcd */ + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .xxxxabc */ + tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */ + tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */ + tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */ + + tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */ + tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */ + if (flags & TCG_BSWAP_OS) { + tcg_gen_sari_i64(t1, t1, 32); /* t1 = ssssdc.. */ + } else { + tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ + } + tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -1717,7 +1763,7 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); } else if (TCG_TARGET_HAS_bswap64_i64) { - tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); + tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2830,7 +2876,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) orig_memop = memop; if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { memop &= ~MO_BSWAP; - /* The bswap primitive requires zero-extended input. */ + /* The bswap primitive benefits from zero-extended input. */ if ((memop & MO_SSIZE) == MO_SW) { memop &= ~MO_SIGN; } @@ -2843,10 +2889,9 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) if ((orig_memop ^ memop) & MO_BSWAP) { switch (orig_memop & MO_SIZE) { case MO_16: - tcg_gen_bswap16_i32(val, val); - if (orig_memop & MO_SIGN) { - tcg_gen_ext16s_i32(val, val); - } + tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN + ? TCG_BSWAP_IZ | TCG_BSWAP_OS + : TCG_BSWAP_IZ | TCG_BSWAP_OZ)); break; case MO_32: tcg_gen_bswap32_i32(val, val); @@ -2870,8 +2915,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) swap = tcg_temp_new_i32(); switch (memop & MO_SIZE) { case MO_16: - tcg_gen_ext16u_i32(swap, val); - tcg_gen_bswap16_i32(swap, swap); + tcg_gen_bswap16_i32(swap, val, 0); break; case MO_32: tcg_gen_bswap32_i32(swap, val); @@ -2919,7 +2963,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) orig_memop = memop; if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { memop &= ~MO_BSWAP; - /* The bswap primitive requires zero-extended input. */ + /* The bswap primitive benefits from zero-extended input. 
*/ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) { memop &= ~MO_SIGN; } @@ -2930,18 +2974,15 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) plugin_gen_mem_callbacks(addr, info); if ((orig_memop ^ memop) & MO_BSWAP) { + int flags = (orig_memop & MO_SIGN + ? TCG_BSWAP_IZ | TCG_BSWAP_OS + : TCG_BSWAP_IZ | TCG_BSWAP_OZ); switch (orig_memop & MO_SIZE) { case MO_16: - tcg_gen_bswap16_i64(val, val); - if (orig_memop & MO_SIGN) { - tcg_gen_ext16s_i64(val, val); - } + tcg_gen_bswap16_i64(val, val, flags); break; case MO_32: - tcg_gen_bswap32_i64(val, val); - if (orig_memop & MO_SIGN) { - tcg_gen_ext32s_i64(val, val); - } + tcg_gen_bswap32_i64(val, val, flags); break; case MO_64: tcg_gen_bswap64_i64(val, val); @@ -2971,12 +3012,10 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) swap = tcg_temp_new_i64(); switch (memop & MO_SIZE) { case MO_16: - tcg_gen_ext16u_i64(swap, val); - tcg_gen_bswap16_i64(swap, swap); + tcg_gen_bswap16_i64(swap, val, 0); break; case MO_32: - tcg_gen_ext32u_i64(swap, val); - tcg_gen_bswap32_i64(swap, swap); + tcg_gen_bswap32_i64(swap, val, 0); break; case MO_64: tcg_gen_bswap64_i64(swap, val); diff --git a/tcg/tcg.c b/tcg/tcg.c index 5e53c3348f..5150ed700e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1778,6 +1778,14 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", }; +static const char bswap_flag_name[][6] = { + [TCG_BSWAP_IZ] = "iz", + [TCG_BSWAP_OZ] = "oz", + [TCG_BSWAP_OS] = "os", + [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", + [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", +}; + static inline bool tcg_regset_single(TCGRegSet d) { return (d & (d - 1)) == 0; @@ -1921,6 +1929,26 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) i = 1; } break; + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + case INDEX_op_bswap64_i64: + { + TCGArg flags = op->args[k]; + const char *name = NULL; + + if (flags < ARRAY_SIZE(bswap_flag_name)) { + name = bswap_flag_name[flags]; + } + if (name) { + col += qemu_log(",%s", name); + } else { + col += qemu_log(",$0x%" TCG_PRIlx, flags); + } + i = k = 1; + } + break; default: i = 0; break; diff --git a/tcg/tci.c b/tcg/tci.c index 71689d4a40..b672c7cae5 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -808,7 +808,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = (int8_t)regs[r1]; break; #endif -#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 || \ + TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(ext16s) tci_args_rr(insn, &r0, &r1); regs[r0] = (int16_t)regs[r1]; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 9651e7a8f1..0cb16aaa81 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -597,6 +597,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { + TCGOpcode exts; + switch (opc) { case INDEX_op_exit_tb: tcg_out_op_p(s, opc, (void *)args[0]); @@ -710,13 +712,28 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, CASE_64(ext32u) /* Optional (TCG_TARGET_HAS_ext32u_i64). */ CASE_64(ext_i32) CASE_64(extu_i32) - CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */ - CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */ - CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). 
*/ CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */
+ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
+ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
 tcg_out_op_rr(s, opc, args[0], args[1]);
 break;
+
+ case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */
+ exts = INDEX_op_ext16s_i32;
+ goto do_bswap;
+ case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */
+ exts = INDEX_op_ext16s_i64;
+ goto do_bswap;
+ case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */
+ exts = INDEX_op_ext32s_i64;
+ do_bswap:
+ /* The base tci bswaps zero-extend, and ignore high bits. */
+ tcg_out_op_rr(s, opc, args[0], args[1]);
+ if (args[2] & TCG_BSWAP_OS) {
+ tcg_out_op_rr(s, exts, args[0], args[0]);
+ }
+ break;
+
 CASE_32_64(add2)
 CASE_32_64(sub2)
 tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
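
The net effect of the flags argument introduced above can be written down in plain C. Below is a minimal sketch of the bswap16 semantics, assuming the TCG_BSWAP_IZ/OZ/OS values 1, 2 and 4; the ref_bswap16() helper is purely illustrative and not part of the patch:

    #include <stdint.h>

    #define TCG_BSWAP_IZ  1   /* input is already zero-extended from bit 15 */
    #define TCG_BSWAP_OZ  2   /* output must be zero-extended from bit 15 */
    #define TCG_BSWAP_OS  4   /* output must be sign-extended from bit 15 */

    static uint64_t ref_bswap16(uint64_t arg, int flags)
    {
        /* Swap the two low bytes; bits 16..63 start out zero here.
         * TCG_BSWAP_IZ does not change the result: it only promises
         * that arg has no bits set above bit 15, which lets the
         * tcg_gen_bswap16_* expansions skip the ext8u masking of the
         * shifted-down byte. */
        uint64_t ret = ((arg >> 8) & 0xff) | ((arg & 0xff) << 8);

        if (flags & TCG_BSWAP_OS) {
            /* Sign-extend the swapped value from bit 15. */
            ret = (uint64_t)(int64_t)(int16_t)ret;
        }
        /* Under TCG_BSWAP_OZ the zero high bits are already right;
         * with no output flag at all they are simply unspecified. */
        return ret;
    }

This is why the qemu_ld paths above can pass TCG_BSWAP_IZ | TCG_BSWAP_OS for a sign-extending byte-swapped load and no longer need a separate ext16s/ext32s step afterwards.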
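
The constant-mask fallback for bswap32_i64 swaps adjacent bytes first and the two halfwords second, exactly as the abcd comments trace. For reference, the same trick on a plain 32-bit value, without the OS/OZ variants (illustration only):

    #include <stdint.h>

    static uint32_t ref_bswap32(uint32_t x)
    {
        uint32_t mask = 0x00ff00ff;

        /* abcd -> badc: swap the bytes within each halfword. */
        x = ((x >> 8) & mask) | ((x & mask) << 8);
        /* badc -> dcba: swap the two halfwords. */
        x = (x >> 16) | (x << 16);
        return x;
    }

In the i64 fallback the halfword swap is done with 48- and 16-bit shifts instead of a rotate, so that choosing sari rather than shri for the final shift yields the sign-extended result at no extra cost.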
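
Backends whose native byte swap already zero-extends, such as tci above, only need a fix-up instruction for TCG_BSWAP_OS; TCG_BSWAP_IZ and TCG_BSWAP_OZ come for free. A sketch of that lowering as host C, assuming GCC/Clang's __builtin_bswap16 (the helper name is hypothetical):

    #include <stdint.h>

    static uint64_t tci_like_bswap16(uint64_t val, int flags)
    {
        /* The base swap ignores the high input bits and
         * zero-extends the result, mirroring the tci opcode. */
        uint64_t ret = __builtin_bswap16((uint16_t)val);

        if (flags & TCG_BSWAP_OS) {
            /* The ext16s fix-up emitted by do_bswap above. */
            ret = (uint64_t)(int64_t)(int16_t)ret;
        }
        return ret;
    }

The same dependency explains the widened guard in tci.c: the interpreter's ext16s case must now be compiled in whenever bswap16 is available, since the backend may emit it for the TCG_BSWAP_OS fix-up.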