Diffstat (limited to 'tcg')
-rw-r--r--  tcg/aarch64/tcg-target.c.inc   |  14
-rw-r--r--  tcg/aarch64/tcg-target.h       |   5
-rw-r--r--  tcg/arm/tcg-target.c.inc       |  19
-rw-r--r--  tcg/arm/tcg-target.h           |   1
-rw-r--r--  tcg/i386/tcg-target-con-set.h  |   2
-rw-r--r--  tcg/i386/tcg-target-con-str.h  |   1
-rw-r--r--  tcg/i386/tcg-target.c.inc      | 291
-rw-r--r--  tcg/i386/tcg-target.h          |   9
-rw-r--r--  tcg/loongarch64/tcg-target.h   |   6
-rw-r--r--  tcg/mips/tcg-target.h          |   5
-rw-r--r--  tcg/optimize.c                 |  78
-rw-r--r--  tcg/ppc/tcg-target.c.inc       | 149
-rw-r--r--  tcg/ppc/tcg-target.h           |   5
-rw-r--r--  tcg/riscv/tcg-target.c.inc     |  49
-rw-r--r--  tcg/riscv/tcg-target.h         |   5
-rw-r--r--  tcg/s390x/tcg-target.c.inc     |  78
-rw-r--r--  tcg/s390x/tcg-target.h         |   5
-rw-r--r--  tcg/sparc64/tcg-target.c.inc   |  40
-rw-r--r--  tcg/sparc64/tcg-target.h       |   5
-rw-r--r--  tcg/tcg-op-gvec.c              |   6
-rw-r--r--  tcg/tcg-op.c                   | 151
-rw-r--r--  tcg/tcg.c                      |   9
-rw-r--r--  tcg/tci/tcg-target.h           |   5
23 files changed, 682 insertions, 256 deletions
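Note: the series below introduces a negsetcond opcode, which yields -1 (all bits set) when the comparison is true and 0 otherwise, so a separate negate is no longer needed to build masks. A minimal C sketch of the semantics and of the generic fallback (setcond followed by neg, mirroring the tcg_gen_negsetcond_i32() hunk in tcg/tcg-op.c below); the helper name and the use of plain C integers are illustrative only, not part of the patch:

    #include <stdint.h>

    /*
     * Illustrative sketch: negsetcond computes -(arg1 cond arg2),
     * i.e. all bits set when the condition holds, zero otherwise.
     * A backend without TCG_TARGET_HAS_negsetcond_i32 expands it as
     * setcond followed by negation.
     */
    static int32_t negsetcond_ltu_i32(uint32_t arg1, uint32_t arg2)
    {
        int32_t setcond = arg1 < arg2;   /* setcond result: 0 or 1 */
        return -setcond;                 /* negsetcond result: 0 or -1 */
    }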
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 35ca80cd56..0931a69448 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2262,6 +2262,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCG_REG_XZR, tcg_invert_cond(args[3])); break; + case INDEX_op_negsetcond_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_negsetcond_i64: + tcg_out_cmp(s, ext, a1, a2, c2); + /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ + tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(args[3])); + break; + case INDEX_op_movcond_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -2868,6 +2878,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_sub_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rA); case INDEX_op_mul_i32: @@ -3086,7 +3098,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) #if !defined(CONFIG_SOFTMMU) /* * Note that XZR cannot be encoded in the address base register slot, - * as that actaully encodes SP. Depending on the guest, we may need + * as that actually encodes SP. Depending on the guest, we may need * to zero-extend the guest address via the address index register slot, * therefore we need to load even a zero guest base into a register. */ diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index ce64de06e5..98727ea53b 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -86,14 +86,14 @@ typedef enum { #define TCG_TARGET_HAS_sextract_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_extrl_i64_i32 0 -#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div_i64 1 @@ -123,6 +123,7 @@ typedef enum { #define TCG_TARGET_HAS_sextract_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 83e286088f..acb5f23b54 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1216,9 +1216,11 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, case TCG_COND_LEU: case TCG_COND_GTU: case TCG_COND_GEU: - /* We perform a conditional comparision. If the high half is - equal, then overwrite the flags with the comparison of the - low half. The resulting flags cover the whole. */ + /* + * We perform a conditional comparison. If the high half is + * equal, then overwrite the flags with the comparison of the + * low half. The resulting flags cover the whole. + */ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh); tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl); return cond; @@ -1250,7 +1252,7 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, /* * Note that TCGReg references Q-registers. - * Q-regno = 2 * D-regno, so shift left by 1 whlie inserting. + * Q-regno = 2 * D-regno, so shift left by 1 while inserting. 
*/ static uint32_t encode_vd(TCGReg rd) { @@ -1975,6 +1977,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], ARITH_MOV, args[0], 0, 0); break; + case INDEX_op_negsetcond_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], + ARITH_MVN, args[0], 0, 0); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], + ARITH_MOV, args[0], 0, 0); + break; case INDEX_op_brcond2_i32: c = tcg_out_cmp2(s, args, const_args); @@ -2112,6 +2122,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add_i32: case INDEX_op_sub_i32: case INDEX_op_setcond_i32: + case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); case INDEX_op_and_i32: diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index c649db72a6..311a985209 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -116,6 +116,7 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 5ea3a292f0..7d00a7dde8 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -33,7 +33,7 @@ C_O1_I1(r, q) C_O1_I1(r, r) C_O1_I1(x, r) C_O1_I1(x, x) -C_O1_I2(Q, 0, Q) +C_O1_I2(q, 0, qi) C_O1_I2(q, r, re) C_O1_I2(r, 0, ci) C_O1_I2(r, 0, r) diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h index 24e6bcb80d..95a30e58cd 100644 --- a/tcg/i386/tcg-target-con-str.h +++ b/tcg/i386/tcg-target-con-str.h @@ -19,7 +19,6 @@ REGS('D', 1u << TCG_REG_EDI) REGS('r', ALL_GENERAL_REGS) REGS('x', ALL_VECTOR_REGS) REGS('q', ALL_BYTEL_REGS) /* regs that can be used as a byte operand */ -REGS('Q', ALL_BYTEH_REGS) /* regs with a second byte (e.g. 
%ah) */ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_ld/st */ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */ diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index a6b2eae995..0c3d1e4cef 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -144,7 +144,6 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) # define TCG_REG_L1 TCG_REG_EDX #endif -#define ALL_BYTEH_REGS 0x0000000fu #if TCG_TARGET_REG_BITS == 64 # define ALL_GENERAL_REGS 0x0000ffffu # define ALL_VECTOR_REGS 0xffff0000u @@ -152,7 +151,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #else # define ALL_GENERAL_REGS 0x000000ffu # define ALL_VECTOR_REGS 0x00ff0000u -# define ALL_BYTEL_REGS ALL_BYTEH_REGS +# define ALL_BYTEL_REGS 0x0000000fu #endif #ifdef CONFIG_SOFTMMU # define SOFTMMU_RESERVE_REGS ((1 << TCG_REG_L0) | (1 << TCG_REG_L1)) @@ -277,6 +276,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ #define OPC_MOVB_EvIz (0xc6) #define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVB_Ib (0xb0) #define OPC_MOVL_Iv (0xb8) #define OPC_MOVBE_GyMy (0xf0 | P_EXT38) #define OPC_MOVBE_MyGy (0xf1 | P_EXT38) @@ -1436,99 +1436,89 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, } } -static void tcg_out_brcond32(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *label, int small) +static void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *label, bool small) { - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond64(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *label, int small) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); -} -#else -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ +#if TCG_TARGET_REG_BITS == 32 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args, int small) + const int *const_args, bool small) { TCGLabel *label_next = gen_new_label(); TCGLabel *label_this = arg_label(args[5]); switch(args[4]) { case TCG_COND_EQ: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], - label_next, 1); - tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2], + label_next, 1); + tcg_out_brcond(s, 0, TCG_COND_EQ, args[1], args[3], const_args[3], + label_this, small); break; case TCG_COND_NE: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], - label_this, small); - tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2], + label_this, small); + tcg_out_brcond(s, 0, TCG_COND_NE, args[1], args[3], const_args[3], + label_this, small); break; case TCG_COND_LT: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], - label_this, small); + 
tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_LE: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LT, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_GT: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_GE: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GT, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_LTU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LTU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_LEU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LTU, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_LEU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_GTU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GTU, args[0], args[2], const_args[2], + label_this, small); break; case TCG_COND_GEU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GTU, args[1], args[3], const_args[3], + label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], - label_this, small); + tcg_out_brcond(s, 0, TCG_COND_GEU, args[0], args[2], const_args[2], + label_this, small); break; default: g_assert_not_reached(); @@ -1537,23 +1527,107 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, } #endif -static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg arg1, TCGArg arg2, int const_arg2) +static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, + TCGArg dest, TCGArg arg1, TCGArg arg2, + int const_arg2, bool neg) { - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - 
tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); -} + bool inv = false; + bool cleared; -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg arg1, TCGArg arg2, int const_arg2) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + switch (cond) { + case TCG_COND_NE: + inv = true; + /* fall through */ + case TCG_COND_EQ: + /* If arg2 is 0, convert to LTU/GEU vs 1. */ + if (const_arg2 && arg2 == 0) { + arg2 = 1; + goto do_ltu; + } + break; + + case TCG_COND_LEU: + inv = true; + /* fall through */ + case TCG_COND_GTU: + /* If arg2 is a register, swap for LTU/GEU. */ + if (!const_arg2) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + goto do_ltu; + } + break; + + case TCG_COND_GEU: + inv = true; + /* fall through */ + case TCG_COND_LTU: + do_ltu: + /* + * Relying on the carry bit, use SBB to produce -1 if LTU, 0 if GEU. + * We can then use NEG or INC to produce the desired result. + * This is always smaller than the SETCC expansion. + */ + tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); + + /* X - X - C = -C = (C ? -1 : 0) */ + tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest); + if (inv && neg) { + /* ~(C ? -1 : 0) = (C ? 0 : -1) */ + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); + } else if (inv) { + /* (C ? -1 : 0) + 1 = (C ? 0 : 1) */ + tgen_arithi(s, ARITH_ADD, dest, 1, 0); + } else if (!neg) { + /* -(C ? -1 : 0) = (C ? 1 : 0) */ + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, dest); + } + return; + + case TCG_COND_GE: + inv = true; + /* fall through */ + case TCG_COND_LT: + /* If arg2 is 0, extract the sign bit. */ + if (const_arg2 && arg2 == 0) { + tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, dest, arg1); + if (inv) { + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); + } + tcg_out_shifti(s, (neg ? SHIFT_SAR : SHIFT_SHR) + rexw, + dest, rexw ? 63 : 31); + return; + } + break; + + default: + break; + } + + /* + * If dest does not overlap the inputs, clearing it first is preferred. + * The XOR breaks any false dependency for the low-byte write to dest, + * and is also one byte smaller than MOVZBL. 
+ */ + cleared = false; + if (dest != arg1 && (const_arg2 || dest != arg2)) { + tgen_arithr(s, ARITH_XOR, dest, dest); + cleared = true; + } + + tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); + + if (!cleared) { + tcg_out_ext8u(s, dest, dest); + } + if (neg) { + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, dest); + } } -#else + +#if TCG_TARGET_REG_BITS == 32 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, const int *const_args) { @@ -1610,23 +1684,13 @@ static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, } } -static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest, - TCGReg c1, TCGArg c2, int const_c2, - TCGReg v1) -{ - tcg_out_cmp(s, c1, c2, const_c2, 0); - tcg_out_cmov(s, cond, 0, dest, v1); -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest, - TCGReg c1, TCGArg c2, int const_c2, - TCGReg v1) +static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, + TCGReg v1) { - tcg_out_cmp(s, c1, c2, const_c2, P_REXW); - tcg_out_cmov(s, cond, P_REXW, dest, v1); + tcg_out_cmp(s, c1, c2, const_c2, rexw); + tcg_out_cmov(s, cond, rexw, dest, v1); } -#endif static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) @@ -2574,14 +2638,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); break; - case INDEX_op_brcond_i32: - tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); + OP_32_64(brcond): + tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1], + arg_label(args[3]), 0); break; - case INDEX_op_setcond_i32: - tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2); + OP_32_64(setcond): + tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, false); break; - case INDEX_op_movcond_i32: - tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]); + OP_32_64(negsetcond): + tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, true); + break; + OP_32_64(movcond): + tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]); break; OP_32_64(bswap16): @@ -2730,16 +2798,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; - case INDEX_op_brcond_i64: - tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]); - break; - case INDEX_op_bswap64_i64: tcg_out_bswap64(s, a0); break; @@ -2751,13 +2809,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, OP_32_64(deposit): if (args[3] == 0 && args[4] == 8) { /* load bits 0..7 */ - tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); - } else if (args[3] == 8 && args[4] == 8) { + if (const_a2) { + tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), + 0, a0, 0); + tcg_out8(s, a2); + } else { + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); + } + } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) { /* load bits 8..15 */ - tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); + if (const_a2) { + tcg_out8(s, OPC_MOVB_Ib + a0 + 4); + tcg_out8(s, a2); + } else { + tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); + } } else if (args[3] == 0 && args[4] == 16) { /* load bits 0..15 */ - tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, 
a2, a0); + if (const_a2) { + tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), + 0, a0, 0); + tcg_out16(s, a2); + } else { + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); + } } else { g_assert_not_reached(); } @@ -3312,10 +3387,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: - return C_O1_I2(Q, 0, Q); + return C_O1_I2(q, 0, qi); case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: return C_O1_I2(q, r, re); case INDEX_op_movcond_i32: diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 2a2e3fffa8..8417ea4899 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -150,6 +150,7 @@ typedef enum { #define TCG_TARGET_HAS_sextract_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 @@ -159,8 +160,7 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 @@ -187,6 +187,7 @@ typedef enum { #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 @@ -227,8 +228,8 @@ typedef enum { #define TCG_TARGET_HAS_cmpsel_vec -1 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ - (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ - ((ofs) == 0 && (len) == 16)) + (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \ + (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8)) #define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid /* Check for the possibility of high-byte extraction and, for 64-bit, diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 26f1aab780..559be67186 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -86,6 +86,7 @@ typedef enum { /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 @@ -122,6 +123,7 @@ typedef enum { /* 64-bit operations */ #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 @@ -130,8 +132,7 @@ typedef enum { #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_ext8s_i64 1 #define TCG_TARGET_HAS_ext16s_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 @@ -157,6 +158,7 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 + #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_DEFAULT_MO (0) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index dd2efa795c..c0576f66d7 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -128,12 +128,12 @@ extern 
bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_not_i64 1 @@ -150,6 +150,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 0 #endif /* optional instructions detected at runtime */ diff --git a/tcg/optimize.c b/tcg/optimize.c index d2156367a3..3013eb04e6 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1279,6 +1279,8 @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op) static bool fold_deposit(OptContext *ctx, TCGOp *op) { + TCGOpcode and_opc; + if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { uint64_t t1 = arg_info(op->args[1])->val; uint64_t t2 = arg_info(op->args[2])->val; @@ -1287,6 +1289,41 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) return tcg_opt_gen_movi(ctx, op, op->args[0], t1); } + switch (ctx->type) { + case TCG_TYPE_I32: + and_opc = INDEX_op_and_i32; + break; + case TCG_TYPE_I64: + and_opc = INDEX_op_and_i64; + break; + default: + g_assert_not_reached(); + } + + /* Inserting a value into zero at offset 0. */ + if (arg_is_const(op->args[1]) + && arg_info(op->args[1])->val == 0 + && op->args[3] == 0) { + uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]); + + op->opc = and_opc; + op->args[1] = op->args[2]; + op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask)); + ctx->z_mask = mask & arg_info(op->args[1])->z_mask; + return false; + } + + /* Inserting zero into a value. 
*/ + if (arg_is_const(op->args[2]) + && arg_info(op->args[2])->val == 0) { + uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0); + + op->opc = and_opc; + op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask)); + ctx->z_mask = mask & arg_info(op->args[1])->z_mask; + return false; + } + ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask, op->args[3], op->args[4], arg_info(op->args[2])->z_mask); @@ -1530,14 +1567,22 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { uint64_t tv = arg_info(op->args[3])->val; uint64_t fv = arg_info(op->args[4])->val; - TCGOpcode opc; + TCGOpcode opc, negopc = 0; switch (ctx->type) { case TCG_TYPE_I32: opc = INDEX_op_setcond_i32; + if (TCG_TARGET_HAS_negsetcond_i32) { + negopc = INDEX_op_negsetcond_i32; + } + tv = (int32_t)tv; + fv = (int32_t)fv; break; case TCG_TYPE_I64: opc = INDEX_op_setcond_i64; + if (TCG_TARGET_HAS_negsetcond_i64) { + negopc = INDEX_op_negsetcond_i64; + } break; default: g_assert_not_reached(); @@ -1549,6 +1594,14 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) } else if (fv == 1 && tv == 0) { op->opc = opc; op->args[3] = tcg_invert_cond(cond); + } else if (negopc) { + if (tv == -1 && fv == 0) { + op->opc = negopc; + op->args[3] = cond; + } else if (fv == -1 && tv == 0) { + op->opc = negopc; + op->args[3] = tcg_invert_cond(cond); + } } } return false; @@ -1759,6 +1812,26 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op) return false; } +static bool fold_negsetcond(OptContext *ctx, TCGOp *op) +{ + TCGCond cond = op->args[3]; + int i; + + if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { + op->args[3] = cond = tcg_swap_cond(cond); + } + + i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); + if (i >= 0) { + return tcg_opt_gen_movi(ctx, op, op->args[0], -i); + } + + /* Value is {0,-1} so all bits are repetitions of the sign. */ + ctx->s_mask = -1; + return false; +} + + static bool fold_setcond2(OptContext *ctx, TCGOp *op) { TCGCond cond = op->args[5]; @@ -2216,6 +2289,9 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(setcond): done = fold_setcond(&ctx, op); break; + CASE_OP_32_64(negsetcond): + done = fold_negsetcond(&ctx, op); + break; case INDEX_op_setcond2_i32: done = fold_setcond2(&ctx, op); break; diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 511e14b180..090f11e71c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -447,6 +447,11 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define TW XO31( 4) #define TRAP (TW | TO(31)) +#define SETBC XO31(384) /* v3.10 */ +#define SETBCR XO31(416) /* v3.10 */ +#define SETNBC XO31(448) /* v3.10 */ +#define SETNBCR XO31(480) /* v3.10 */ + #define NOP ORI /* ori 0,0,0 */ #define LVX XO31(103) @@ -1548,8 +1553,20 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, } static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, - TCGReg dst, TCGReg src) + TCGReg dst, TCGReg src, bool neg) { + if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { + /* + * X != 0 implies X + -1 generates a carry. + * RT = (~X + X) + CA + * = -1 + CA + * = CA ? 
0 : -1 + */ + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, src, src)); + return; + } + if (type == TCG_TYPE_I32) { tcg_out32(s, CNTLZW | RS(src) | RA(dst)); tcg_out_shri32(s, dst, dst, 5); @@ -1557,18 +1574,28 @@ static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, tcg_out32(s, CNTLZD | RS(src) | RA(dst)); tcg_out_shri64(s, dst, dst, 6); } + if (neg) { + tcg_out32(s, NEG | RT(dst) | RA(dst)); + } } -static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) +static void tcg_out_setcond_ne0(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src, bool neg) { - /* X != 0 implies X + -1 generates a carry. Extra addition - trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ - if (dst != src) { - tcg_out32(s, ADDIC | TAI(dst, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, dst, src)); - } else { + if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) { + /* + * X != 0 implies X + -1 generates a carry. Extra addition + * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. + */ tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); + return; + } + tcg_out_setcond_eq0(s, type, dst, src, false); + if (neg) { + tcg_out32(s, ADDI | TAI(dst, dst, -1)); + } else { + tcg_out_xori32(s, dst, dst, 1); } } @@ -1590,9 +1617,10 @@ static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2) + int const_arg2, bool neg) { - int crop, sh; + int sh; + bool inv; tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); @@ -1601,18 +1629,31 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, arg2 = (uint32_t)arg2; } + /* With SETBC/SETBCR, we can always implement with 2 insns. */ + if (have_isa_3_10) { + tcg_insn_unit bi, opc; + + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + + /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */ + bi = tcg_to_bc[cond] & (0x1f << 16); + if (tcg_to_bc[cond] & BO(8)) { + opc = neg ? SETNBC : SETBC; + } else { + opc = neg ? SETNBCR : SETBCR; + } + tcg_out32(s, opc | RT(arg0) | bi); + return; + } + /* Handle common and trivial cases before handling anything else. */ if (arg2 == 0) { switch (cond) { case TCG_COND_EQ: - tcg_out_setcond_eq0(s, type, arg0, arg1); + tcg_out_setcond_eq0(s, type, arg0, arg1, neg); return; case TCG_COND_NE: - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); + tcg_out_setcond_ne0(s, type, arg0, arg1, neg); return; case TCG_COND_GE: tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); @@ -1621,9 +1662,17 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, case TCG_COND_LT: /* Extract the sign bit. */ if (type == TCG_TYPE_I32) { - tcg_out_shri32(s, arg0, arg1, 31); + if (neg) { + tcg_out_sari32(s, arg0, arg1, 31); + } else { + tcg_out_shri32(s, arg0, arg1, 31); + } } else { - tcg_out_shri64(s, arg0, arg1, 63); + if (neg) { + tcg_out_sari64(s, arg0, arg1, 63); + } else { + tcg_out_shri64(s, arg0, arg1, 63); + } } return; default: @@ -1641,7 +1690,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, isel = tcg_to_isel[cond]; - tcg_out_movi(s, type, arg0, 1); + tcg_out_movi(s, type, arg0, neg ? -1 : 1); if (isel & 1) { /* arg0 = (bc ? 
0 : 1) */ tab = TAB(arg0, 0, arg0); @@ -1655,51 +1704,47 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, return; } + inv = false; switch (cond) { case TCG_COND_EQ: arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; + tcg_out_setcond_eq0(s, type, arg0, arg1, neg); + break; case TCG_COND_NE: arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - /* Discard the high bits only once, rather than both inputs. */ - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; + tcg_out_setcond_ne0(s, type, arg0, arg1, neg); + break; + case TCG_COND_LE: + case TCG_COND_LEU: + inv = true; + /* fall through */ case TCG_COND_GT: case TCG_COND_GTU: - sh = 30; - crop = 0; - goto crtest; - - case TCG_COND_LT: - case TCG_COND_LTU: - sh = 29; - crop = 0; + sh = 30; /* CR7 CR_GT */ goto crtest; case TCG_COND_GE: case TCG_COND_GEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); + inv = true; + /* fall through */ + case TCG_COND_LT: + case TCG_COND_LTU: + sh = 29; /* CR7 CR_LT */ goto crtest; - case TCG_COND_LE: - case TCG_COND_LEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); crtest: tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - if (crop) { - tcg_out32(s, crop); - } tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); + if (neg && inv) { + tcg_out32(s, ADDI | TAI(arg0, arg0, -1)); + } else if (neg) { + tcg_out32(s, NEG | RT(arg0) | RA(arg0)); + } else if (inv) { + tcg_out_xori32(s, arg0, arg0, 1); + } break; default: @@ -2982,11 +3027,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_setcond_i32: tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2]); + const_args[2], false); break; case INDEX_op_setcond_i64: tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2]); + const_args[2], false); + break; + case INDEX_op_negsetcond_i32: + tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], + const_args[2], true); + break; + case INDEX_op_negsetcond_i64: + tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], + const_args[2], true); break; case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args, const_args); @@ -3724,6 +3777,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: case INDEX_op_setcond_i32: + case INDEX_op_negsetcond_i32: case INDEX_op_and_i64: case INDEX_op_andc_i64: case INDEX_op_shl_i64: @@ -3732,6 +3786,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, ri); case INDEX_op_mul_i32: diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 9a41fab8cc..8bfb14998e 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -97,6 +97,7 @@ typedef enum { #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 @@ -106,8 +107,7 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_extrl_i64_i32 0 
-#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 have_isa_3_00 #define TCG_TARGET_HAS_rot_i64 1 @@ -135,6 +135,7 @@ typedef enum { #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index eeaeb6b6e3..9be81c1b7b 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -69,7 +69,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { static const int tcg_target_reg_alloc_order[] = { /* Call saved registers */ - /* TCG_REG_S0 reservered for TCG_AREG0 */ + /* TCG_REG_S0 reserved for TCG_AREG0 */ TCG_REG_S1, TCG_REG_S2, TCG_REG_S3, @@ -260,7 +260,7 @@ typedef enum { /* Zba: Bit manipulation extension, address generation */ OPC_ADD_UW = 0x0800003b, - /* Zbb: Bit manipulation extension, basic bit manipulaton */ + /* Zbb: Bit manipulation extension, basic bit manipulation */ OPC_ANDN = 0x40007033, OPC_CLZ = 0x60001013, OPC_CLZW = 0x6000101b, @@ -936,6 +936,44 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int tmpflags; + TCGReg tmp; + + /* For LT/GE comparison against 0, replicate the sign bit. */ + if (c2 && arg2 == 0) { + switch (cond) { + case TCG_COND_GE: + tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1); + arg1 = ret; + /* fall through */ + case TCG_COND_LT: + tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1); + return; + default: + break; + } + } + + tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + tmp = tmpflags & ~SETCOND_FLAGS; + + /* If intermediate result is zero/non-zero: test != 0. */ + if (tmpflags & SETCOND_NEZ) { + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp); + tmp = ret; + } + + /* Produce the 0/-1 result. 
*/ + if (tmpflags & SETCOND_INV) { + tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1); + } else { + tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp); + } +} + static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne, int val1, bool c_val1, int val2, bool c_val2) @@ -1782,6 +1820,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond(s, args[3], a0, a1, a2, c2); break; + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + tcg_out_negsetcond(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: tcg_out_movcond(s, args[5], a0, a1, a2, c2, @@ -1910,6 +1953,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_xor_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); case INDEX_op_andc_i32: diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index e1d8110ee4..c1132d178f 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -88,6 +88,7 @@ extern bool have_zbb; /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 @@ -123,6 +124,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 @@ -131,8 +133,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_ext8s_i64 1 #define TCG_TARGET_HAS_ext16s_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index a94f7908d6..ecd8aaf2a1 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1266,7 +1266,8 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, } static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, int c2const) + TCGReg dest, TCGReg c1, TCGArg c2, + bool c2const, bool neg) { int cc; @@ -1275,11 +1276,27 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, /* Emit: d = 0, d = (cc ? 1 : d). */ cc = tgen_cmp(s, type, cond, c1, c2, c2const, false); tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc); + tcg_out_insn(s, RIEg, LOCGHI, dest, neg ? -1 : 1, cc); return; } - restart: + switch (cond) { + case TCG_COND_GEU: + case TCG_COND_LTU: + case TCG_COND_LT: + case TCG_COND_GE: + /* Swap operands so that we can use LEU/GTU/GT/LE. */ + if (!c2const) { + TCGReg t = c1; + c1 = c2; + c2 = t; + cond = tcg_swap_cond(cond); + } + break; + default: + break; + } + switch (cond) { case TCG_COND_NE: /* X != 0 is X > 0. */ @@ -1292,11 +1309,20 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, case TCG_COND_GTU: case TCG_COND_GT: - /* The result of a compare has CC=2 for GT and CC=3 unused. - ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ + /* + * The result of a compare has CC=2 for GT and CC=3 unused. + * ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. 
+ */ tgen_cmp(s, type, cond, c1, c2, c2const, true); tcg_out_movi(s, type, dest, 0); tcg_out_insn(s, RRE, ALCGR, dest, dest); + if (neg) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LCR, dest, dest); + } else { + tcg_out_insn(s, RRE, LCGR, dest, dest); + } + } return; case TCG_COND_EQ: @@ -1310,27 +1336,17 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, case TCG_COND_LEU: case TCG_COND_LE: - /* As above, but we're looking for borrow, or !carry. - The second insn computes d - d - borrow, or -1 for true - and 0 for false. So we must mask to 1 bit afterward. */ + /* + * As above, but we're looking for borrow, or !carry. + * The second insn computes d - d - borrow, or -1 for true + * and 0 for false. So we must mask to 1 bit afterward. + */ tgen_cmp(s, type, cond, c1, c2, c2const, true); tcg_out_insn(s, RRE, SLBGR, dest, dest); - tgen_andi(s, type, dest, 1); - return; - - case TCG_COND_GEU: - case TCG_COND_LTU: - case TCG_COND_LT: - case TCG_COND_GE: - /* Swap operands so that we can use LEU/GTU/GT/LE. */ - if (!c2const) { - TCGReg t = c1; - c1 = c2; - c2 = t; - cond = tcg_swap_cond(cond); - goto restart; + if (!neg) { + tgen_andi(s, type, dest, 1); } - break; + return; default: g_assert_not_reached(); @@ -1339,7 +1355,7 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, cc = tgen_cmp(s, type, cond, c1, c2, c2const, false); /* Emit: d = 0, t = 1, d = (cc ? t : d). */ tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, neg ? -1 : 1); tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc); } @@ -2288,7 +2304,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_setcond_i32: tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2]); + args[2], const_args[2], false); + break; + case INDEX_op_negsetcond_i32: + tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], + args[2], const_args[2], true); break; case INDEX_op_movcond_i32: tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], @@ -2566,7 +2586,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_setcond_i64: tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2]); + args[2], const_args[2], false); + break; + case INDEX_op_negsetcond_i64: + tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], + args[2], const_args[2], true); break; case INDEX_op_movcond_i64: tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], @@ -3109,8 +3133,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_rotr_i32: case INDEX_op_rotr_i64: case INDEX_op_setcond_i32: + case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, ri); case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rA); case INDEX_op_clz_i64: diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 9a405003b9..50e12ef9d6 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -96,14 +96,14 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_extrl_i64_i32 0 -#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define 
TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div2_i64 1 @@ -132,6 +132,7 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index ffcb879211..f2a346a1bd 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -720,7 +720,7 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, } static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) + TCGReg c1, int32_t c2, int c2const, bool neg) { /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ switch (cond) { @@ -760,22 +760,34 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, default: tcg_out_cmp(s, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, neg ? -1 : 1, 1); return; } tcg_out_cmp(s, c1, c2, c2const); if (cond == TCG_COND_LTU) { - tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); + if (neg) { + /* 0 - 0 - C = -C = (C ? -1 : 0) */ + tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_SUBC); + } else { + /* 0 + 0 + C = C = (C ? 1 : 0) */ + tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); + } } else { - tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); + if (neg) { + /* 0 + -1 + C = C - 1 = (C ? 0 : -1) */ + tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_ADDC); + } else { + /* 0 - -1 - C = 1 - C = (C ? 0 : 1) */ + tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); + } } } static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) + TCGReg c1, int32_t c2, int c2const, bool neg) { - if (use_vis3_instructions) { + if (use_vis3_instructions && !neg) { switch (cond) { case TCG_COND_NE: if (c2 != 0) { @@ -796,11 +808,11 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, if the input does not overlap the output. */ if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { tcg_out_movi_s13(s, ret, 0); - tcg_out_movr(s, cond, ret, c1, 1, 1); + tcg_out_movr(s, cond, ret, c1, neg ? -1 : 1, 1); } else { tcg_out_cmp(s, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, neg ? 
-1 : 1, 1); } } @@ -1355,7 +1367,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); break; case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2, false); + break; + case INDEX_op_negsetcond_i32: + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2, true); break; case INDEX_op_movcond_i32: tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); @@ -1437,7 +1452,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); break; case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2, false); + break; + case INDEX_op_negsetcond_i64: + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2, true); break; case INDEX_op_movcond_i64: tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); @@ -1564,6 +1582,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_sar_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: return C_O1_I2(r, rZ, rJ); case INDEX_op_brcond_i32: diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index d454278811..3d41c9659b 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -106,6 +106,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 @@ -114,8 +115,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 @@ -143,6 +143,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index a062239804..e260a07c61 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -3692,8 +3692,7 @@ static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, for (i = 0; i < oprsz; i += 4) { tcg_gen_ld_i32(t0, cpu_env, aofs + i); tcg_gen_ld_i32(t1, cpu_env, bofs + i); - tcg_gen_setcond_i32(cond, t0, t0, t1); - tcg_gen_neg_i32(t0, t0); + tcg_gen_negsetcond_i32(cond, t0, t0, t1); tcg_gen_st_i32(t0, cpu_env, dofs + i); } tcg_temp_free_i32(t1); @@ -3710,8 +3709,7 @@ static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, for (i = 0; i < oprsz; i += 8) { tcg_gen_ld_i64(t0, cpu_env, aofs + i); tcg_gen_ld_i64(t1, cpu_env, bofs + i); - tcg_gen_setcond_i64(cond, t0, t0, t1); - tcg_gen_neg_i64(t0, t0); + tcg_gen_negsetcond_i64(cond, t0, t0, t1); tcg_gen_st_i64(t0, cpu_env, dofs + i); } tcg_temp_free_i64(t1); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7aadb37756..02a8cadcc0 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -276,6 +276,21 @@ void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, tcg_gen_setcond_i32(cond, ret, arg1, 
tcg_constant_i32(arg2)); } +void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (cond == TCG_COND_ALWAYS) { + tcg_gen_movi_i32(ret, -1); + } else if (cond == TCG_COND_NEVER) { + tcg_gen_movi_i32(ret, 0); + } else if (TCG_TARGET_HAS_negsetcond_i32) { + tcg_gen_op4i_i32(INDEX_op_negsetcond_i32, ret, arg1, arg2, cond); + } else { + tcg_gen_setcond_i32(cond, ret, arg1, arg2); + tcg_gen_neg_i32(ret, ret); + } +} + void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { if (arg2 == 0) { @@ -848,8 +863,7 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_setcond_i32(cond, t0, c1, c2); - tcg_gen_neg_i32(t0, t0); + tcg_gen_negsetcond_i32(cond, t0, c1, c2); tcg_gen_and_i32(t1, v1, t0); tcg_gen_andc_i32(ret, v2, t0); tcg_gen_or_i32(ret, ret, t1); @@ -1021,6 +1035,14 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) } } +/* + * bswap16_i32: 16-bit byte swap on the low bits of a 32-bit value. + * + * Byte pattern: xxab -> yyba + * + * With TCG_BSWAP_IZ, x == zero, else undefined. + * With TCG_BSWAP_OZ, y == zero, with TCG_BSWAP_OS y == sign, else undefined. + */ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) { /* Only one extension flag may be present. */ @@ -1032,27 +1054,35 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_shri_i32(t0, arg, 8); + /* arg = ..ab (IZ) xxab (!IZ) */ + tcg_gen_shri_i32(t0, arg, 8); /* t0 = ...a (IZ) .xxa (!IZ) */ if (!(flags & TCG_BSWAP_IZ)) { - tcg_gen_ext8u_i32(t0, t0); + tcg_gen_ext8u_i32(t0, t0); /* t0 = ...a */ } if (flags & TCG_BSWAP_OS) { - tcg_gen_shli_i32(t1, arg, 24); - tcg_gen_sari_i32(t1, t1, 16); + tcg_gen_shli_i32(t1, arg, 24); /* t1 = b... */ + tcg_gen_sari_i32(t1, t1, 16); /* t1 = ssb. */ } else if (flags & TCG_BSWAP_OZ) { - tcg_gen_ext8u_i32(t1, arg); - tcg_gen_shli_i32(t1, t1, 8); + tcg_gen_ext8u_i32(t1, arg); /* t1 = ...b */ + tcg_gen_shli_i32(t1, t1, 8); /* t1 = ..b. */ } else { - tcg_gen_shli_i32(t1, arg, 8); + tcg_gen_shli_i32(t1, arg, 8); /* t1 = xab. */ } - tcg_gen_or_i32(ret, t0, t1); + tcg_gen_or_i32(ret, t0, t1); /* ret = ..ba (OZ) */ + /* = ssba (OS) */ + /* = xaba (no flag) */ tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); } } +/* + * bswap32_i32: 32-bit byte swap on a 32-bit value. + * + * Byte pattern: abcd -> dcba + */ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { if (TCG_TARGET_HAS_bswap32_i32) { @@ -1078,6 +1108,11 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) } } +/* + * hswap_i32: Swap 16-bit halfwords within a 32-bit value. + * + * Byte pattern: abcd -> cdab + */ void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg) { /* Swapping 2 16-bit elements is a rotate. 
*/ @@ -1567,6 +1602,27 @@ void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, } } +void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (cond == TCG_COND_ALWAYS) { + tcg_gen_movi_i64(ret, -1); + } else if (cond == TCG_COND_NEVER) { + tcg_gen_movi_i64(ret, 0); + } else if (TCG_TARGET_HAS_negsetcond_i64) { + tcg_gen_op4i_i64(INDEX_op_negsetcond_i64, ret, arg1, arg2, cond); + } else if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), + TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); + tcg_gen_neg_i32(TCGV_LOW(ret), TCGV_LOW(ret)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_LOW(ret)); + } else { + tcg_gen_setcond_i64(cond, ret, arg1, arg2); + tcg_gen_neg_i64(ret, ret); + } +} + void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { if (arg2 == 0) { @@ -1721,6 +1777,14 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) } } +/* + * bswap16_i64: 16-bit byte swap on the low bits of a 64-bit value. + * + * Byte pattern: xxxxxxxxab -> yyyyyyyyba + * + * With TCG_BSWAP_IZ, x == zero, else undefined. + * With TCG_BSWAP_OZ, y == zero, with TCG_BSWAP_OS y == sign, else undefined. + */ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { /* Only one extension flag may be present. */ @@ -1739,27 +1803,38 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_shri_i64(t0, arg, 8); + /* arg = ......ab or xxxxxxab */ + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .......a or .xxxxxxa */ if (!(flags & TCG_BSWAP_IZ)) { - tcg_gen_ext8u_i64(t0, t0); + tcg_gen_ext8u_i64(t0, t0); /* t0 = .......a */ } if (flags & TCG_BSWAP_OS) { - tcg_gen_shli_i64(t1, arg, 56); - tcg_gen_sari_i64(t1, t1, 48); + tcg_gen_shli_i64(t1, arg, 56); /* t1 = b....... */ + tcg_gen_sari_i64(t1, t1, 48); /* t1 = ssssssb. */ } else if (flags & TCG_BSWAP_OZ) { - tcg_gen_ext8u_i64(t1, arg); - tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_ext8u_i64(t1, arg); /* t1 = .......b */ + tcg_gen_shli_i64(t1, t1, 8); /* t1 = ......b. */ } else { - tcg_gen_shli_i64(t1, arg, 8); + tcg_gen_shli_i64(t1, arg, 8); /* t1 = xxxxxab. */ } - tcg_gen_or_i64(ret, t0, t1); + tcg_gen_or_i64(ret, t0, t1); /* ret = ......ba (OZ) */ + /* ssssssba (OS) */ + /* xxxxxaba (no flag) */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } } +/* + * bswap32_i64: 32-bit byte swap on the low bits of a 64-bit value. + * + * Byte pattern: xxxxabcd -> yyyydcba + * + * With TCG_BSWAP_IZ, x == zero, else undefined. + * With TCG_BSWAP_OZ, y == zero, with TCG_BSWAP_OS y == sign, else undefined. + */ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { /* Only one extension flag may be present. */ @@ -1793,13 +1868,19 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ } - tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba (OS) */ + /* ....dcba (else) */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } } +/* + * bswap64_i64: 64-bit byte swap on a 64-bit value. + * + * Byte pattern: abcdefgh -> hgfedcba + */ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { @@ -1845,24 +1926,35 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) } } +/* + * hswap_i64: Swap 16-bit halfwords within a 64-bit value. + * See also include/qemu/bitops.h, hswap64. 
+ * + * Byte pattern: abcdefgh -> ghefcdab + */ void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg) { uint64_t m = 0x0000ffff0000ffffull; TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - /* See include/qemu/bitops.h, hswap64. */ - tcg_gen_rotli_i64(t1, arg, 32); - tcg_gen_andi_i64(t0, t1, m); - tcg_gen_shli_i64(t0, t0, 16); - tcg_gen_shri_i64(t1, t1, 16); - tcg_gen_andi_i64(t1, t1, m); - tcg_gen_or_i64(ret, t0, t1); + /* arg = abcdefgh */ + tcg_gen_rotli_i64(t1, arg, 32); /* t1 = efghabcd */ + tcg_gen_andi_i64(t0, t1, m); /* t0 = ..gh..cd */ + tcg_gen_shli_i64(t0, t0, 16); /* t0 = gh..cd.. */ + tcg_gen_shri_i64(t1, t1, 16); /* t1 = ..efghab */ + tcg_gen_andi_i64(t1, t1, m); /* t1 = ..ef..ab */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ghefcdab */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } +/* + * wswap_i64: Swap 32-bit words within a 64-bit value. + * + * Byte pattern: abcdefgh -> efghabcd + */ void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg) { /* Swapping 2 32-bit elements is a rotate. */ @@ -2527,8 +2619,7 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_setcond_i64(cond, t0, c1, c2); - tcg_gen_neg_i64(t0, t0); + tcg_gen_negsetcond_i64(cond, t0, c1, c2); tcg_gen_and_i64(t1, v1, t0); tcg_gen_andc_i64(ret, v2, t0); tcg_gen_or_i64(ret, ret, t1); @@ -2681,7 +2772,7 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else if (TCG_TARGET_HAS_extrl_i64_i32) { + } else if (TCG_TARGET_HAS_extr_i64_i32) { tcg_gen_op2(INDEX_op_extrl_i64_i32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); } else { @@ -2693,7 +2784,7 @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_extrh_i64_i32) { + } else if (TCG_TARGET_HAS_extr_i64_i32) { tcg_gen_op2(INDEX_op_extrh_i64_i32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); } else { diff --git a/tcg/tcg.c b/tcg/tcg.c index ddfe9a96cb..620dbe08da 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1879,6 +1879,8 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_sar_i32: return true; + case INDEX_op_negsetcond_i32: + return TCG_TARGET_HAS_negsetcond_i32; case INDEX_op_movcond_i32: return TCG_TARGET_HAS_movcond_i32; case INDEX_op_div_i32: @@ -1977,6 +1979,8 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_extu_i32_i64: return TCG_TARGET_REG_BITS == 64; + case INDEX_op_negsetcond_i64: + return TCG_TARGET_HAS_negsetcond_i64; case INDEX_op_movcond_i64: return TCG_TARGET_HAS_movcond_i64; case INDEX_op_div_i64: @@ -2000,9 +2004,8 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_extract2_i64: return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: - return TCG_TARGET_HAS_extrl_i64_i32; case INDEX_op_extrh_i64_i32: - return TCG_TARGET_HAS_extrh_i64_i32; + return TCG_TARGET_HAS_extr_i64_i32; case INDEX_op_ext8s_i64: return TCG_TARGET_HAS_ext8s_i64; case INDEX_op_ext16s_i64: @@ -2510,11 +2513,13 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) switch (c) { case INDEX_op_brcond_i32: case INDEX_op_setcond_i32: + case INDEX_op_negsetcond_i32: case INDEX_op_movcond_i32: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: case INDEX_op_brcond_i64: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i64: case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: diff --git a/tcg/tci/tcg-target.h 
b/tcg/tci/tcg-target.h index 37ee10c959..91ca33b616 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -70,14 +70,14 @@ #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_extrl_i64_i32 0 -#define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 @@ -105,6 +105,7 @@ #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 |
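The main consumers of the 0/-1 result in the hunks above are the gvec compare expansion (tcg/tcg-op-gvec.c) and the movcond fallback (tcg/tcg-op.c), which use it as a branchless select mask. A rough C sketch of that shape, with a hypothetical helper name and ordinary 32-bit integers standing in for TCG temporaries:

    #include <stdint.h>

    /*
     * Hypothetical helper: branchless select through a 0/-1 mask,
     * the same pattern as the movcond fallback above
     * (negsetcond, and, andc, or).
     */
    static uint32_t movcond_eq_u32(uint32_t c1, uint32_t c2,
                                   uint32_t v1, uint32_t v2)
    {
        uint32_t mask = -(uint32_t)(c1 == c2);   /* 0 or 0xffffffff */
        return (v1 & mask) | (v2 & ~mask);       /* c1 == c2 ? v1 : v2 */
    }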