From 7ec8bab3deae643b1ce579c2d65a244f30708330 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 14 Oct 2016 12:04:32 -0500 Subject: tcg: Add field extraction primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tcg_gen_extract_* and tcg_gen_sextract_* for extraction of fixed position bitfields, much like we already have for deposit. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/optimize.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tcg/optimize.c') diff --git a/tcg/optimize.c b/tcg/optimize.c index 0f1349086b..f41ed2cfc1 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -878,6 +878,19 @@ void tcg_optimize(TCGContext *s) temps[args[2]].mask); break; + CASE_OP_32_64(extract): + mask = extract64(temps[args[1]].mask, args[2], args[3]); + if (args[2] == 0) { + affected = temps[args[1]].mask & ~mask; + } + break; + CASE_OP_32_64(sextract): + mask = sextract64(temps[args[1]].mask, args[2], args[3]); + if (args[2] == 0 && (tcg_target_long)mask >= 0) { + affected = temps[args[1]].mask & ~mask; + } + break; + CASE_OP_32_64(or): CASE_OP_32_64(xor): mask = temps[args[1]].mask | temps[args[2]].mask; @@ -1048,6 +1061,22 @@ void tcg_optimize(TCGContext *s) } goto do_default; + CASE_OP_32_64(extract): + if (temp_is_const(args[1])) { + tmp = extract64(temps[args[1]].val, args[2], args[3]); + tcg_opt_gen_movi(s, op, args, args[0], tmp); + break; + } + goto do_default; + + CASE_OP_32_64(sextract): + if (temp_is_const(args[1])) { + tmp = sextract64(temps[args[1]].val, args[2], args[3]); + tcg_opt_gen_movi(s, op, args, args[0], tmp); + break; + } + goto do_default; + CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); if (tmp != 2) { -- cgit 1.4.1 From 333b21b809fc80ce67c8f6a7d1c7cc66437d9791 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Oct 2016 20:44:32 -0700 Subject: tcg/optimize: Fold movcond 0/1 into setcond Signed-off-by: Richard Henderson --- tcg/optimize.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tcg/optimize.c') diff --git a/tcg/optimize.c b/tcg/optimize.c index f41ed2cfc1..9e26bb7e4e 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1105,6 +1105,21 @@ void tcg_optimize(TCGContext *s) tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); break; } + if (temp_is_const(args[3]) && temp_is_const(args[4])) { + tcg_target_ulong tv = temps[args[3]].val; + tcg_target_ulong fv = temps[args[4]].val; + TCGCond cond = args[5]; + if (fv == 1 && tv == 0) { + cond = tcg_invert_cond(cond); + } else if (!(tv == 1 && fv == 0)) { + goto do_default; + } + args[3] = cond; + op->opc = opc = (opc == INDEX_op_movcond_i32 + ? INDEX_op_setcond_i32 + : INDEX_op_setcond_i64); + nb_iargs = 2; + } goto do_default; case INDEX_op_add2_i32: -- cgit 1.4.1 From 0e28d0063bbd9e59a981ea2d20f82f30c5d956a8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 16 Nov 2016 09:23:28 +0100 Subject: tcg: Add clz and ctz opcodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg-runtime.c | 20 +++++++ tcg/README | 8 +++ tcg/aarch64/tcg-target.h | 4 ++ tcg/arm/tcg-target.h | 2 + tcg/i386/tcg-target.h | 4 ++ tcg/ia64/tcg-target.h | 4 ++ tcg/mips/tcg-target.h | 2 + tcg/optimize.c | 36 ++++++++++++ tcg/ppc/tcg-target.h | 4 ++ tcg/s390/tcg-target.h | 4 ++ tcg/sparc/tcg-target.h | 4 ++ tcg/tcg-op.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++ tcg/tcg-op.h | 16 ++++++ tcg/tcg-opc.h | 4 ++ tcg/tcg-runtime.h | 5 ++ tcg/tcg.h | 2 + tcg/tci/tcg-target.h | 4 ++ 17 files changed, 266 insertions(+) (limited to 'tcg/optimize.c') diff --git a/tcg-runtime.c b/tcg-runtime.c index 9327b6f23b..eb3bade7f6 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -101,6 +101,26 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2) return h; } +uint32_t HELPER(clz_i32)(uint32_t arg, uint32_t zero_val) +{ + return arg ? clz32(arg) : zero_val; +} + +uint32_t HELPER(ctz_i32)(uint32_t arg, uint32_t zero_val) +{ + return arg ? ctz32(arg) : zero_val; +} + +uint64_t HELPER(clz_i64)(uint64_t arg, uint64_t zero_val) +{ + return arg ? clz64(arg) : zero_val; +} + +uint64_t HELPER(ctz_i64)(uint64_t arg, uint64_t zero_val) +{ + return arg ? ctz64(arg) : zero_val; +} + void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); diff --git a/tcg/README b/tcg/README index 6946b5bcc2..a9858c2f74 100644 --- a/tcg/README +++ b/tcg/README @@ -246,6 +246,14 @@ t0=~(t1|t2) t0=t1|~t2 +* clz_i32/i64 t0, t1, t2 + +t0 = t1 ? clz(t1) : t2 + +* ctz_i32/i64 t0, t1, t2 + +t0 = t1 ? ctz(t1) : t2 + ********* Shifts/Rotates * shl_i32/i64 t0, t1, t2 diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 4a74bd8d95..976f493157 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -62,6 +62,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 @@ -94,6 +96,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 4e307289d9..02cc242c37 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -110,6 +110,8 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index dc19c47be1..f2d9955b70 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -93,6 +93,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 @@ -125,6 +127,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 8856dc8f15..9a829ae751 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -140,6 +140,10 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_orc_i64 1 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 92d203aca8..06988cf29e 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -121,6 +121,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_andc_i32 0 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 diff --git a/tcg/optimize.c b/tcg/optimize.c index 9e26bb7e4e..e7ecce478e 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -296,6 +296,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) CASE_OP_32_64(nor): return ~(x | y); + case INDEX_op_clz_i32: + return (uint32_t)x ? clz32(x) : y; + + case INDEX_op_clz_i64: + return x ? clz64(x) : y; + + case INDEX_op_ctz_i32: + return (uint32_t)x ? ctz32(x) : y; + + case INDEX_op_ctz_i64: + return x ? ctz64(x) : y; + CASE_OP_32_64(ext8s): return (int8_t)x; @@ -896,6 +908,16 @@ void tcg_optimize(TCGContext *s) mask = temps[args[1]].mask | temps[args[2]].mask; break; + case INDEX_op_clz_i32: + case INDEX_op_ctz_i32: + mask = temps[args[2]].mask | 31; + break; + + case INDEX_op_clz_i64: + case INDEX_op_ctz_i64: + mask = temps[args[2]].mask | 63; + break; + CASE_OP_32_64(setcond): case INDEX_op_setcond2_i32: mask = 1; @@ -1052,6 +1074,20 @@ void tcg_optimize(TCGContext *s) } goto do_default; + CASE_OP_32_64(clz): + CASE_OP_32_64(ctz): + if (temp_is_const(args[1])) { + TCGArg v = temps[args[1]].val; + if (v != 0) { + tmp = do_constant_folding(opc, v, 0); + tcg_opt_gen_movi(s, op, args, args[0], tmp); + } else { + tcg_opt_gen_mov(s, op, args, args[0], args[2]); + } + break; + } + goto do_default; + CASE_OP_32_64(deposit): if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = deposit64(temps[args[1]].val, args[3], args[4], diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b42c57a732..698a59905d 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -68,6 +68,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 @@ -101,6 +103,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index e9ac12e55b..3ac2dc989c 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -77,6 +77,8 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i32 0 @@ -108,6 +110,8 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index a21216778a..340837ae9a 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -110,6 +110,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 @@ -142,6 +144,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 1927e53123..2b520c1953 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -457,6 +457,85 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) } } +void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_clz_i32) { + tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_clz_i64) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t1, arg1); + tcg_gen_extu_i32_i64(t2, arg2); + tcg_gen_addi_i64(t2, t2, 32); + tcg_gen_clz_i64(t1, t1, t2); + tcg_gen_extrl_i64_i32(ret, t1); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_gen_subi_i32(ret, ret, 32); + } else { + gen_helper_clz_i32(ret, arg1, arg2); + } +} + +void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) +{ + TCGv_i32 t = tcg_const_i32(arg2); + tcg_gen_clz_i32(ret, arg1, t); + tcg_temp_free_i32(t); +} + +void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_ctz_i32) { + tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_ctz_i64) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t1, arg1); + tcg_gen_extu_i32_i64(t2, arg2); + tcg_gen_ctz_i64(t1, t1, t2); + tcg_gen_extrl_i64_i32(ret, t1); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + } else if (TCG_TARGET_HAS_clz_i32) { + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + tcg_gen_neg_i32(t1, arg1); + tcg_gen_xori_i32(t2, arg2, 31); + tcg_gen_and_i32(t1, t1, arg1); + tcg_gen_clz_i32(ret, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_gen_xori_i32(ret, ret, 31); + } else if (TCG_TARGET_HAS_clz_i64) { + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i64 x1 = tcg_temp_new_i64(); + TCGv_i64 x2 = tcg_temp_new_i64(); + tcg_gen_neg_i32(t1, arg1); + tcg_gen_xori_i32(t2, arg2, 63); + tcg_gen_and_i32(t1, t1, arg1); + tcg_gen_extu_i32_i64(x1, t1); + tcg_gen_extu_i32_i64(x2, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_gen_clz_i64(x1, x1, x2); + tcg_gen_extrl_i64_i32(ret, x1); + tcg_temp_free_i64(x1); + tcg_temp_free_i64(x2); + tcg_gen_xori_i32(ret, ret, 63); + } else { + gen_helper_ctz_i32(ret, arg1, arg2); + } +} + +void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) +{ + TCGv_i32 t = tcg_const_i32(arg2); + tcg_gen_ctz_i32(ret, arg1, t); + tcg_temp_free_i32(t); +} + void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rot_i32) { @@ -1703,6 +1782,70 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) } } +void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_clz_i64) { + tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2); + } else { + gen_helper_clz_i64(ret, arg1, arg2); + } +} + +void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) +{ + if (TCG_TARGET_REG_BITS == 32 + && TCG_TARGET_HAS_clz_i32 + && arg2 <= 0xffffffffu) { + TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32); + tcg_gen_clz_i32(t, TCGV_LOW(arg1), t); + tcg_gen_addi_i32(t, t, 32); + tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + tcg_temp_free_i32(t); + } else { + TCGv_i64 t = tcg_const_i64(arg2); + tcg_gen_clz_i64(ret, arg1, t); + tcg_temp_free_i64(t); + } +} + +void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_ctz_i64) { + tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_clz_i64) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_neg_i64(t1, arg1); + tcg_gen_xori_i64(t2, arg2, 63); + tcg_gen_and_i64(t1, t1, arg1); + tcg_gen_clz_i64(ret, t1, t2); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_gen_xori_i64(ret, ret, 63); + } else { + gen_helper_ctz_i64(ret, arg1, arg2); + } +} + +void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) +{ + if (TCG_TARGET_REG_BITS == 32 + && TCG_TARGET_HAS_ctz_i32 + && arg2 <= 0xffffffffu) { + TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32); + tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32); + tcg_gen_addi_i32(t32, t32, 32); + tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + tcg_temp_free_i32(t32); + } else { + TCGv_i64 t64 = tcg_const_i64(arg2); + tcg_gen_ctz_i64(ret, arg1, t64); + tcg_temp_free_i64(t64); + } +} + void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rot_i64) { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index d42fd0de7c..7a24e84386 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -286,6 +286,10 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); +void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); @@ -469,6 +473,10 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); +void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); @@ -958,6 +966,10 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); #define tcg_gen_nand_tl tcg_gen_nand_i64 #define tcg_gen_nor_tl tcg_gen_nor_i64 #define tcg_gen_orc_tl tcg_gen_orc_i64 +#define tcg_gen_clz_tl tcg_gen_clz_i64 +#define tcg_gen_ctz_tl tcg_gen_ctz_i64 +#define tcg_gen_clzi_tl tcg_gen_clzi_i64 +#define tcg_gen_ctzi_tl tcg_gen_ctzi_i64 #define tcg_gen_rotl_tl tcg_gen_rotl_i64 #define tcg_gen_rotli_tl tcg_gen_rotli_i64 #define tcg_gen_rotr_tl tcg_gen_rotr_i64 @@ -1049,6 +1061,10 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); #define tcg_gen_nand_tl tcg_gen_nand_i32 #define tcg_gen_nor_tl tcg_gen_nor_i32 #define tcg_gen_orc_tl tcg_gen_orc_i32 +#define tcg_gen_clz_tl tcg_gen_clz_i32 +#define tcg_gen_ctz_tl tcg_gen_ctz_i32 +#define tcg_gen_clzi_tl tcg_gen_clzi_i32 +#define tcg_gen_ctzi_tl tcg_gen_ctzi_i32 #define tcg_gen_rotl_tl tcg_gen_rotl_i32 #define tcg_gen_rotli_tl tcg_gen_rotli_i32 #define tcg_gen_rotr_tl tcg_gen_rotr_i32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 11563ac010..d00db4f47c 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -104,6 +104,8 @@ DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32)) DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32)) DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32)) DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) +DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32)) +DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32)) DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) @@ -171,6 +173,8 @@ DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64)) DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) +DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64)) +DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64)) DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 1deb86a099..eb1cd761b7 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -15,6 +15,11 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(clz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(ctz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) + DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) #ifdef CONFIG_SOFTMMU diff --git a/tcg/tcg.h b/tcg/tcg.h index 144bdabbe0..e0262822cb 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -111,6 +111,8 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 20650428a8..0646444b87 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -74,6 +74,8 @@ #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_orc_i32 0 @@ -104,6 +106,8 @@ #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_orc_i64 0 -- cgit 1.4.1 From a768e4e99247911f00c5c0267c12d4e207d5f6cc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 21 Nov 2016 11:13:39 +0100 Subject: tcg: Add opcode for ctpop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of actual invocations of ctpop itself does not warrent an opcode, but it is very helpful for POWER7 to use in generating an expansion for ctz. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg-runtime.c | 10 ++++++++++ tcg/aarch64/tcg-target.h | 2 ++ tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 2 ++ tcg/ia64/tcg-target.h | 2 ++ tcg/mips/tcg-target.h | 2 ++ tcg/optimize.c | 14 ++++++++++++++ tcg/ppc/tcg-target.h | 2 ++ tcg/s390/tcg-target.h | 2 ++ tcg/sparc/tcg-target.h | 2 ++ tcg/tcg-op.c | 29 +++++++++++++++++++++++++++++ tcg/tcg-op.h | 4 ++++ tcg/tcg-opc.h | 2 ++ tcg/tcg-runtime.h | 2 ++ tcg/tcg.h | 1 + tcg/tci/tcg-target.h | 2 ++ 16 files changed, 79 insertions(+) (limited to 'tcg/optimize.c') diff --git a/tcg-runtime.c b/tcg-runtime.c index c8b98dfd51..4c60c96658 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -131,6 +131,16 @@ uint64_t HELPER(clrsb_i64)(uint64_t arg) return clrsb64(arg); } +uint32_t HELPER(ctpop_i32)(uint32_t arg) +{ + return ctpop32(arg); +} + +uint64_t HELPER(ctpop_i64)(uint64_t arg) +{ + return ctpop64(arg); +} + void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 9d6b00fbba..1a5ea23844 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -64,6 +64,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 @@ -98,6 +99,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 4cb94dc103..09a19c6f35 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -112,6 +112,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 8fff287caa..b8f73f5382 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -95,6 +95,7 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 @@ -129,6 +130,7 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 9a829ae751..42aea03a8b 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -144,6 +144,8 @@ typedef enum { #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_orc_i64 1 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index a680f16e47..f46d64a3a7 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -165,6 +165,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions @@ -179,6 +180,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 #endif /* optional instructions automatically implemented */ diff --git a/tcg/optimize.c b/tcg/optimize.c index e7ecce478e..adfc56ce62 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -308,6 +308,12 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_ctz_i64: return x ? ctz64(x) : y; + case INDEX_op_ctpop_i32: + return ctpop32(x); + + case INDEX_op_ctpop_i64: + return ctpop64(x); + CASE_OP_32_64(ext8s): return (int8_t)x; @@ -918,6 +924,13 @@ void tcg_optimize(TCGContext *s) mask = temps[args[2]].mask | 63; break; + case INDEX_op_ctpop_i32: + mask = 32 | 31; + break; + case INDEX_op_ctpop_i64: + mask = 64 | 63; + break; + CASE_OP_32_64(setcond): case INDEX_op_setcond2_i32: mask = 1; @@ -1031,6 +1044,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext8u): CASE_OP_32_64(ext16s): CASE_OP_32_64(ext16u): + CASE_OP_32_64(ctpop): case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index c798c9c85d..57e66cf3ed 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -72,6 +72,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 @@ -107,6 +108,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 22500ba858..cbdd2a6275 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -79,6 +79,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i32 0 @@ -112,6 +113,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 (s390_facilities & FACILITY_EXT_IMM) #define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 340837ae9a..b8b74f96ff 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -112,6 +112,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 @@ -146,6 +147,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 620e26897f..6f4b1b62cb 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -550,6 +550,21 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) } } +void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) +{ + if (TCG_TARGET_HAS_ctpop_i32) { + tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); + } else if (TCG_TARGET_HAS_ctpop_i64) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t, arg1); + tcg_gen_ctpop_i64(t, t); + tcg_gen_extrl_i64_i32(ret, t); + tcg_temp_free_i64(t); + } else { + gen_helper_ctpop_i32(ret, arg1); + } +} + void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rot_i32) { @@ -1874,6 +1889,20 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) } } +void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) +{ + if (TCG_TARGET_HAS_ctpop_i64) { + tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); + } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) { + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } else { + gen_helper_ctpop_i64(ret, arg1); + } +} + void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rot_i64) { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index c2f3db9288..c68e300a68 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -291,6 +291,7 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2); void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); @@ -479,6 +480,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2); void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); @@ -973,6 +975,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); #define tcg_gen_clzi_tl tcg_gen_clzi_i64 #define tcg_gen_ctzi_tl tcg_gen_ctzi_i64 #define tcg_gen_clrsb_tl tcg_gen_clrsb_i64 +#define tcg_gen_ctpop_tl tcg_gen_ctpop_i64 #define tcg_gen_rotl_tl tcg_gen_rotl_i64 #define tcg_gen_rotli_tl tcg_gen_rotli_i64 #define tcg_gen_rotr_tl tcg_gen_rotr_i64 @@ -1069,6 +1072,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); #define tcg_gen_clzi_tl tcg_gen_clzi_i32 #define tcg_gen_ctzi_tl tcg_gen_ctzi_i32 #define tcg_gen_clrsb_tl tcg_gen_clrsb_i32 +#define tcg_gen_ctpop_tl tcg_gen_ctpop_i32 #define tcg_gen_rotl_tl tcg_gen_rotl_i32 #define tcg_gen_rotli_tl tcg_gen_rotli_i32 #define tcg_gen_rotr_tl tcg_gen_rotr_i32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index d00db4f47c..f06f89405e 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -106,6 +106,7 @@ DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32)) DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32)) DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32)) +DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32)) DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) @@ -175,6 +176,7 @@ DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64)) DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64)) +DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64)) DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 0d30f1a90c..114ea6fecf 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -21,6 +21,8 @@ DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(clrsb_i32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) diff --git a/tcg/tcg.h b/tcg/tcg.h index e0262822cb..631c6f69b1 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -113,6 +113,7 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 0646444b87..838bf3a858 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -76,6 +76,7 @@ #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_orc_i32 0 @@ -108,6 +109,7 @@ #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_orc_i64 0 -- cgit 1.4.1