diff options
| author | serpilliere <serpilliere@users.noreply.github.com> | 2018-07-10 19:04:39 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-07-10 19:04:39 +0200 |
| commit | c48a8ba7ed9110df962df94ab9db314b2873c6b2 (patch) | |
| tree | 6e14f8fdaa4471dc1fb8fdcd6bfe9e271500a803 /miasm2/jitter | |
| parent | a5221c1b926af7716860fd27039528cfb54d6095 (diff) | |
| parent | d65bbbcc4a7d3c0fff9e9c80a04e23bbc4bf5333 (diff) | |
| download | miasm-c48a8ba7ed9110df962df94ab9db314b2873c6b2.tar.gz miasm-c48a8ba7ed9110df962df94ab9db314b2873c6b2.zip | |
Merge pull request #795 from commial/features/better-float-sse
Better float support & additional SSE
Diffstat (limited to '')
| -rw-r--r-- | miasm2/jitter/arch/JitCore_x86.h | 16 | ||||
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 161 | ||||
| -rw-r--r-- | miasm2/jitter/op_semantics.c | 227 | ||||
| -rw-r--r-- | miasm2/jitter/op_semantics.h | 38 |
4 files changed, 288 insertions, 154 deletions
diff --git a/miasm2/jitter/arch/JitCore_x86.h b/miasm2/jitter/arch/JitCore_x86.h index 221ba5db..a5fc4bd4 100644 --- a/miasm2/jitter/arch/JitCore_x86.h +++ b/miasm2/jitter/arch/JitCore_x86.h @@ -49,14 +49,14 @@ typedef struct { uint64_t cond; - double float_st0; - double float_st1; - double float_st2; - double float_st3; - double float_st4; - double float_st5; - double float_st6; - double float_st7; + uint64_t float_st0; + uint64_t float_st1; + uint64_t float_st2; + uint64_t float_st3; + uint64_t float_st4; + uint64_t float_st5; + uint64_t float_st6; + uint64_t float_st7; unsigned int float_c0; unsigned int float_c1; diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index d63351cc..c4e6709d 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -51,6 +51,17 @@ class LLVMType(llvm_ir.Type): else: raise ValueError() + @classmethod + def fptype(cls, size): + """Return the floating type corresponding to precision @size""" + if size == 32: + precision = llvm_ir.FloatType() + elif size == 64: + precision = llvm_ir.DoubleType() + else: + raise RuntimeError("Unsupported precision: %x", size) + return precision + class LLVMContext(): @@ -236,8 +247,16 @@ class LLVMContext_JIT(LLVMContext): i8 = LLVMType.IntType(8) p8 = llvm_ir.PointerType(i8) itype = LLVMType.IntType(64) + ftype = llvm_ir.FloatType() + dtype = llvm_ir.DoubleType() fc = {"llvm.ctpop.i8": {"ret": i8, "args": [i8]}, + "llvm.nearbyint.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.nearbyint.f64": {"ret": dtype, + "args": [dtype]}, + "llvm.trunc.f32": {"ret": ftype, + "args": [ftype]}, "segm2addr": {"ret": itype, "args": [p8, itype, @@ -245,6 +264,22 @@ class LLVMContext_JIT(LLVMContext): "x86_cpuid": {"ret": itype, "args": [itype, itype]}, + "fcom_c0": {"ret": itype, + "args": [dtype, + dtype]}, + "fcom_c1": {"ret": itype, + "args": [dtype, + dtype]}, + "fcom_c2": {"ret": itype, + "args": [dtype, + dtype]}, + "fcom_c3": {"ret": itype, + "args": [dtype, + 
dtype]}, + "llvm.sqrt.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.sqrt.f64": {"ret": dtype, + "args": [dtype]}, } for k in [8, 16]: @@ -466,10 +501,7 @@ class LLVMFunction(): [llvm_ir.Constant(LLVMType.IntType(), offset)]) regs = self.llvm_context.ir_arch.arch.regs - if hasattr(regs, "float_list") and expr in regs.float_list: - pointee_type = llvm_ir.DoubleType() - else: - pointee_type = LLVMType.IntType(expr.size) + pointee_type = LLVMType.IntType(expr.size) ptr_casted = builder.bitcast(ptr, llvm_ir.PointerType(pointee_type)) # Store in cache @@ -764,15 +796,19 @@ class LLVMFunction(): itype = LLVMType.IntType(expr.size) cond_ok = self.builder.icmp_unsigned("<", count, itype(expr.size)) + zero = itype(0) if op == ">>": callback = builder.lshr elif op == "<<": callback = builder.shl elif op == "a>>": callback = builder.ashr + # x a>> size is 0 or -1, depending on x sign + cond_neg = self.builder.icmp_signed("<", value, zero) + zero = self.builder.select(cond_neg, itype(-1), zero) ret = self.builder.select(cond_ok, callback(value, count), - itype(0)) + zero) self.update_cache(expr, ret) return ret @@ -800,19 +836,118 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret + if op.startswith("sint_to_fp"): + fptype = LLVMType.fptype(expr.size) + arg = self.add_ir(expr.args[0]) + ret = builder.sitofp(arg, fptype) + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + if op == "fp_to_sint32": + size_arg = expr.args[0].size + fptype_orig = LLVMType.fptype(size_arg) + arg = self.add_ir(expr.args[0]) + arg = builder.bitcast(arg, fptype_orig) + # Enforce IEEE-754 behavior. 
This could be enhanced with + # 'llvm.experimental.constrained.nearbyint' + if size_arg == 32: + func = self.mod.get_global("llvm.nearbyint.f32") + elif size_arg == 64: + func = self.mod.get_global("llvm.nearbyint.f64") + else: + raise RuntimeError("Unsupported size") + rounded = builder.call(func, [arg]) + ret = builder.fptoui(rounded, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret - if op in ["int_16_to_double", "int_32_to_double", "int_64_to_double", - "mem_16_to_double", "mem_32_to_double", "mem_64_to_double"]: + if op.startswith("fpconvert_fp"): + assert len(expr.args) == 1 + size_arg = expr.args[0].size + fptype = LLVMType.fptype(expr.size) + fptype_orig = LLVMType.fptype(size_arg) arg = self.add_ir(expr.args[0]) - ret = builder.uitofp(arg, llvm_ir.DoubleType()) + arg = builder.bitcast(arg, fptype_orig) + if expr.size > size_arg: + fc = builder.fpext + elif expr.size < size_arg: + fc = builder.fptrunc + else: + raise RuntimeError("Not supported, same size") + ret = fc(arg, fptype) + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op.startswith("fpround_"): + assert len(expr.args) == 1 + fptype = LLVMType.fptype(expr.size) + arg = self.add_ir(expr.args[0]) + arg = builder.bitcast(arg, fptype) + if op == "fpround_towardszero" and expr.size == 32: + fc = self.mod.get_global("llvm.trunc.f32") + else: + raise RuntimeError("Not supported, same size") + rounded = builder.call(fc, [arg]) + ret = builder.bitcast(rounded, llvm_ir.IntType(expr.size)) self.update_cache(expr, ret) return ret - if op in ["double_to_int_16", "double_to_int_32", "double_to_int_64", - "double_to_mem_16", "double_to_mem_32", "double_to_mem_64"]: + if op in ["fcom_c0", "fcom_c1", "fcom_c2", "fcom_c3"]: + arg1 = self.add_ir(expr.args[0]) + arg2 = self.add_ir(expr.args[0]) + fc_name = op + fc_ptr = self.mod.get_global(fc_name) + casted_args = [ + builder.bitcast(arg1, llvm_ir.DoubleType()), + builder.bitcast(arg2, 
llvm_ir.DoubleType()), + ] + ret = builder.call(fc_ptr, casted_args) + + # Cast ret if needed + ret_size = fc_ptr.return_value.type.width + if ret_size > expr.size: + ret = builder.trunc(ret, LLVMType.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["fsqrt"]: arg = self.add_ir(expr.args[0]) - ret = builder.fptoui(arg, llvm_ir.IntType(expr.size)) + + # Apply the correct sqrt func + if expr.size == 32: + arg = builder.bitcast(arg, llvm_ir.FloatType()) + ret = builder.call(self.mod.get_global("llvm.sqrt.f32"), + [arg]) + elif expr.size == 64: + arg = builder.bitcast(arg, llvm_ir.DoubleType()) + ret = builder.call(self.mod.get_global("llvm.sqrt.f64"), + [arg]) + else: + raise RuntimeError("Unsupported precision: %x", expr.size) + + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["fadd", "fmul", "fsub", "fdiv"]: + # More than 2 args not yet supported + assert len(expr.args) == 2 + arg1 = self.add_ir(expr.args[0]) + arg2 = self.add_ir(expr.args[1]) + precision = LLVMType.fptype(expr.size) + arg1 = builder.bitcast(arg1, precision) + arg2 = builder.bitcast(arg2, precision) + if op == "fadd": + ret = builder.fadd(arg1, arg2) + elif op == "fmul": + ret = builder.fmul(arg1, arg2) + elif op == "fsub": + ret = builder.fsub(arg1, arg2) + elif op == "fdiv": + ret = builder.fdiv(arg1, arg2) + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) self.update_cache(expr, ret) return ret @@ -832,10 +967,6 @@ class LLVMFunction(): callback = builder.urem elif op == "/": callback = builder.udiv - elif op == "fadd": - callback = builder.fadd - elif op == "fdiv": - callback = builder.fdiv else: raise NotImplementedError('Unknown op: %s' % op) diff --git a/miasm2/jitter/op_semantics.c b/miasm2/jitter/op_semantics.c index 0420532a..0bc3fcc5 100644 --- a/miasm2/jitter/op_semantics.c +++ b/miasm2/jitter/op_semantics.c @@ -355,147 +355,92 @@ void dump_float(void) */ } -double 
mem_32_to_double(unsigned int m) +uint32_t fpu_fadd32(uint32_t a, uint32_t b) { - float f; - double d; - - f = *((float*)&m); - d = f; -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%d float %e\n", m, d); -#endif - return d; -} - - -double mem_64_to_double(uint64_t m) -{ - double d; - d = *((double*)&m); + float c; + c = *((float*)&a) + *((float*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%"PRId64" double %e\n", m, d); -#endif - return d; -} - -double int_16_to_double(unsigned int m) -{ - double d; - - d = (double)(m&0xffff); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%d double %e\n", m, d); -#endif - return d; -} - -double int_32_to_double(unsigned int m) -{ - double d; - - d = (double)m; -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%d double %e\n", m, d); + printf("%e + %e -> %e\n", a, b, c); #endif - return d; + return *((uint32_t*)&c); } -double int_64_to_double(uint64_t m) +uint64_t fpu_fadd64(uint64_t a, uint64_t b) { - double d; - - d = (double)m; + double c; + c = *((double*)&a) + *((double*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%"PRId64" double %e\n", m, d); + printf("%e + %e -> %e\n", a, b, c); #endif - return d; + return *((uint64_t*)&c); } -int16_t double_to_int_16(double d) +uint32_t fpu_fsub32(uint32_t a, uint32_t b) { - int16_t i; - - i = (int16_t)d; + float c; + c = *((float*)&a) - *((float*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%e int %d\n", d, i); + printf("%e + %e -> %e\n", a, b, c); #endif - return i; + return *((uint32_t*)&c); } -int32_t double_to_int_32(double d) +uint64_t fpu_fsub64(uint64_t a, uint64_t b) { - int32_t i; - - i = (int32_t)d; + double c; + c = *((double*)&a) - *((double*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%e int %d\n", d, i); + printf("%e + %e -> %e\n", a, b, c); #endif - return i; + return *((uint64_t*)&c); } -int64_t double_to_int_64(double d) +uint32_t fpu_fmul32(uint32_t a, uint32_t b) { - int64_t i; - - i = (int64_t)d; + float c; + c 
= *((float*)&a) * *((float*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%e int %"PRId64"\n", d, i); + printf("%e * %e -> %e\n", a, b, c); #endif - return i; + return *((uint32_t*)&c); } - -double fpu_fadd(double a, double b) +uint64_t fpu_fmul64(uint64_t a, uint64_t b) { double c; - c = a + b; + c = *((double*)&a) * *((double*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%e + %e -> %e\n", a, b, c); + printf("%e * %e -> %e\n", a, b, c); #endif - return c; + return *((uint64_t*)&c); } -double fpu_fsub(double a, double b) +uint32_t fpu_fdiv32(uint32_t a, uint32_t b) { - double c; - c = a - b; -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e - %e -> %e\n", a, b, c); -#endif - return c; -} - -double fpu_fmul(double a, double b) -{ - double c; - c = a * b; + float c; + c = *((float*)&a) / *((float*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); printf("%e * %e -> %e\n", a, b, c); #endif - return c; + return *((uint32_t*)&c); } -double fpu_fdiv(double a, double b) +uint64_t fpu_fdiv64(uint64_t a, uint64_t b) { double c; - c = a / b; + c = *((double*)&a) / *((double*)&b); #ifdef DEBUG_MIASM_DOUBLE dump_float(); - printf("%e / %e -> %e\n", a, b, c); + printf("%e * %e -> %e\n", a, b, c); #endif - return c; + return *((uint64_t*)&c); } double fpu_ftan(double a) @@ -567,15 +512,26 @@ double fpu_f2xm1(double a) return b; } -double fpu_fsqrt(double a) +uint32_t fpu_fsqrt32(uint32_t a) +{ + float b; + b = sqrtf(*((float*)&a)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e sqrt %e\n", a, b); +#endif + return *((uint32_t*)&b); +} + +uint64_t fpu_fsqrt64(uint64_t a) { double b; - b = sqrt(a); + b = sqrt(*((double*)&a)); #ifdef DEBUG_MIASM_DOUBLE dump_float(); printf("%e sqrt %e\n", a, b); #endif - return b; + return *((uint64_t*)&b); } double fpu_fabs(double a) @@ -751,30 +707,75 @@ unsigned int fpu_fxam_c3(double a) } } -unsigned int double_to_mem_32(double d) +uint64_t sint64_to_fp64(int64_t a) { - unsigned int m; - float f; - f = d; - m = 
*((unsigned int*)&f); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%d %e\n", m, d); -#endif - return m; + double result = (double) a; + return *((uint64_t*)&result); } -uint64_t double_to_mem_64(double d) +uint32_t sint32_to_fp32(int32_t a) { - uint64_t m; - m = *((uint64_t*)&d); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%"PRId64" %e\n", m, d); -#endif - return m; + float result = (float) a; + return *((uint32_t*)&result); +} + +uint64_t sint32_to_fp64(int32_t a) +{ + double result = (double) a; + return *((uint64_t*)&result); } +int32_t fp32_to_sint32(uint32_t a) +{ + // Enforce nearbyint (IEEE-754 behavior) + float rounded = *((float*)&a); + rounded = nearbyintf(rounded); + return (int32_t) rounded; +} + +int64_t fp64_to_sint64(uint64_t a) +{ + // Enforce nearbyint (IEEE-754 behavior) + double rounded = *((double*)&a); + rounded = nearbyint(rounded); + return (int64_t) rounded; +} + +int32_t fp64_to_sint32(uint64_t a) +{ + // Enforce nearbyint (IEEE-754 behavior) + double rounded = *((double*)&a); + rounded = nearbyint(rounded); + return (int32_t) rounded; +} + +uint32_t fp64_to_fp32(uint64_t a) +{ + float result = (float) *((double*)&a); + return *((uint32_t*)&result); +} + +uint64_t fp32_to_fp64(uint32_t a) +{ + double result = (double) *((float*)&a); + return *((uint64_t*)&result); +} + +uint32_t fpround_towardszero_fp32(uint32_t a) +{ + float rounded = *((float*)&a); + rounded = truncf(rounded); + return *((uint32_t*)&rounded); +} + +uint64_t fpround_towardszero_fp64(uint64_t a) +{ + double rounded = *((float*)&a); + rounded = trunc(rounded); + return *((uint64_t*)&rounded); +} + + UDIV(16) UDIV(32) UDIV(64) diff --git a/miasm2/jitter/op_semantics.h b/miasm2/jitter/op_semantics.h index 3eb81cff..f8042895 100644 --- a/miasm2/jitter/op_semantics.h +++ b/miasm2/jitter/op_semantics.h @@ -96,19 +96,23 @@ int16_t idiv16(int16_t a, int16_t b); int16_t imod16(int16_t a, int16_t b); unsigned int x86_cpuid(unsigned int a, unsigned int reg_num); 
-double int2double(unsigned int m); -double fpu_fadd(double a, double b); -double fpu_fsub(double a, double b); -double fpu_fmul(double a, double b); -double fpu_fdiv(double a, double b); +uint32_t fpu_fadd32(uint32_t a, uint32_t b); +uint64_t fpu_fadd64(uint64_t a, uint64_t b); +uint32_t fpu_fsub32(uint32_t a, uint32_t b); +uint64_t fpu_fsub64(uint64_t a, uint64_t b); +uint32_t fpu_fmul32(uint32_t a, uint32_t b); +uint64_t fpu_fmul64(uint64_t a, uint64_t b); +uint32_t fpu_fdiv32(uint32_t a, uint32_t b); +uint64_t fpu_fdiv64(uint64_t a, uint64_t b); double fpu_ftan(double a); double fpu_frndint(double a); double fpu_fsin(double a); double fpu_fcos(double a); double fpu_fscale(double a, double b); double fpu_f2xm1(double a); -double fpu_fsqrt(double a); +uint32_t fpu_fsqrt32(uint32_t a); +uint64_t fpu_fsqrt64(uint64_t a); double fpu_fabs(double a); double fpu_fprem(double a, double b); double fpu_fchs(double a); @@ -124,18 +128,16 @@ unsigned int fpu_fxam_c1(double a); unsigned int fpu_fxam_c2(double a); unsigned int fpu_fxam_c3(double a); - -double mem_32_to_double(unsigned int m); -double mem_64_to_double(uint64_t m); -double int_16_to_double(unsigned int m); -double int_32_to_double(unsigned int m); -double int_64_to_double(uint64_t m); -int16_t double_to_int_16(double d); -int32_t double_to_int_32(double d); -int64_t double_to_int_64(double d); -unsigned int double_to_mem_32(double d); -uint64_t double_to_mem_64(double d); - +uint64_t sint64_to_fp64(int64_t a); +uint32_t sint32_to_fp32(int32_t a); +uint64_t sint32_to_fp64(int32_t a); +int32_t fp32_to_sint32(uint32_t a); +int64_t fp64_to_sint64(uint64_t a); +int32_t fp64_to_sint32(uint64_t a); +uint32_t fp64_to_fp32(uint64_t a); +uint64_t fp32_to_fp64(uint32_t a); +uint32_t fpround_towardszero_fp32(uint32_t a); +uint64_t fpround_towardszero_fp64(uint64_t a); #define SHIFT_RIGHT_ARITH(size, value, shift) \ ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? \ |