| author | serpilliere <serpilliere@users.noreply.github.com> | 2018-07-10 19:04:39 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-07-10 19:04:39 +0200 |
| commit | c48a8ba7ed9110df962df94ab9db314b2873c6b2 (patch) | |
| tree | 6e14f8fdaa4471dc1fb8fdcd6bfe9e271500a803 | |
| parent | a5221c1b926af7716860fd27039528cfb54d6095 (diff) | |
| parent | d65bbbcc4a7d3c0fff9e9c80a04e23bbc4bf5333 (diff) | |
Merge pull request #795 from commial/features/better-float-sse
Better float support & additional SSE
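In short, the merge replaces miasm's ad-hoc conversion operators (`int_32_to_double`, `double_to_int_32`, ...) with size-parametrized ones, and reroutes SSE float arithmetic through real floating-point operators (`fadd`, `fmin`, ...). A minimal sketch of the new operator contract, assuming a post-merge miasm2 checkout (the operator names and the `ExprOp` size rule are taken from the expression.py hunk below):

```python
# Sketch of the renaming introduced by this merge (miasm2, Python 2 era).
from miasm2.expression.expression import ExprOp, ExprInt

# Old scheme (removed): 'int_32_to_double', 'double_to_int_32', ...
# New scheme (added):   'sint_to_fp<N>', 'fp_to_sint<N>', 'fpconvert_fp<N>'
as_double = ExprOp("sint_to_fp64", ExprInt(42, 32))  # signed int -> fp64 bits
back_to_int = ExprOp("fp_to_sint32", as_double)      # fp64 -> signed int32
narrowed = ExprOp("fpconvert_fp32", as_double)       # fp64 -> fp32

# The destination size is now encoded in the operator name itself
assert as_double.size == 64
assert back_to_int.size == 32
assert narrowed.size == 32
```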
| -rw-r--r-- | miasm2/arch/x86/arch.py | 86 |
|---|---|---|
| -rw-r--r-- | miasm2/arch/x86/sem.py | 580 |
| -rw-r--r-- | miasm2/expression/expression.py | 103 |
| -rw-r--r-- | miasm2/ir/translators/C.py | 72 |
| -rw-r--r-- | miasm2/jitter/arch/JitCore_x86.h | 16 |
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 161 |
| -rw-r--r-- | miasm2/jitter/op_semantics.c | 227 |
| -rw-r--r-- | miasm2/jitter/op_semantics.h | 38 |
| -rw-r--r-- | test/arch/x86/arch.py | 40 |
| -rwxr-xr-x | test/test_all.py | 4 |
10 files changed, 931 insertions, 396 deletions
```diff
diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 815eaee6..bf872667 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -3771,7 +3771,8 @@ addop("movq", [bs8(0x0f), bs8(0xd6), pref_66] +
 addop("movmskps", [bs8(0x0f), bs8(0x50), no_xmm_pref] +
       rmmod(reg_modrm, rm_arg_xmm_reg))
-
+addop("movmskpd", [bs8(0x0f), bs8(0x50), pref_66] +
+      rmmod(reg_modrm, rm_arg_xmm_reg))
 addop("addss", [bs8(0x0f), bs8(0x58), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32))
 addop("addsd", [bs8(0x0f), bs8(0x58), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64))
@@ -3792,10 +3793,6 @@ addop("pminsw", [bs8(0x0f), bs8(0xea), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 addop("ucomiss", [bs8(0x0f), bs8(0x2e), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm_m32))
 addop("ucomisd", [bs8(0x0f), bs8(0x2e), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m64))
-addop("maxsd", [bs8(0x0f), bs8(0x5f), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64))
-addop("maxss", [bs8(0x0f), bs8(0x5f), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32))
-
-
 addop("movzx", [bs8(0x0f), bs("1011011"), w8, sx] + rmmod(rmreg, rm_arg_sx))
 addop("mul", [bs('1111011'), w8] + rmmod(d4, rm_arg_w8))
@@ -4021,9 +4018,9 @@ addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)])
 addop("movapd", [bs8(0x0f), bs("0010100"), swapargs]
       + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode16], [xmm_reg, rm_arg_xmm])
 addop("movaps", [bs8(0x0f), bs("0010100"), swapargs]
-      + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode32], [xmm_reg, rm_arg_xmm])
+      + rmmod(xmm_reg, rm_arg_xmm_m128) + [bs_opmode32], [xmm_reg, rm_arg_xmm_m128])
 addop("movaps", [bs8(0x0f), bs("0010100"), swapargs]
-      + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode64], [xmm_reg, rm_arg_xmm])
+      + rmmod(xmm_reg, rm_arg_xmm_m128) + [bs_opmode64], [xmm_reg, rm_arg_xmm_m128])
 addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_f3]
       + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
 addop("movdqa", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_66]
@@ -4045,7 +4042,8 @@ addop("movlhps", [bs8(0x0f), bs8(0x16), no_xmm_pref] +
 addop("movdq2q", [bs8(0x0f), bs8(0xd6), pref_f2] +
       rmmod(mm_reg, rm_arg_xmm_reg), [mm_reg, rm_arg_xmm_reg])
-
+addop("movq2dq", [bs8(0x0f), bs8(0xd6), pref_f3] +
+      rmmod(xmm_reg, rm_arg_mm))
 
 ## Additions
 # SSE
@@ -4144,13 +4142,54 @@ addop("pxor", [bs8(0x0f), bs8(0xef), no_xmm_pref] +
 addop("pxor", [bs8(0x0f), bs8(0xef), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm))
 
+### Comparisons (floating-point)
+###
+addop("minps", [bs8(0x0f), bs8(0x5d), no_xmm_pref] + rmmod(xmm_reg,
+                                                           rm_arg_xmm_m128))
+addop("minss", [bs8(0x0f), bs8(0x5d), pref_f3] + rmmod(xmm_reg,
+                                                       rm_arg_xmm_m32))
+addop("minpd", [bs8(0x0f), bs8(0x5d), pref_66] + rmmod(xmm_reg,
+                                                       rm_arg_xmm_m128))
+addop("minsd", [bs8(0x0f), bs8(0x5d), pref_f2] + rmmod(xmm_reg,
+                                                       rm_arg_xmm_m64))
+addop("maxps", [bs8(0x0f), bs8(0x5f), no_xmm_pref] + rmmod(xmm_reg,
+                                                           rm_arg_xmm_m128))
+addop("maxpd", [bs8(0x0f), bs8(0x5f), pref_66] + rmmod(xmm_reg,
+                                                       rm_arg_xmm_m128))
+addop("maxsd", [bs8(0x0f), bs8(0x5f), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64))
+addop("maxss", [bs8(0x0f), bs8(0x5f), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32))
+
+for cond_name, value in [
+    ("eq", 0x00),
+    ("lt", 0x01),
+    ("le", 0x02),
+    ("unord", 0x03),
+    ("neq", 0x04),
+    ("nlt", 0x05),
+    ("nle", 0x06),
+    ("ord", 0x07),
+]:
+    addop("cmp%sps" % cond_name, [bs8(0x0f), bs8(0xc2), no_xmm_pref] +
+          rmmod(xmm_reg, rm_arg_xmm_m64) + [bs8(value)])
+    addop("cmp%spd" % cond_name, [bs8(0x0f), bs8(0xc2), pref_66] +
+          rmmod(xmm_reg, rm_arg_xmm_m64) + [bs8(value)])
+    addop("cmp%sss" % cond_name, [bs8(0x0f), bs8(0xc2), pref_f3] +
+          rmmod(xmm_reg, rm_arg_xmm_m32) + [bs8(value)])
+    addop("cmp%ssd" % cond_name, [bs8(0x0f), bs8(0xc2), pref_f2] +
+          rmmod(xmm_reg, rm_arg_xmm_m32) + [bs8(value)])
+
+
 addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), no_xmm_pref] +
-      rmmod(mm_reg, rm_arg_mm))
+      rmmod(mm_reg, rm_arg_mm_m64))
 addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), pref_66] +
-      rmmod(xmm_reg, rm_arg_xmm))
+      rmmod(xmm_reg, rm_arg_xmm_m128))
 addop("pshufd", [bs8(0x0f), bs8(0x70), pref_66] +
-      rmmod(xmm_reg, rm_arg_xmm) + [u08])
-
+      rmmod(xmm_reg, rm_arg_xmm_m128) + [u08])
+addop("pshuflw", [bs8(0x0f), bs8(0x70), pref_f2] +
+      rmmod(xmm_reg, rm_arg_xmm_m128) + [u08])
+addop("pshufhw", [bs8(0x0f), bs8(0x70), pref_f3] +
+      rmmod(xmm_reg, rm_arg_xmm_m128) + [u08])
 
 ### Convert
@@ -4241,10 +4280,29 @@ addop("psrlw", [bs8(0x0f), bs8(0x71), pref_66] +
       rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
 
 addop("psrlw", [bs8(0x0f), bs8(0xd1), no_xmm_pref] +
-      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+      rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64])
 addop("psrlw", [bs8(0x0f), bs8(0xd1), pref_66] +
-      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+      rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128])
+
+addop("psraw", [bs8(0x0f), bs8(0xe1), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64])
+addop("psraw", [bs8(0x0f), bs8(0xe1), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128])
+
+addop("psraw", [bs8(0x0f), bs8(0x71), no_xmm_pref] +
+      rmmod(d4, rm_arg_mm_m64) + [u08], [rm_arg_mm_m64, u08])
+addop("psraw", [bs8(0x0f), bs8(0x71), pref_66] +
+      rmmod(d4, rm_arg_xmm_m128) + [u08], [rm_arg_xmm_m128, u08])
+
+addop("psrad", [bs8(0x0f), bs8(0xe2), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64])
+addop("psrad", [bs8(0x0f), bs8(0xe2), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128])
+addop("psrad", [bs8(0x0f), bs8(0x72), no_xmm_pref] +
+      rmmod(d4, rm_arg_mm_m64) + [u08], [rm_arg_mm_m64, u08])
+addop("psrad", [bs8(0x0f), bs8(0x72), pref_66] +
+      rmmod(d4, rm_arg_xmm_m128) + [u08], [rm_arg_xmm_m128, u08])
 
 addop("psllq", [bs8(0x0f), bs8(0x73), no_xmm_pref] +
```
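The decoder entries above derive one mnemonic per SSE compare predicate; the last opcode byte is the predicate immediate. A standalone sketch of the mapping built by that loop (hypothetical helper, not part of the patch):

```python
# The eight SSE/SSE2 compare predicates, in immediate-byte order.
SSE_CMP_PREDICATES = ["eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord"]

def cmp_mnemonic(cond, suffix):
    """Return (mnemonic, predicate_byte), e.g. ('cmpltps', 0x01)."""
    return "cmp%s%s" % (cond, suffix), SSE_CMP_PREDICATES.index(cond)

assert cmp_mnemonic("lt", "ps") == ("cmpltps", 0x01)
assert cmp_mnemonic("ord", "sd") == ("cmpordsd", 0x07)
```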
```diff
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index f3ca3a62..ef939144 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -1883,7 +1883,7 @@ def float_pop(avoid_flt=None, popcount=1):
         if avoid_flt != float_list[i]:
             e.append(m2_expr.ExprAff(float_list[i],
                                      float_list[i + popcount]))
-    fill_value = m2_expr.ExprOp("int_64_to_double",
+    fill_value = m2_expr.ExprOp("sint_to_fp64",
                                 m2_expr.ExprInt(0, float_list[i].size))
     for i in xrange(8 - popcount, 8):
         e.append(m2_expr.ExprAff(float_list[i],
@@ -1919,7 +1919,7 @@ def ftst(_, instr):
     dst = float_st0
 
     e = []
-    src = m2_expr.ExprOp('int_32_to_double', m2_expr.ExprInt(0, 32))
+    src = m2_expr.ExprOp('sint_to_fp64', m2_expr.ExprInt(0, 32))
 
     e.append(m2_expr.ExprAff(float_c0, m2_expr.ExprOp('fcom_c0', dst, src)))
     e.append(m2_expr.ExprAff(float_c1, m2_expr.ExprOp('fcom_c1', dst, src)))
     e.append(m2_expr.ExprAff(float_c2, m2_expr.ExprOp('fcom_c2', dst, src)))
@@ -2045,8 +2045,8 @@ def comiss(_, instr, dst, src):
 
     e = []
 
-    dst = m2_expr.ExprOp('int_32_to_float', dst[:32])
-    src = m2_expr.ExprOp('int_32_to_float', src[:32])
+    dst = m2_expr.ExprOp('sint_to_fp32', dst[:32])
+    src = m2_expr.ExprOp('sint_to_fp32', src[:32])
 
     e.append(m2_expr.ExprAff(cf, m2_expr.ExprOp('fcom_c0', dst, src)))
     e.append(m2_expr.ExprAff(pf, m2_expr.ExprOp('fcom_c2', dst, src)))
@@ -2065,8 +2065,8 @@ def comisd(_, instr, dst, src):
 
     e = []
 
-    dst = m2_expr.ExprOp('int_64_to_double', dst[:64])
-    src = m2_expr.ExprOp('int_64_to_double', src[:64])
+    dst = m2_expr.ExprOp('sint_to_fp64', dst[:64])
+    src = m2_expr.ExprOp('sint_to_fp64', src[:64])
 
     e.append(m2_expr.ExprAff(cf, m2_expr.ExprOp('fcom_c0', dst, src)))
     e.append(m2_expr.ExprAff(pf, m2_expr.ExprOp('fcom_c2', dst, src)))
@@ -2081,7 +2081,9 @@ def comisd(_, instr, dst, src):
 
 def fld(_, instr, src):
-    src = mem2double(instr, src)
+
+    if src.size == 32:
+        src = m2_expr.ExprOp("fpconvert_fp64", src)
 
     e = []
     e.append(m2_expr.ExprAff(float_st7, float_st6))
@@ -2103,13 +2105,12 @@ def fld(_, instr, src):
 
 def fst(_, instr, dst):
     e = []
 
-    if isinstance(dst, m2_expr.ExprMem):
-        if dst.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('double_to_mem_%.2d' % dst.size, float_st0)
-    else:
-        src = float_st0
+    if isinstance(dst, m2_expr.ExprMem) and dst.size > 64:
+        raise NotImplementedError('convert to 80bits')
 
+    src = float_st0
+    if dst.size == 32:
+        src = m2_expr.ExprOp("fpconvert_fp32", src)
     e.append(m2_expr.ExprAff(dst, src))
     e += set_float_cs_eip(instr)
     return e, []
@@ -2118,12 +2119,13 @@ def fst(_, instr, dst):
 def fstp(ir, instr, dst):
     e = []
 
-    if isinstance(dst, m2_expr.ExprMem):
-        if dst.size > 64:
-            # TODO: move to 80 bits
-            dst = ir.ExprMem(dst.arg, size=64)
+    if isinstance(dst, m2_expr.ExprMem) and dst.size > 64:
+        raise NotImplementedError('convert to 80bits')
 
-        src = m2_expr.ExprOp('double_to_mem_%.2d' % dst.size, float_st0)
+    if isinstance(dst, m2_expr.ExprMem):
+        src = float_st0
+        if dst.size == 32:
+            src = m2_expr.ExprOp("fpconvert_fp32", src)
         e.append(m2_expr.ExprAff(dst, src))
     else:
         src = float_st0
@@ -2139,7 +2141,7 @@ def fstp(ir, instr, dst):
 def fist(_, instr, dst):
     e = []
-    e.append(m2_expr.ExprAff(dst, m2_expr.ExprOp('double_to_int_%d' % dst.size,
+    e.append(m2_expr.ExprAff(dst, m2_expr.ExprOp('fp_to_sint%d' % dst.size,
                                                  float_st0)))
 
     e += set_float_cs_eip(instr)
@@ -2154,9 +2156,11 @@ def fistp(ir, instr, dst):
 
 def fisttp(_, instr, dst):
     e = []
-    e.append(m2_expr.ExprAff(dst,
-                             m2_expr.ExprOp('double_trunc_to_int_%d' % dst.size,
-                                            float_st0)))
+    e.append(m2_expr.ExprAff(
+        dst,
+        m2_expr.ExprOp('fp_to_sint%d' % dst.size,
+                       m2_expr.ExprOp('fpround_towardszero', float_st0)
+        )))
 
     e += set_float_cs_eip(instr)
     e += float_pop(dst)
@@ -2165,7 +2169,7 @@ def fisttp(_, instr, dst):
 
 def fild(ir, instr, src):
     # XXXXX
-    src = m2_expr.ExprOp('int_%.2d_to_double' % src.size, src)
+    src = m2_expr.ExprOp('sint_to_fp64', src)
     e = []
     e += set_float_cs_eip(instr)
     e_fld, extra = fld(ir, instr, src)
@@ -2174,26 +2178,26 @@ def fild(ir, instr, src):
 
 def fldz(ir, instr):
-    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+    return fld(ir, instr, m2_expr.ExprOp('sint_to_fp64',
                                          m2_expr.ExprInt(0, 32)))
 
 def fld1(ir, instr):
-    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+    return fld(ir, instr, m2_expr.ExprOp('sint_to_fp64',
                                          m2_expr.ExprInt(1, 32)))
 
 def fldl2t(ir, instr):
     value_f = math.log(10) / math.log(2)
     value = struct.unpack('I', struct.pack('f', value_f))[0]
-    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+    return fld(ir, instr, m2_expr.ExprOp('sint_to_fp64',
                                          m2_expr.ExprInt(value, 32)))
 
 def fldpi(ir, instr):
     value_f = math.pi
     value = struct.unpack('I', struct.pack('f', value_f))[0]
-    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+    return fld(ir, instr, m2_expr.ExprOp('sint_to_fp64',
                                          m2_expr.ExprInt(value, 32)))
@@ -2534,7 +2538,7 @@ def fptan(_, instr):
     e.append(m2_expr.ExprAff(float_st2, float_st1))
     e.append(m2_expr.ExprAff(float_st1, m2_expr.ExprOp('ftan', float_st0)))
     e.append(m2_expr.ExprAff(float_st0,
-                             m2_expr.ExprOp('int_32_to_double',
+                             m2_expr.ExprOp('sint_to_fp64',
                                             m2_expr.ExprInt(1, 32))))
     e.append(
         m2_expr.ExprAff(float_stack_ptr,
@@ -3371,8 +3375,7 @@ def wrmsr(ir, instr):
 # MMX/SSE/AVX operations
 #
 
-
-def vec_op_clip(op, size):
+def vec_op_clip(op, size, callback=None):
     """
     Generate simd operations
     @op: the operator
@@ -3380,9 +3383,12 @@ def vec_op_clip(op, size):
     @size: size of an element
     """
     def vec_op_clip_instr(ir, instr, dst, src):
         if op == '-':
-            return [m2_expr.ExprAff(dst[:size], dst[:size] - src[:size])], []
+            result = dst[:size] - src[:size]
         else:
-            return [m2_expr.ExprAff(dst[:size], m2_expr.ExprOp(op, dst[:size], src[:size]))], []
+            result = m2_expr.ExprOp(op, dst[:size], src[:size])
+        if callback is not None:
+            result = callback(result)
+        return [m2_expr.ExprAff(dst[:size], result)], []
     return vec_op_clip_instr
 
 # Generic vertical operation
@@ -3407,38 +3413,6 @@ def vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output):
     return m2_expr.ExprCompose(*ops)
 
-def float_vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output):
-    assert reg_size % elt_size == 0
-    n = reg_size / elt_size
-
-    x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'),
-                          64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size]
-    if op == '-':
-        ops = [
-            apply_on_output(m2_expr.ExprOp(
-                x_to_int % elt_size,
-                m2_expr.ExprOp(int_to_x % elt_size, dst[i * elt_size:(i + 1) * elt_size]) -
-                m2_expr.ExprOp(
-                    int_to_x % elt_size, src[i * elt_size:(i + 1) * elt_size])))
-            for i in xrange(0, n)
-        ]
-    else:
-        ops = [
-            apply_on_output(m2_expr.ExprOp(
-                x_to_int % elt_size,
-                m2_expr.ExprOp(op,
-                               m2_expr.ExprOp(
-                                   int_to_x % elt_size, dst[i * elt_size:(i + 1) * elt_size]),
-                               m2_expr.ExprOp(
-                                   int_to_x % elt_size, src[i * elt_size:(i + 1) * elt_size]))))
-            for i in xrange(0, n)]
-
-    return m2_expr.ExprCompose(*ops)
-
-
 def __vec_vertical_instr_gen(op, elt_size, sem, apply_on_output):
     def vec_instr(ir, instr, dst, src):
         e = []
@@ -3456,11 +3430,6 @@ def vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x):
                                     apply_on_output)
 
-def float_vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x):
-    return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem,
-                                    apply_on_output)
-
-
 def _keep_mul_high(expr, signed=False):
     assert expr.is_op("*") and len(expr.args) == 2
@@ -3487,6 +3456,32 @@ def _min_max(expr, signed):
         expr.args[0],
     )
 
+def _float_min_max(expr):
+    assert (expr.is_op("fmin") or expr.is_op("fmax")) and len(expr.args) == 2
+    src1 = expr.args[0]
+    src2 = expr.args[1]
+    if expr.is_op("fmin"):
+        comp = m2_expr.expr_is_float_lower(src1, src2)
+    elif expr.is_op("fmax"):
+        comp = m2_expr.expr_is_float_lower(src2, src1)
+
+    # x86 documentation (for MIN):
+    # IF ((SRC1 = 0.0) and (SRC2 = 0.0)) THEN DEST <- SRC2;
+    # ELSE IF (SRC1 = SNaN) THEN DEST <- SRC2; FI;
+    # ELSE IF (SRC2 = SNaN) THEN DEST <- SRC2; FI;
+    # ELSE IF (SRC1 < SRC2) THEN DEST <- SRC1;
+    # ELSE DEST <- SRC2;
+    #
+    # But this includes the NaN output of "SRC1 < SRC2"
+    # Associated text is more detailed, and this is the version impl here
+    return m2_expr.ExprCond(
+        m2_expr.expr_is_sNaN(src2), src2,
+        m2_expr.ExprCond(
+            m2_expr.expr_is_NaN(src2) | m2_expr.expr_is_NaN(src1), src2,
+            m2_expr.ExprCond(comp, src1, src2)
+        )
+    )
+
 # Integer arithmetic
 #
```
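Unlike IEEE-754 `minNum`, x86 MIN*/MAX* are not symmetric: on any NaN operand (and on the ±0.0 tie) the second source wins, which is exactly what `_float_min_max` encodes as an `ExprCond` tree. A plain-float sketch of the same decision procedure (illustration only; the real helper works on symbolic expressions):

```python
import math

def x86_fmin(src1, src2):
    # Mirrors _float_min_max above: on any NaN, and when neither operand
    # is strictly lower (e.g. -0.0 vs +0.0), x86 MIN* returns src2.
    if math.isnan(src1) or math.isnan(src2):
        return src2
    return src1 if src1 < src2 else src2

assert math.isnan(x86_fmin(1.0, float("nan")))  # NaN in src2 propagates
assert math.copysign(1.0, x86_fmin(-0.0, 0.0)) == 1.0  # tie -> src2 (+0.0)
```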
```diff
@@ -3616,22 +3611,100 @@ pmaxsw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=True))
 #
 # SSE
-addss = vec_op_clip('+', 32)
-addsd = vec_op_clip('+', 64)
-addps = float_vec_vertical_instr('+', 32)
-addpd = float_vec_vertical_instr('+', 64)
-subss = vec_op_clip('-', 32)
-subsd = vec_op_clip('-', 64)
-subps = float_vec_vertical_instr('-', 32)
-subpd = float_vec_vertical_instr('-', 64)
-mulss = vec_op_clip('*', 32)
-mulsd = vec_op_clip('*', 64)
-mulps = float_vec_vertical_instr('*', 32)
-mulpd = float_vec_vertical_instr('*', 64)
-divss = vec_op_clip('/', 32)
-divsd = vec_op_clip('/', 64)
-divps = float_vec_vertical_instr('/', 32)
-divpd = float_vec_vertical_instr('/', 64)
+addss = vec_op_clip('fadd', 32)
+addsd = vec_op_clip('fadd', 64)
+addps = vec_vertical_instr('fadd', 32)
+addpd = vec_vertical_instr('fadd', 64)
+subss = vec_op_clip('fsub', 32)
+subsd = vec_op_clip('fsub', 64)
+subps = vec_vertical_instr('fsub', 32)
+subpd = vec_vertical_instr('fsub', 64)
+mulss = vec_op_clip('fmul', 32)
+mulsd = vec_op_clip('fmul', 64)
+mulps = vec_vertical_instr('fmul', 32)
+mulpd = vec_vertical_instr('fmul', 64)
+divss = vec_op_clip('fdiv', 32)
+divsd = vec_op_clip('fdiv', 64)
+divps = vec_vertical_instr('fdiv', 32)
+divpd = vec_vertical_instr('fdiv', 64)
+
+# Comparisons (floating-point)
+
+minps = vec_vertical_instr('fmin', 32, _float_min_max)
+minpd = vec_vertical_instr('fmin', 64, _float_min_max)
+minss = vec_op_clip('fmin', 32, _float_min_max)
+minsd = vec_op_clip('fmin', 64, _float_min_max)
+maxps = vec_vertical_instr('fmax', 32, _float_min_max)
+maxpd = vec_vertical_instr('fmax', 64, _float_min_max)
+maxss = vec_op_clip('fmax', 32, _float_min_max)
+maxsd = vec_op_clip('fmax', 64, _float_min_max)
+
+def _float_compare_to_mask(expr):
+    if expr.op == 'unord':
+        to_ext = m2_expr.expr_is_NaN(expr.args[0]) | m2_expr.expr_is_NaN(expr.args[1])
+    elif expr.op == 'ord':
+        to_ext = ~m2_expr.expr_is_NaN(expr.args[0]) & ~m2_expr.expr_is_NaN(expr.args[1])
+    else:
+        if expr.op == '==fu':
+            to_ext = m2_expr.expr_is_float_equal(expr.args[0], expr.args[1])
+            on_NaN = m2_expr.ExprInt(0, 1)
+        elif expr.op == '<fu':
+            to_ext = m2_expr.expr_is_float_lower(expr.args[0], expr.args[1])
+            on_NaN = m2_expr.ExprInt(0, 1)
+        elif expr.op == '<=fu':
+            to_ext = (m2_expr.expr_is_float_equal(expr.args[0], expr.args[1]) |
+                      m2_expr.expr_is_float_lower(expr.args[0], expr.args[1]))
+            on_NaN = m2_expr.ExprInt(0, 1)
+        elif expr.op == '!=fu':
+            to_ext = ~m2_expr.expr_is_float_equal(expr.args[0], expr.args[1])
+            on_NaN = m2_expr.ExprInt(1, 1)
+        elif expr.op == '!<fu':
+            to_ext = ~m2_expr.expr_is_float_lower(expr.args[0], expr.args[1])
+            on_NaN = m2_expr.ExprInt(1, 1)
+        elif expr.op == '!<=fu':
+            to_ext = ~(m2_expr.expr_is_float_equal(expr.args[0], expr.args[1]) |
+                       m2_expr.expr_is_float_lower(expr.args[0], expr.args[1]))
+            on_NaN = m2_expr.ExprInt(1, 1)
+
+        to_ext = m2_expr.ExprCond(
+            m2_expr.expr_is_NaN(expr.args[0]) | m2_expr.expr_is_NaN(expr.args[1]),
+            on_NaN,
+            to_ext
+        )
+    return to_ext.signExtend(expr.size)
```
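CMP*PS/PD-style predicates yield an all-ones or all-zero lane mask, and the unordered predicates (`neq`, `nlt`, `nle`, `unord`) are the ones that hold when a NaN is involved. A plain-float model of `_float_compare_to_mask` for one 64-bit lane (illustrative helper, not part of the patch):

```python
import math

def cmp_sd_mask(op, a, b):
    # The 1-bit predicate result is sign-extended into a lane mask;
    # NaN handling follows the on_NaN constants chosen above.
    unordered = math.isnan(a) or math.isnan(b)
    if op == "unord":
        res = unordered
    elif op == "ord":
        res = not unordered
    elif unordered:
        res = op in ("!=fu", "!<fu", "!<=fu")  # negated predicates hold on NaN
    else:
        res = {"==fu": a == b, "<fu": a < b, "<=fu": a <= b,
               "!=fu": a != b, "!<fu": not a < b, "!<=fu": not a <= b}[op]
    return 0xFFFFFFFFFFFFFFFF if res else 0

assert cmp_sd_mask("<fu", 1.0, 2.0) == 0xFFFFFFFFFFFFFFFF
assert cmp_sd_mask("<fu", float("nan"), 2.0) == 0
assert cmp_sd_mask("!<fu", float("nan"), 2.0) == 0xFFFFFFFFFFFFFFFF
```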
```diff
+cmpeqps = vec_vertical_instr('==fu', 32, lambda x: _float_compare_to_mask(x))
+cmpeqpd = vec_vertical_instr('==fu', 64, lambda x: _float_compare_to_mask(x))
+cmpeqss = vec_op_clip('==fu', 32, lambda x: _float_compare_to_mask(x))
+cmpeqsd = vec_op_clip('==fu', 64, lambda x: _float_compare_to_mask(x))
+cmpltps = vec_vertical_instr('<fu', 32, lambda x: _float_compare_to_mask(x))
+cmpltpd = vec_vertical_instr('<fu', 64, lambda x: _float_compare_to_mask(x))
+cmpltss = vec_op_clip('<fu', 32, lambda x: _float_compare_to_mask(x))
+cmpltsd = vec_op_clip('<fu', 64, lambda x: _float_compare_to_mask(x))
+cmpleps = vec_vertical_instr('<=fu', 32, lambda x: _float_compare_to_mask(x))
+cmplepd = vec_vertical_instr('<=fu', 64, lambda x: _float_compare_to_mask(x))
+cmpless = vec_op_clip('<=fu', 32, lambda x: _float_compare_to_mask(x))
+cmplesd = vec_op_clip('<=fu', 64, lambda x: _float_compare_to_mask(x))
+cmpunordps = vec_vertical_instr('unord', 32, lambda x: _float_compare_to_mask(x))
+cmpunordpd = vec_vertical_instr('unord', 64, lambda x: _float_compare_to_mask(x))
+cmpunordss = vec_op_clip('unord', 32, lambda x: _float_compare_to_mask(x))
+cmpunordsd = vec_op_clip('unord', 64, lambda x: _float_compare_to_mask(x))
+cmpneqps = vec_vertical_instr('!=fu', 32, lambda x: _float_compare_to_mask(x))
+cmpneqpd = vec_vertical_instr('!=fu', 64, lambda x: _float_compare_to_mask(x))
+cmpneqss = vec_op_clip('!=fu', 32, lambda x: _float_compare_to_mask(x))
+cmpneqsd = vec_op_clip('!=fu', 64, lambda x: _float_compare_to_mask(x))
+cmpnltps = vec_vertical_instr('!<fu', 32, lambda x: _float_compare_to_mask(x))
+cmpnltpd = vec_vertical_instr('!<fu', 64, lambda x: _float_compare_to_mask(x))
+cmpnltss = vec_op_clip('!<fu', 32, lambda x: _float_compare_to_mask(x))
+cmpnltsd = vec_op_clip('!<fu', 64, lambda x: _float_compare_to_mask(x))
+cmpnleps = vec_vertical_instr('!<=fu', 32, lambda x: _float_compare_to_mask(x))
+cmpnlepd = vec_vertical_instr('!<=fu', 64, lambda x: _float_compare_to_mask(x))
+cmpnless = vec_op_clip('!<=fu', 32, lambda x: _float_compare_to_mask(x))
+cmpnlesd = vec_op_clip('!<=fu', 64, lambda x: _float_compare_to_mask(x))
+cmpordps = vec_vertical_instr('ord', 32, lambda x: _float_compare_to_mask(x))
+cmpordpd = vec_vertical_instr('ord', 64, lambda x: _float_compare_to_mask(x))
+cmpordss = vec_op_clip('ord', 32, lambda x: _float_compare_to_mask(x))
+cmpordsd = vec_op_clip('ord', 64, lambda x: _float_compare_to_mask(x))
 
 # Logical (floating-point)
 #
@@ -3665,31 +3738,31 @@ def por(_, instr, dst, src):
 
 def cvtdq2pd(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('int_32_to_double', src[:32])))
+        m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('sint_to_fp64', src[:32])))
     e.append(
-        m2_expr.ExprAff(dst[64:128], m2_expr.ExprOp('int_32_to_double', src[32:64])))
+        m2_expr.ExprAff(dst[64:128], m2_expr.ExprOp('sint_to_fp64', src[32:64])))
     return e, []
 
 def cvtdq2ps(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('int_32_to_float', src[:32])))
+        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('sint_to_fp32', src[:32])))
     e.append(
-        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('int_32_to_float', src[32:64])))
+        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('sint_to_fp32', src[32:64])))
     e.append(
-        m2_expr.ExprAff(dst[64:96], m2_expr.ExprOp('int_32_to_float', src[64:96])))
+        m2_expr.ExprAff(dst[64:96], m2_expr.ExprOp('sint_to_fp32', src[64:96])))
     e.append(
-        m2_expr.ExprAff(dst[96:128], m2_expr.ExprOp('int_32_to_float', src[96:128])))
+        m2_expr.ExprAff(dst[96:128], m2_expr.ExprOp('sint_to_fp32', src[96:128])))
     return e, []
 
 def cvtpd2dq(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_to_int_32', src[:64])))
+        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fp_to_sint32', src[:64])))
     e.append(
-        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('double_to_int_32', src[64:128])))
+        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('fp_to_sint32', src[64:128])))
     e.append(m2_expr.ExprAff(dst[64:128], m2_expr.ExprInt(0, 64)))
     return e, []
@@ -3697,18 +3770,18 @@ def cvtpd2dq(_, instr, dst, src):
m2_expr.ExprOp('double_to_int_32', src[:64]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fp_to_sint32', src[:64]))) e.append( - m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('double_to_int_32', src[64:128]))) + m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('fp_to_sint32', src[64:128]))) return e, [] def cvtpd2ps(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_to_float', src[:64]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fpconvert_fp32', src[:64]))) e.append( - m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('double_to_float', src[64:128]))) + m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('fpconvert_fp32', src[64:128]))) e.append(m2_expr.ExprAff(dst[64:128], m2_expr.ExprInt(0, 64))) return e, [] @@ -3716,148 +3789,131 @@ def cvtpd2ps(_, instr, dst, src): def cvtpi2pd(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('int_32_to_double', src[:32]))) + m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('sint_to_fp64', src[:32]))) e.append( - m2_expr.ExprAff(dst[64:128], m2_expr.ExprOp('int_32_to_double', src[32:64]))) + m2_expr.ExprAff(dst[64:128], m2_expr.ExprOp('sint_to_fp64', src[32:64]))) return e, [] def cvtpi2ps(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('int_32_to_float', src[:32]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('sint_to_fp32', src[:32]))) e.append( - m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('int_32_to_float', src[32:64]))) + m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('sint_to_fp32', src[32:64]))) return e, [] def cvtps2dq(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('float_to_int_32', src[:32]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fp_to_sint32', src[:32]))) e.append( - m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('float_to_int_32', src[32:64]))) + m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('fp_to_sint32', src[32:64]))) e.append( - m2_expr.ExprAff(dst[64:96], m2_expr.ExprOp('float_to_int_32', src[64:96]))) + m2_expr.ExprAff(dst[64:96], m2_expr.ExprOp('fp_to_sint32', src[64:96]))) e.append( - m2_expr.ExprAff(dst[96:128], m2_expr.ExprOp('float_to_int_32', src[96:128]))) + m2_expr.ExprAff(dst[96:128], m2_expr.ExprOp('fp_to_sint32', src[96:128]))) return e, [] def cvtps2pd(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('float_to_double', src[:32]))) + m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('fpconvert_fp64', src[:32]))) e.append( - m2_expr.ExprAff(dst[64:128], m2_expr.ExprOp('float_to_double', src[32:64]))) + m2_expr.ExprAff(dst[64:128], m2_expr.ExprOp('fpconvert_fp64', src[32:64]))) return e, [] def cvtps2pi(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('float_to_int_32', src[:32]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fp_to_sint32', src[:32]))) e.append( - m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('float_to_int_32', src[32:64]))) + m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('fp_to_sint32', src[32:64]))) return e, [] def cvtsd2si(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_to_int_32', src[:64]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fp_to_sint32', src[:64]))) return e, [] def cvtsd2ss(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_to_float', src[:64]))) + m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fpconvert_fp32', src[:64]))) return e, [] def cvtsi2sd(_, instr, dst, src): e = [] e.append( - m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('int_32_to_double', src[:32]))) + 
 def cvtsi2sd(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('int_32_to_double', src[:32])))
+        m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('sint_to_fp64', src[:32])))
     return e, []
 
 def cvtsi2ss(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('int_32_to_float', src[:32])))
+        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('sint_to_fp32', src[:32])))
     return e, []
 
 def cvtss2sd(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('float_to_double', src[:32])))
+        m2_expr.ExprAff(dst[:64], m2_expr.ExprOp('fpconvert_fp64', src[:32])))
     return e, []
 
 def cvtss2si(_, instr, dst, src):
     e = []
     e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('float_to_int_32', src[:32])))
+        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('fp_to_sint32', src[:32])))
     return e, []
 
-def cvttpd2pi(_, instr, dst, src):
+def _cvtt_tpl(dst, src, numbers, double):
     e = []
-    e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_trunc_to_int_32', src[:64])))
-    e.append(
-        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('double_trunc_to_int_32', src[64:128])))
-    return e, []
+    for i in numbers:
+        # For CVTT*D2* (Convert with Truncation ... Double-Precision) to work,
+        # a first conversion fp64 -> fp32 is needed
+        if double:
+            tmp_src = m2_expr.ExprOp('fpconvert_fp32', src[i*64:i*64 + 64])
+        else:
+            tmp_src = src[i*32:i*32 + 32]
+
+        e.append(m2_expr.ExprAff(
+            dst[i*32:i*32 + 32],
+            m2_expr.ExprOp('fp_to_sint32', m2_expr.ExprOp(
+                'fpround_towardszero',
+                tmp_src
+            ))))
+    return e
 
+def cvttpd2pi(_, instr, dst, src):
+    return _cvtt_tpl(dst, src, [0, 1], double=True), []
 
 def cvttpd2dq(_, instr, dst, src):
-    e = []
-    e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_trunc_to_int_32', src[:64])))
-    e.append(
-        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('double_trunc_to_int_32', src[64:128])))
+    e = _cvtt_tpl(dst, src, [0, 1], double=True)
     e.append(m2_expr.ExprAff(dst[64:128], m2_expr.ExprInt(0, 64)))
     return e, []
 
+def cvttsd2si(_, instr, dst, src):
+    return _cvtt_tpl(dst, src, [0], double=True), []
 
 def cvttps2dq(_, instr, dst, src):
-    e = []
-    e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('float_trunc_to_int_32', src[:32])))
-    e.append(
-        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('float_trunc_to_int_32', src[32:64])))
-    e.append(
-        m2_expr.ExprAff(dst[64:96], m2_expr.ExprOp('float_trunc_to_int_32', src[64:96])))
-    e.append(
-        m2_expr.ExprAff(dst[96:128], m2_expr.ExprOp('float_trunc_to_int_32', src[96:128])))
-    return e, []
-
+    return _cvtt_tpl(dst, src, [0, 1, 2, 3], double=False), []
 
 def cvttps2pi(_, instr, dst, src):
-    e = []
-    e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('float_trunc_to_int_32', src[:32])))
-    e.append(
-        m2_expr.ExprAff(dst[32:64], m2_expr.ExprOp('float_trunc_to_int_32', src[32:64])))
-    return e, []
-
-
-def cvttsd2si(_, instr, dst, src):
-    e = []
-    e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('double_trunc_to_int_32', src[:64])))
-    return e, []
-
+    return _cvtt_tpl(dst, src, [0, 1], double=False), []
 
 def cvttss2si(_, instr, dst, src):
-    e = []
-    e.append(
-        m2_expr.ExprAff(dst[:32], m2_expr.ExprOp('float_trunc_to_int_32', src[:32])))
-    return e, []
-
+    return _cvtt_tpl(dst, src, [0], double=False), []
 
 def movss(_, instr, dst, src):
     e = []
```
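The CVTT* variants truncate toward zero instead of using the MXCSR rounding mode, which `_cvtt_tpl` models as `fpround_towardszero` followed by `fp_to_sint32`. A plain-float model of that pipeline for one lane (illustration only):

```python
import math

def cvttsd2si_model(x):
    # Round toward zero first, then take the signed 32-bit bit pattern.
    truncated = math.trunc(x)           # 'fpround_towardszero'
    return int(truncated) & 0xFFFFFFFF  # 'fp_to_sint32' as raw bits

assert cvttsd2si_model(2.9) == 2
assert cvttsd2si_model(-2.9) == 0xFFFFFFFE  # -2 in 32-bit two's complement
```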
```diff
@@ -3925,52 +3981,55 @@ def pshufb(_, instr, dst, src):
 
 def pshufd(_, instr, dst, src, imm):
-    e = []
+    control = int(imm)
+    out = []
     for i in xrange(4):
-        index = imm[2 * i:2 * (i + 1)].zeroExtend(dst.size)
-        index <<= m2_expr.ExprInt(5, dst.size)
-        value = (dst >> index)[:32]
-        e.append(m2_expr.ExprAff(dst[32 * i:32 * (i + 1)], value))
-    return e, []
+        shift = ((control >> (i * 2)) & 3) * 32
+        # shift is 2 bits long, expr.size is 128
+        # => shift + 32 <= src.size
+        out.append(src[shift: shift + 32])
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
-def ps_rl_ll(ir, instr, dst, src, op, size):
-    loc_zero, loc_zero_expr = ir.gen_loc_key_and_expr(ir.IRDst.size)
-    loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size)
-    loc_next = ir.get_next_loc_key(instr)
-    loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size)
+def pshuflw(_, instr, dst, src, imm):
+    control = int(imm)
+    out = []
+    for i in xrange(4):
+        shift = ((control >> (i * 2)) & 3) * 16
+        out.append(src[shift: shift + 16])
+    out.append(src[64:])
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
-    if src.size == 8:
-        count = src.zeroExtend(dst.size)
-    else:
-        count = src.zeroExtend(dst.size)
+def pshufhw(_, instr, dst, src, imm):
+    control = int(imm)
+    out = [src[:64]]
+    for i in xrange(4):
+        shift = ((control >> (i * 2)) & 3) * 16
+        out.append(src[shift + 64: shift + 16 + 64])
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
+
+def ps_rl_ll(ir, instr, dst, src, op, size):
     mask = {16: 0xF, 32: 0x1F, 64: 0x3F}[size]
-    test = expr_simp(count & m2_expr.ExprInt(
-        ((1 << dst.size) - 1) ^ mask, dst.size))
-    e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(test,
-                                                    loc_zero_expr,
-                                                    loc_do_expr))]
-
-    slices = []
-    for i in xrange(0, dst.size, size):
-        slices.append(m2_expr.ExprOp(op, dst[i:i + size], count[:size]))
+    mask = m2_expr.ExprInt(mask, dst.size)
 
-    if isinstance(test, m2_expr.ExprInt):
-        if int(test) == 0:
-            return [m2_expr.ExprAff(dst[0:dst.size], m2_expr.ExprCompose(*slices))], []
-        else:
-            return [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size))], []
+    # Saturate the counter to 2**size
+    count = src.zeroExtend(dst.size)
+    count = m2_expr.ExprCond(count & expr_simp(~mask),
+                             m2_expr.ExprInt(size, dst.size),  # saturation
+                             count,  # count < 2**size
+                             )
+    count = count[:size]
+    if src.is_int():
+        count = expr_simp(count)
 
-    e_zero = [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size)),
-              m2_expr.ExprAff(ir.IRDst, loc_next_expr)]
-    e_do = []
-    e.append(m2_expr.ExprAff(dst[0:dst.size], m2_expr.ExprCompose(*slices)))
-    e_do.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr))
-    return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)]),
-               IRBlock(loc_zero, [AssignBlock(e_zero, instr)])]
+    out = []
+    for i in xrange(0, dst.size, size):
+        out.append(m2_expr.ExprOp(op, dst[i:i + size], count))
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
 def psrlw(ir, instr, dst, src):
@@ -3997,6 +4056,14 @@ def psllq(ir, instr, dst, src):
     return ps_rl_ll(ir, instr, dst, src, "<<", 64)
 
+def psraw(ir, instr, dst, src):
+    return ps_rl_ll(ir, instr, dst, src, "a>>", 16)
+
+
+def psrad(ir, instr, dst, src):
+    return ps_rl_ll(ir, instr, dst, src, "a>>", 32)
+
+
 def pslldq(_, instr, dst, src):
     assert src.is_int()
     e = []
@@ -4250,11 +4317,17 @@ def movdq2q(_, instr, dst, src):
     return e, []
 
+def movq2dq(_, instr, dst, src):
+    e = []
+    e.append(m2_expr.ExprAff(dst, src[:64].zeroExtend(dst.size)))
+    return e, []
+
+
 def sqrt_gen(_, instr, dst, src, size):
     e = []
     out = []
-    for i in src.size / size:
-        out.append(m2_expr.ExprOp('fsqrt' % size,
+    for i in xrange(src.size / size):
+        out.append(m2_expr.ExprOp('fsqrt',
                                   src[i * size: (i + 1) * size]))
     src = m2_expr.ExprCompose(*out)
     e.append(m2_expr.ExprAff(dst, src))
```
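The PSHUFD rewrite drops the runtime shift computation: because the control byte is an immediate, each destination dword can be selected by direct slicing at translation time. A standalone integer model of the new semantics (illustrative helper):

```python
def pshufd_model(src, control):
    # Destination dword i copies source dword (control >> 2*i) & 3,
    # matching the ExprCompose built above. @src is a 128-bit integer.
    lanes = [(src >> (32 * i)) & 0xFFFFFFFF for i in range(4)]
    out = 0
    for i in range(4):
        out |= lanes[(control >> (2 * i)) & 3] << (32 * i)
    return out

src = 0x00000004000000030000000200000001  # dwords 1, 2, 3, 4 (low to high)
assert pshufd_model(src, 0x1B) == 0x00000001000000020000000300000004  # reversed
```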
```diff
@@ -4479,10 +4552,10 @@ def maskmovq(ir, instr, src, mask):
     blks = []
 
     # For each possibility, check if a write is necessary
-    check_labels = [m2_expr.ExprId(ir.gen_label(), ir.IRDst.size)
+    check_labels = [m2_expr.ExprLoc(ir.loc_db.add_location(), ir.IRDst.size)
                     for _ in xrange(0, mask.size, 8)]
     # If the write has to be done, do it (otherwise, nothing happen)
-    write_labels = [m2_expr.ExprId(ir.gen_label(), ir.IRDst.size)
+    write_labels = [m2_expr.ExprLoc(ir.loc_db.add_location(), ir.IRDst.size)
                     for _ in xrange(0, mask.size, 8)]
 
     # Build check blocks
@@ -4495,7 +4568,7 @@ def maskmovq(ir, instr, src, mask):
                               m2_expr.ExprCond(bit,
                                                write_label,
                                                next_check_label))
-        blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([check], instr)]))
+        blks.append(IRBlock(cur_label.loc_key, [AssignBlock([check], instr)]))
 
     # Build write blocks
     dst_addr = mRDI[instr.mode]
@@ -4509,7 +4582,7 @@ def maskmovq(ir, instr, src, mask):
         write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8),
                                     src[start: start + 8])
         jump = m2_expr.ExprAff(ir.IRDst, next_check_label)
-        blks.append(IRBlock(cur_label.loc_key, [AssignBlock([write_mem, jump], instr)]))
+        blks.append(IRBlock(cur_label.loc_key, [AssignBlock([write_mem, jump], instr)]))
 
     # If mask is null, bypass all
     e = [m2_expr.ExprAff(ir.IRDst,
                          m2_expr.ExprCond(mask,
@@ -4522,6 +4595,63 @@ def emms(ir, instr):
     # Implemented as a NOP
     return [], []
 
+# Common value without too many options, 0x1fa0
+STMXCSR_VALUE = 0x1fa0
+def stmxcsr(ir, instr, dst):
+    return [m2_expr.ExprAff(dst, m2_expr.ExprInt(STMXCSR_VALUE, dst.size))], []
+
+def ldmxcsr(ir, instr, dst):
+    # Implemented as a NOP
+    return [], []
+
+
+def _select4(src, control):
+    # Implementation inspired from Intel Intrinsics Guide
+    # @control is already resolved (was an immediate)
+
+    if control == 0:
+        return src[:32]
+    elif control == 1:
+        return src[32:64]
+    elif control == 2:
+        return src[64:96]
+    elif control == 3:
+        return src[96:]
+    else:
+        raise ValueError("Control must be on 2 bits")
+
+
+def shufps(ir, instr, dst, src, imm8):
+    out = []
+    control = int(imm8)
+    for i in xrange(4):
+        if i < 2:
+            source = dst
+        else:
+            source = src
+        out.append(_select4(source, (control >> (i * 2)) & 3))
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
+
+
+def shufpd(ir, instr, dst, src, imm8):
+    out = []
+    control = int(imm8)
+    out.append(dst[64:] if control & 1 else dst[:64])
+    out.append(src[64:] if control & 2 else src[:64])
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
+
+def movmskps(ir, instr, dst, src):
+    out = []
+    for i in xrange(4):
+        out.append(src[(32 * i) + 31:(32 * i) + 32])
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out).zeroExtend(dst.size))], []
+
+def movmskpd(ir, instr, dst, src):
+    out = []
+    for i in xrange(2):
+        out.append(src[(64 * i) + 63:(64 * i) + 64])
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out).zeroExtend(dst.size))], []
+
 mnemo_func = {'mov': mov,
               'xchg': xchg,
@@ -4961,6 +5091,49 @@ mnemo_func = {'mov': mov,
               "divps": divps,
               "divpd": divpd,
 
+              # Comparisons (floating-point)
+              #
+              "minps": minps,
+              "minpd": minpd,
+              "minss": minss,
+              "minsd": minsd,
+              "maxps": maxps,
+              "maxpd": maxpd,
+              "maxss": maxss,
+              "maxsd": maxsd,
+              "cmpeqps": cmpeqps,
+              "cmpeqpd": cmpeqpd,
+              "cmpeqss": cmpeqss,
+              "cmpeqsd": cmpeqsd,
+              "cmpltps": cmpltps,
+              "cmpltpd": cmpltpd,
+              "cmpltss": cmpltss,
+              "cmpltsd": cmpltsd,
+              "cmpleps": cmpleps,
+              "cmplepd": cmplepd,
+              "cmpless": cmpless,
+              "cmplesd": cmplesd,
+              "cmpunordps": cmpunordps,
+              "cmpunordpd": cmpunordpd,
+              "cmpunordss": cmpunordss,
+              "cmpunordsd": cmpunordsd,
+              "cmpneqps": cmpneqps,
+              "cmpneqpd": cmpneqpd,
+              "cmpneqss": cmpneqss,
+              "cmpneqsd": cmpneqsd,
+              "cmpnltps": cmpnltps,
"cmpnltpd": cmpnltpd, + "cmpnltss": cmpnltss, + "cmpnltsd": cmpnltsd, + "cmpnleps": cmpnleps, + "cmpnlepd": cmpnlepd, + "cmpnless": cmpnless, + "cmpnlesd": cmpnlesd, + "cmpordps": cmpordps, + "cmpordpd": cmpordpd, + "cmpordss": cmpordss, + "cmpordsd": cmpordsd, + # Logical (floating-point) # @@ -4972,6 +5145,8 @@ mnemo_func = {'mov': mov, "wrmsr": wrmsr, "pshufb": pshufb, "pshufd": pshufd, + "pshuflw": pshuflw, + "pshufhw": pshufhw, "psrlw": psrlw, "psrld": psrld, @@ -4981,6 +5156,8 @@ mnemo_func = {'mov': mov, "psllq": psllq, "pslldq": pslldq, "psrldq": psrldq, + "psraw": psraw, + "psrad": psrad, "palignr": palignr, @@ -5036,6 +5213,7 @@ mnemo_func = {'mov': mov, "movlhps": movlhps, "movhlps": movhlps, "movdq2q": movdq2q, + "movq2dq": movq2dq, "sqrtpd": sqrtpd, "sqrtps": sqrtps, @@ -5061,6 +5239,12 @@ mnemo_func = {'mov': mov, "maskmovq": maskmovq, "maskmovdqu": maskmovq, "emms": emms, + "shufps": shufps, + "shufpd": shufpd, + "movmskps": movmskps, + "movmskpd": movmskpd, + "stmxcsr": stmxcsr, + "ldmxcsr": ldmxcsr, } diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 8e63e6a2..3cf37070 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -1018,26 +1018,12 @@ class ExprOp(Expr): TOK_POS_STRICT, ]: size = 1 - elif self._op in ['mem_16_to_double', 'mem_32_to_double', - 'mem_64_to_double', 'mem_80_to_double', - 'int_16_to_double', 'int_32_to_double', - 'int_64_to_double', 'int_80_to_double']: - size = 64 - elif self._op in ['double_to_mem_16', 'double_to_int_16', - 'float_trunc_to_int_16', 'double_trunc_to_int_16']: - size = 16 - elif self._op in ['double_to_mem_32', 'double_to_int_32', - 'float_trunc_to_int_32', 'double_trunc_to_int_32', - 'double_to_float']: - size = 32 - elif self._op in ['double_to_mem_64', 'double_to_int_64', - 'float_trunc_to_int_64', 'double_trunc_to_int_64', - 'float_to_double']: - size = 64 - elif self._op in ['double_to_mem_80', 'double_to_int_80', - 'float_trunc_to_int_80', - 'double_trunc_to_int_80']: - size = 80 + elif self._op.startswith("sint_to_fp"): + size = int(self._op[len("sint_to_fp"):]) + elif self._op.startswith("fp_to_sint"): + size = int(self._op[len("fp_to_sint"):]) + elif self._op.startswith("fpconvert_fp"): + size = int(self._op[len("fpconvert_fp"):]) elif self._op in ['segm']: size = self._args[1].size else: @@ -1884,3 +1870,80 @@ def expr_is_signed_lower_or_equal(op1, op2): of = _expr_compute_of(op1, op2) zf = _expr_compute_zf(op1, op2) return zf | (nf ^ of) + +# sign bit | exponent | significand +size_to_IEEE754_info = { + 16: { + "exponent": 5, + "significand": 10, + }, + 32: { + "exponent": 8, + "significand": 23, + }, + 64: { + "exponent": 11, + "significand": 52, + }, +} + +def expr_is_NaN(expr): + """Return 1 or 0 on 1 bit if expr represent a NaN value according to IEEE754 + """ + info = size_to_IEEE754_info[expr.size] + exponent = expr[info["significand"]: info["significand"] + info["exponent"]] + + # exponent is full of 1s and significand is not NULL + return ExprCond(exponent - ExprInt(-1, exponent.size), + ExprInt(0, 1), + ExprCond(expr[:info["significand"]], ExprInt(1, 1), + ExprInt(0, 1))) + + +def expr_is_qNaN(expr): + """Return 1 or 0 on 1 bit if expr represent a qNaN (quiet) value according to + IEEE754 + """ + info = size_to_IEEE754_info[expr.size] + significand_top = expr[info["significand"]: info["significand"] + 1] + return expr_is_NaN(expr) & significand_top + + +def expr_is_sNaN(expr): + """Return 1 or 0 on 1 bit if expr represent a sNaN (signalling) value 
according + to IEEE754 + """ + info = size_to_IEEE754_info[expr.size] + significand_top = expr[info["significand"]: info["significand"] + 1] + return expr_is_NaN(expr) & ~significand_top + + +def expr_is_float_lower(op1, op2): + """Return 1 on 1 bit if @op1 < @op2, 0 otherwise. + /!\ Assume @op1 and @op2 are not NaN + Comparision is the floating point one, defined in IEEE754 + """ + sign1, sign2 = op1.msb(), op2.msb() + magn1, magn2 = op1[:-1], op2[:-1] + return ExprCond(sign1 ^ sign2, + # Sign different, only the sign matters + sign1, # sign1 ? op1 < op2 : op1 >= op2 + # Sign equals, the result is inversed for negatives + sign1 ^ (expr_is_unsigned_lower(magn1, magn2))) + + +def expr_is_float_equal(op1, op2): + """Return 1 on 1 bit if @op1 == @op2, 0 otherwise. + /!\ Assume @op1 and @op2 are not NaN + Comparision is the floating point one, defined in IEEE754 + """ + sign1, sign2 = op1.msb(), op2.msb() + magn1, magn2 = op1[:-1], op2[:-1] + return ExprCond(magn1 ^ magn2, + ExprInt(0, 1), + ExprCond(magn1, + # magn1 == magn2, are the signal equals? + ~(sign1 ^ sign2), + # Special case: -0.0 == +0.0 + ExprInt(1, 1)) + ) diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index cafec7c8..f8fd4d3b 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -94,17 +94,70 @@ class TranslatorC(Translator): self.from_expr(expr.args[0]), self._size2mask(expr.args[0].size), ) - elif (expr.op.startswith("double_to_") or - expr.op.endswith("_to_double") or - expr.op.startswith("access_") or + elif expr.op in [ + "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", + "fchs", + ]: + return "fpu_%s%d(%s)" % ( + expr.op, + expr.size, + self.from_expr(expr.args[0]), + ) + elif (expr.op.startswith("access_") or expr.op.startswith("load_") or expr.op.startswith("fxam_c") or - expr.op in ["-", "ftan", "frndint", "f2xm1", - "fsin", "fsqrt", "fabs", "fcos", "fchs"]): + expr.op in ["-"]): return "%s(%s)" % ( expr.op, self.from_expr(expr.args[0]) ) + elif expr.op.startswith("fpround_"): + return "%s_fp%d(%s)" % ( + expr.op, + expr.size, + self.from_expr(expr.args[0]), + ) + elif expr.op.startswith("sint_to_fp"): + dest_size = expr.size + arg_size = expr.args[0].size + if (arg_size, dest_size) in [ + (32, 32), (64, 64), (32, 64), + ]: + func = "sint%d_to_fp%d" % (arg_size, dest_size) + else: + raise RuntimeError( + "Unsupported size for sint_to_fp: %r to %r" % ( + arg_size, + dest_size + )) + return "%s(%s)" % (func, self.from_expr(expr.args[0])) + elif expr.op.startswith("fp_to_sint"): + dest_size = expr.size + arg_size = expr.args[0].size + if (arg_size, dest_size) in [ + (32, 32), (64, 64), (64, 32), + ]: + func = "fp%d_to_sint%d" % (arg_size, dest_size) + else: + raise RuntimeError( + "Unsupported size for fp_to_sint: %r to %r" % ( + arg_size, + dest_size + )) + return "%s(%s)" % (func, self.from_expr(expr.args[0])) + elif expr.op.startswith("fpconvert_fp"): + dest_size = expr.size + arg_size = expr.args[0].size + if (arg_size, dest_size) in [ + (32, 64), (64, 32) + ]: + func = "fp%d_to_fp%d" % (arg_size, dest_size) + else: + raise RuntimeError( + "Unsupported size for fpconvert: %r to %r" % (arg_size, + dest_size) + ) + return "%s(%s)" % (func, self.from_expr(expr.args[0])) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -155,10 +208,11 @@ class TranslatorC(Translator): elif (expr.op.startswith("fcom") or expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", "fprem", "fprem_lsb", "fyl2x", "fpatan"]): - return "fpu_%s(%s, %s)" % ( + return "fpu_%s%d(%s, 
%s)" % ( expr.op, + expr.size, self.from_expr(expr.args[0]), - self.from_expr(expr.args[1]) + self.from_expr(expr.args[1]), ) elif expr.op == "segm": return "segm2addr(jitcpu, %s, %s)" % ( @@ -209,8 +263,8 @@ class TranslatorC(Translator): if expr.size in [8, 16, 32, 64, 128]: size = expr.size else: - # Uncommon expression size - size = expr.size + # Uncommon expression size, use at least uint8 + size = max(expr.size, 8) next_power = 1 while next_power <= size: next_power <<= 1 diff --git a/miasm2/jitter/arch/JitCore_x86.h b/miasm2/jitter/arch/JitCore_x86.h index 221ba5db..a5fc4bd4 100644 --- a/miasm2/jitter/arch/JitCore_x86.h +++ b/miasm2/jitter/arch/JitCore_x86.h @@ -49,14 +49,14 @@ typedef struct { uint64_t cond; - double float_st0; - double float_st1; - double float_st2; - double float_st3; - double float_st4; - double float_st5; - double float_st6; - double float_st7; + uint64_t float_st0; + uint64_t float_st1; + uint64_t float_st2; + uint64_t float_st3; + uint64_t float_st4; + uint64_t float_st5; + uint64_t float_st6; + uint64_t float_st7; unsigned int float_c0; unsigned int float_c1; diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index d63351cc..c4e6709d 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -51,6 +51,17 @@ class LLVMType(llvm_ir.Type): else: raise ValueError() + @classmethod + def fptype(cls, size): + """Return the floating type corresponding to precision @size""" + if size == 32: + precision = llvm_ir.FloatType() + elif size == 64: + precision = llvm_ir.DoubleType() + else: + raise RuntimeError("Unsupported precision: %x", size) + return precision + class LLVMContext(): @@ -236,8 +247,16 @@ class LLVMContext_JIT(LLVMContext): i8 = LLVMType.IntType(8) p8 = llvm_ir.PointerType(i8) itype = LLVMType.IntType(64) + ftype = llvm_ir.FloatType() + dtype = llvm_ir.DoubleType() fc = {"llvm.ctpop.i8": {"ret": i8, "args": [i8]}, + "llvm.nearbyint.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.nearbyint.f64": {"ret": dtype, + "args": [dtype]}, + "llvm.trunc.f32": {"ret": ftype, + "args": [ftype]}, "segm2addr": {"ret": itype, "args": [p8, itype, @@ -245,6 +264,22 @@ class LLVMContext_JIT(LLVMContext): "x86_cpuid": {"ret": itype, "args": [itype, itype]}, + "fcom_c0": {"ret": itype, + "args": [dtype, + dtype]}, + "fcom_c1": {"ret": itype, + "args": [dtype, + dtype]}, + "fcom_c2": {"ret": itype, + "args": [dtype, + dtype]}, + "fcom_c3": {"ret": itype, + "args": [dtype, + dtype]}, + "llvm.sqrt.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.sqrt.f64": {"ret": dtype, + "args": [dtype]}, } for k in [8, 16]: @@ -466,10 +501,7 @@ class LLVMFunction(): [llvm_ir.Constant(LLVMType.IntType(), offset)]) regs = self.llvm_context.ir_arch.arch.regs - if hasattr(regs, "float_list") and expr in regs.float_list: - pointee_type = llvm_ir.DoubleType() - else: - pointee_type = LLVMType.IntType(expr.size) + pointee_type = LLVMType.IntType(expr.size) ptr_casted = builder.bitcast(ptr, llvm_ir.PointerType(pointee_type)) # Store in cache @@ -764,15 +796,19 @@ class LLVMFunction(): itype = LLVMType.IntType(expr.size) cond_ok = self.builder.icmp_unsigned("<", count, itype(expr.size)) + zero = itype(0) if op == ">>": callback = builder.lshr elif op == "<<": callback = builder.shl elif op == "a>>": callback = builder.ashr + # x a>> size is 0 or -1, depending on x sign + cond_neg = self.builder.icmp_signed("<", value, zero) + zero = self.builder.select(cond_neg, itype(-1), zero) ret = self.builder.select(cond_ok, callback(value, count), - itype(0)) + zero) 
             self.update_cache(expr, ret)
             return ret
 
@@ -800,19 +836,118 @@ class LLVMFunction():
             self.update_cache(expr, ret)
             return ret
 
+        if op.startswith("sint_to_fp"):
+            fptype = LLVMType.fptype(expr.size)
+            arg = self.add_ir(expr.args[0])
+            ret = builder.sitofp(arg, fptype)
+            ret = builder.bitcast(ret, llvm_ir.IntType(expr.size))
+            self.update_cache(expr, ret)
+            return ret
+
+        if op == "fp_to_sint32":
+            size_arg = expr.args[0].size
+            fptype_orig = LLVMType.fptype(size_arg)
+            arg = self.add_ir(expr.args[0])
+            arg = builder.bitcast(arg, fptype_orig)
+            # Enforce IEEE-754 behavior. This could be enhanced with
+            # 'llvm.experimental.constrained.nearbyint'
+            if size_arg == 32:
+                func = self.mod.get_global("llvm.nearbyint.f32")
+            elif size_arg == 64:
+                func = self.mod.get_global("llvm.nearbyint.f64")
+            else:
+                raise RuntimeError("Unsupported size")
+            rounded = builder.call(func, [arg])
+            ret = builder.fptosi(rounded, llvm_ir.IntType(expr.size))
+            self.update_cache(expr, ret)
+            return ret
 
-        if op in ["int_16_to_double", "int_32_to_double", "int_64_to_double",
-                  "mem_16_to_double", "mem_32_to_double", "mem_64_to_double"]:
+        if op.startswith("fpconvert_fp"):
+            assert len(expr.args) == 1
+            size_arg = expr.args[0].size
+            fptype = LLVMType.fptype(expr.size)
+            fptype_orig = LLVMType.fptype(size_arg)
             arg = self.add_ir(expr.args[0])
-            ret = builder.uitofp(arg, llvm_ir.DoubleType())
+            arg = builder.bitcast(arg, fptype_orig)
+            if expr.size > size_arg:
+                fc = builder.fpext
+            elif expr.size < size_arg:
+                fc = builder.fptrunc
+            else:
+                raise RuntimeError("Not supported, same size")
+            ret = fc(arg, fptype)
+            ret = builder.bitcast(ret, llvm_ir.IntType(expr.size))
             self.update_cache(expr, ret)
             return ret
 
+        if op.startswith("fpround_"):
+            assert len(expr.args) == 1
+            fptype = LLVMType.fptype(expr.size)
+            arg = self.add_ir(expr.args[0])
+            arg = builder.bitcast(arg, fptype)
+            if op == "fpround_towardszero" and expr.size == 32:
+                fc = self.mod.get_global("llvm.trunc.f32")
+            else:
+                raise RuntimeError("Unsupported rounding mode or size")
+            rounded = builder.call(fc, [arg])
+            ret = builder.bitcast(rounded, llvm_ir.IntType(expr.size))
+            self.update_cache(expr, ret)
+            return ret
 
-        if op in ["double_to_int_16", "double_to_int_32", "double_to_int_64",
-                  "double_to_mem_16", "double_to_mem_32", "double_to_mem_64"]:
+        if op in ["fcom_c0", "fcom_c1", "fcom_c2", "fcom_c3"]:
+            arg1 = self.add_ir(expr.args[0])
+            arg2 = self.add_ir(expr.args[1])
+            fc_name = op
+            fc_ptr = self.mod.get_global(fc_name)
+            casted_args = [
+                builder.bitcast(arg1, llvm_ir.DoubleType()),
+                builder.bitcast(arg2, llvm_ir.DoubleType()),
+            ]
+            ret = builder.call(fc_ptr, casted_args)
+
+            # Cast ret if needed
+            ret_size = fc_ptr.return_value.type.width
+            if ret_size > expr.size:
+                ret = builder.trunc(ret, LLVMType.IntType(expr.size))
+            self.update_cache(expr, ret)
+            return ret
+
+        if op in ["fsqrt"]:
             arg = self.add_ir(expr.args[0])
-            ret = builder.fptoui(arg, llvm_ir.IntType(expr.size))
+
+            # Apply the correct sqrt func
+            if expr.size == 32:
+                arg = builder.bitcast(arg, llvm_ir.FloatType())
+                ret = builder.call(self.mod.get_global("llvm.sqrt.f32"),
+                                   [arg])
+            elif expr.size == 64:
+                arg = builder.bitcast(arg, llvm_ir.DoubleType())
+                ret = builder.call(self.mod.get_global("llvm.sqrt.f64"),
+                                   [arg])
+            else:
+                raise RuntimeError("Unsupported precision: %x", expr.size)
+
+            ret = builder.bitcast(ret, llvm_ir.IntType(expr.size))
+            self.update_cache(expr, ret)
+            return ret
+
+        if op in ["fadd", "fmul", "fsub", "fdiv"]:
+            # More than 2 args not yet supported
+            assert len(expr.args) == 2
+            arg1 = self.add_ir(expr.args[0])
+            arg2 = self.add_ir(expr.args[1])
+            precision = LLVMType.fptype(expr.size)
+            arg1 = builder.bitcast(arg1, precision)
+            arg2 = builder.bitcast(arg2, precision)
+            if op == "fadd":
+                ret = builder.fadd(arg1, arg2)
+            elif op == "fmul":
+                ret = builder.fmul(arg1, arg2)
+            elif op == "fsub":
+                ret = builder.fsub(arg1, arg2)
+            elif op == "fdiv":
+                ret = builder.fdiv(arg1, arg2)
+            ret = builder.bitcast(ret, llvm_ir.IntType(expr.size))
             self.update_cache(expr, ret)
             return ret
 
@@ -832,10 +967,6 @@ class LLVMFunction():
                 callback = builder.urem
             elif op == "/":
                 callback = builder.udiv
-            elif op == "fadd":
-                callback = builder.fadd
-            elif op == "fdiv":
-                callback = builder.fdiv
             else:
                 raise NotImplementedError('Unknown op: %s' % op)
```
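With the ST(i) registers and all IR float values now carried as raw integer bit patterns, every backend (LLVM above, C below) follows the same discipline: bitcast to a float type, operate, bitcast back. The same contract as the new `fpu_fadd32` C helper, modeled in Python with `struct` (illustration only):

```python
import struct

def fpu_fadd32_model(a_bits, b_bits):
    # Operands and result are raw IEEE-754 bit patterns; the payload is
    # reinterpreted (not numerically converted) to float and back.
    a = struct.unpack("<f", struct.pack("<I", a_bits))[0]
    b = struct.unpack("<f", struct.pack("<I", b_bits))[0]
    return struct.unpack("<I", struct.pack("<f", a + b))[0]

assert fpu_fadd32_model(0x3F800000, 0x40000000) == 0x40400000  # 1.0f + 2.0f
```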
```diff
diff --git a/miasm2/jitter/op_semantics.c b/miasm2/jitter/op_semantics.c
index 0420532a..0bc3fcc5 100644
--- a/miasm2/jitter/op_semantics.c
+++ b/miasm2/jitter/op_semantics.c
@@ -355,147 +355,92 @@ void dump_float(void)
 	*/
 }
 
-double mem_32_to_double(unsigned int m)
+uint32_t fpu_fadd32(uint32_t a, uint32_t b)
 {
-	float f;
-	double d;
-
-	f = *((float*)&m);
-	d = f;
-#ifdef DEBUG_MIASM_DOUBLE
-	dump_float();
-	printf("%d float %e\n", m, d);
-#endif
-	return d;
-}
-
-
-double mem_64_to_double(uint64_t m)
-{
-	double d;
-	d = *((double*)&m);
+	float c;
+	c = *((float*)&a) + *((float*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%"PRId64" double %e\n", m, d);
-#endif
-	return d;
-}
-
-double int_16_to_double(unsigned int m)
-{
-	double d;
-
-	d = (double)(m&0xffff);
-#ifdef DEBUG_MIASM_DOUBLE
-	dump_float();
-	printf("%d double %e\n", m, d);
-#endif
-	return d;
-}
-
-double int_32_to_double(unsigned int m)
-{
-	double d;
-
-	d = (double)m;
-#ifdef DEBUG_MIASM_DOUBLE
-	dump_float();
-	printf("%d double %e\n", m, d);
+	printf("%e + %e -> %e\n", a, b, c);
 #endif
-	return d;
+	return *((uint32_t*)&c);
 }
 
-double int_64_to_double(uint64_t m)
+uint64_t fpu_fadd64(uint64_t a, uint64_t b)
 {
-	double d;
-
-	d = (double)m;
+	double c;
+	c = *((double*)&a) + *((double*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%"PRId64" double %e\n", m, d);
+	printf("%e + %e -> %e\n", a, b, c);
 #endif
-	return d;
+	return *((uint64_t*)&c);
 }
 
-int16_t double_to_int_16(double d)
+uint32_t fpu_fsub32(uint32_t a, uint32_t b)
 {
-	int16_t i;
-
-	i = (int16_t)d;
+	float c;
+	c = *((float*)&a) - *((float*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%e int %d\n", d, i);
+	printf("%e - %e -> %e\n", a, b, c);
 #endif
-	return i;
+	return *((uint32_t*)&c);
 }
 
-int32_t double_to_int_32(double d)
+uint64_t fpu_fsub64(uint64_t a, uint64_t b)
 {
-	int32_t i;
-
-	i = (int32_t)d;
+	double c;
+	c = *((double*)&a) - *((double*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%e int %d\n", d, i);
+	printf("%e - %e -> %e\n", a, b, c);
 #endif
-	return i;
+	return *((uint64_t*)&c);
 }
 
-int64_t double_to_int_64(double d)
+uint32_t fpu_fmul32(uint32_t a, uint32_t b)
 {
-	int64_t i;
-
-	i = (int64_t)d;
+	float c;
+	c = *((float*)&a) * *((float*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%e int %"PRId64"\n", d, i);
+	printf("%e * %e -> %e\n", a, b, c);
 #endif
-	return i;
+	return *((uint32_t*)&c);
 }
 
-
-double fpu_fadd(double a, double b)
+uint64_t fpu_fmul64(uint64_t a, uint64_t b)
 {
 	double c;
-	c = a + b;
+	c = *((double*)&a) * *((double*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%e + %e -> %e\n", a, b, c);
+	printf("%e * %e -> %e\n", a, b, c);
 #endif
-	return c;
+	return *((uint64_t*)&c);
 }
 
-double fpu_fsub(double a, double b)
+uint32_t fpu_fdiv32(uint32_t a, uint32_t b)
 {
-	double c;
-	c = a - b;
-#ifdef DEBUG_MIASM_DOUBLE
-	dump_float();
-	printf("%e - %e -> %e\n", a, b, c);
-#endif
-	return c;
-}
-
-double fpu_fmul(double a, double b)
-{
-	double c;
-	c = a * b;
+	float c;
+	c = *((float*)&a) / *((float*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%e * %e -> %e\n", a, b, c);
+	printf("%e / %e -> %e\n", a, b, c);
 #endif
-	return c;
+	return *((uint32_t*)&c);
 }
 
-double fpu_fdiv(double a, double b)
+uint64_t fpu_fdiv64(uint64_t a, uint64_t b)
 {
 	double c;
-	c = a / b;
+	c = *((double*)&a) / *((double*)&b);
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
-	printf("%e / %e -> %e\n", a, b, c);
+	printf("%e / %e -> %e\n", a, b, c);
 #endif
-	return c;
+	return *((uint64_t*)&c);
 }
 
 double fpu_ftan(double a)
@@ -567,15 +512,26 @@ double fpu_f2xm1(double a)
 	return b;
 }
 
-double fpu_fsqrt(double a)
+uint32_t fpu_fsqrt32(uint32_t a)
+{
+	float b;
+	b = sqrtf(*((float*)&a));
+#ifdef DEBUG_MIASM_DOUBLE
+	dump_float();
+	printf("%e sqrt %e\n", a, b);
+#endif
+	return *((uint32_t*)&b);
+}
+
+uint64_t fpu_fsqrt64(uint64_t a)
 {
 	double b;
-	b = sqrt(a);
+	b = sqrt(*((double*)&a));
 #ifdef DEBUG_MIASM_DOUBLE
 	dump_float();
 	printf("%e sqrt %e\n", a, b);
 #endif
-	return b;
+	return *((uint64_t*)&b);
 }
 
 double fpu_fabs(double a)
@@ -751,30 +707,75 @@ unsigned int fpu_fxam_c3(double a)
 	}
 }
 
-unsigned int double_to_mem_32(double d)
+uint64_t sint64_to_fp64(int64_t a)
 {
-	unsigned int m;
-	float f;
-	f = d;
-	m = *((unsigned int*)&f);
-#ifdef DEBUG_MIASM_DOUBLE
-	dump_float();
-	printf("%d %e\n", m, d);
-#endif
-	return m;
+	double result = (double) a;
+	return *((uint64_t*)&result);
 }
 
-uint64_t double_to_mem_64(double d)
+uint32_t sint32_to_fp32(int32_t a)
 {
-	uint64_t m;
-	m = *((uint64_t*)&d);
-#ifdef DEBUG_MIASM_DOUBLE
-	dump_float();
-	printf("%"PRId64" %e\n", m, d);
-#endif
-	return m;
+	float result = (float) a;
+	return *((uint32_t*)&result);
+}
+
+uint64_t sint32_to_fp64(int32_t a)
+{
+	double result = (double) a;
+	return *((uint64_t*)&result);
 }
 
+int32_t fp32_to_sint32(uint32_t a)
+{
+	// Enforce nearbyint (IEEE-754 behavior)
+	float rounded = *((float*)&a);
+	rounded = nearbyintf(rounded);
+	return (int32_t) rounded;
+}
+
+int64_t fp64_to_sint64(uint64_t a)
+{
+	// Enforce nearbyint (IEEE-754 behavior)
+	double rounded = *((double*)&a);
+	rounded = nearbyint(rounded);
+	return (int64_t) rounded;
+}
+
+int32_t fp64_to_sint32(uint64_t a)
+{
+	// Enforce nearbyint (IEEE-754 behavior)
+	double rounded = *((double*)&a);
+	rounded = nearbyint(rounded);
+	return (int32_t) rounded;
+}
+
+uint32_t fp64_to_fp32(uint64_t a)
+{
+	float result = (float) *((double*)&a);
+	return *((uint32_t*)&result);
+}
+
+uint64_t fp32_to_fp64(uint32_t a)
+{
+	double result = (double) *((float*)&a);
+	return *((uint64_t*)&result);
+}
+
+uint32_t fpround_towardszero_fp32(uint32_t a)
+{
+	float rounded = *((float*)&a);
+	rounded = truncf(rounded);
+	return *((uint32_t*)&rounded);
+}
+
+uint64_t fpround_towardszero_fp64(uint64_t a)
+{
+	double rounded = *((double*)&a);
+	rounded = trunc(rounded);
+	return *((uint64_t*)&rounded);
+}
+
+
 UDIV(16)
 UDIV(32)
 UDIV(64)
diff --git a/miasm2/jitter/op_semantics.h b/miasm2/jitter/op_semantics.h
index 3eb81cff..f8042895 100644
--- a/miasm2/jitter/op_semantics.h
+++ b/miasm2/jitter/op_semantics.h
@@ -96,19 +96,23 @@ int16_t idiv16(int16_t a, int16_t b);
 int16_t imod16(int16_t a, int16_t b);
 unsigned int x86_cpuid(unsigned int a, unsigned int reg_num);
 
-double int2double(unsigned int m);
-double fpu_fadd(double a, double b);
-double fpu_fsub(double a, double b);
-double fpu_fmul(double a, double b);
-double fpu_fdiv(double a, double b);
+uint32_t fpu_fadd32(uint32_t a, uint32_t b);
+uint64_t fpu_fadd64(uint64_t a, uint64_t b);
+uint32_t fpu_fsub32(uint32_t a, uint32_t b);
+uint64_t fpu_fsub64(uint64_t a, uint64_t b);
+uint32_t fpu_fmul32(uint32_t a, uint32_t b);
+uint64_t fpu_fmul64(uint64_t a, uint64_t b);
+uint32_t fpu_fdiv32(uint32_t a, uint32_t b);
+uint64_t fpu_fdiv64(uint64_t a, uint64_t b);
 double fpu_ftan(double a);
 double fpu_frndint(double a);
 double fpu_fsin(double a);
 double fpu_fcos(double a);
 double fpu_fscale(double a, double b);
 double fpu_f2xm1(double a);
-double fpu_fsqrt(double a);
+uint32_t fpu_fsqrt32(uint32_t a);
+uint64_t fpu_fsqrt64(uint64_t a);
 double fpu_fabs(double a);
 double fpu_fprem(double a, double b);
 double fpu_fchs(double a);
@@ -124,18 +128,16 @@ unsigned int fpu_fxam_c1(double a);
 unsigned int fpu_fxam_c2(double a);
 unsigned int fpu_fxam_c3(double a);
 
-
-double mem_32_to_double(unsigned int m);
-double mem_64_to_double(uint64_t m);
-double int_16_to_double(unsigned int m);
-double int_32_to_double(unsigned int m);
-double int_64_to_double(uint64_t m);
-int16_t double_to_int_16(double d);
-int32_t double_to_int_32(double d);
-int64_t double_to_int_64(double d);
-unsigned int double_to_mem_32(double d);
-uint64_t double_to_mem_64(double d);
-
+uint64_t sint64_to_fp64(int64_t a);
+uint32_t sint32_to_fp32(int32_t a);
+uint64_t sint32_to_fp64(int32_t a);
+int32_t fp32_to_sint32(uint32_t a);
+int64_t fp64_to_sint64(uint64_t a);
+int32_t fp64_to_sint32(uint64_t a);
+uint32_t fp64_to_fp32(uint64_t a);
+uint64_t fp32_to_fp64(uint32_t a);
+uint32_t fpround_towardszero_fp32(uint32_t a);
+uint64_t fpround_towardszero_fp64(uint64_t a);
 
 #define SHIFT_RIGHT_ARITH(size, value, shift)				\
 	((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))?	\
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 43e973e1..ce6012a0 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -2306,7 +2306,8 @@ reg_tests = [
      "0f50c2"),
     (m64, "00000000    MOVMSKPS   R8D, XMM2",
      "440f50c2"),
-
+    (m64, "00000000    MOVMSKPD   EAX, XMM2",
+     "660F50C2"),
     (m32, "00000000    ADDSS      XMM2, DWORD PTR [ECX]",
      "f30f5811"),
     (m32, "00000000    ADDSS      XMM1, XMM2",
@@ -2351,6 +2352,32 @@ reg_tests = [
     (m32, "00000000    MAXSS      XMM0, DWORD PTR [EBX + 0x2CBD37]",
      "f30f5f8337bd2c00"),
 
+    (m32, "00000000    MINPS      XMM0, XMM2",
+     "0F5DC2"),
+    (m32, "00000000    MINSS      XMM0, XMM3",
+     "F30F5DC3"),
+    (m32, "00000000    MINPD      XMM0, XMM4",
+     "660F5DC4"),
+    (m32, "00000000    MINSD      XMM0, XMM5",
+     "F20F5DC5"),
+    (m32, "00000000    MAXPS      XMM0, XMM6",
+     "0F5FC6"),
+    (m32, "00000000    MAXPD      XMM0, XMM1",
+     "660F5FC1"),
+    (m32, "00000000    MAXSD      XMM0, XMM2",
+     "F20F5FC2"),
+    (m32, "00000000    MAXSS      XMM0, XMM7",
+     "F30F5FC7"),
+    (m32, "00000000    CMPEQPS    XMM0, XMM3",
+     "0FC2C300"),
+    (m32, "00000000    CMPEQSS    XMM0, XMM4",
+     "F30FC2C400"),
+    (m32, "00000000    CMPEQPD    XMM0, XMM5",
+     "660FC2C500"),
+    (m32, "00000000    CMPEQSD    XMM0, XMM6",
+     "F20FC2C600"),
+
     (m32, "00000000    CVTDQ2PD   XMM0, XMM3",
      "f30fe6c3"),
     (m32, "00000000    CVTDQ2PS   XMM0, XMM3",
@@ -2485,6 +2512,9 @@ reg_tests = [
     (m64, "00000000    MOVQ       RCX, XMM0",
      "66480F7EC1"),
 
+    (m32, "00000000    MOVQ2DQ    XMM0, MM1",
+     "F30FD6C1"),
+
     (m32, "00000000    PAND       MM2, MM6",
      "0fdbd6"),
     (m32, "00000000    PAND       XMM2, XMM6",
@@ -2671,6 +2701,14 @@ reg_tests = [
     (m32, "00000000    PSRLW      XMM6, 0x5",
      "660F71D605"),
 
+    (m32, "00000000    PSRAW      XMM0, 0x7",
+     "660F71E007"),
+    (m32, "00000000    PSRAW      XMM0, XMM3",
+     "660FE1C3"),
+    (m32, "00000000    PSRAD      XMM0, 0x7",
+     "660F72E007"),
+    (m32, "00000000    PSRAD      XMM0, XMM3",
+     "660FE2C3"),
 
     (m32, "00000000    PSRLQ      MM2, QWORD PTR [EDX]",
      "0FD312"),
diff --git a/test/test_all.py b/test/test_all.py
index a10ab026..665fc3a5 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -162,6 +162,7 @@ QEMU_TESTS = [
     "xchg",
     "string",
     "misc",
+    'sse',
     # Unsupported
     # "floats", "segs", "code16", "exceptions", "single_step"
 ]
@@ -169,6 +170,9 @@ QEMU_TESTS = [
 
 for test_name in QEMU_TESTS:
     for jitter in QEMUTest.jitter_engines:
+        if (test_name, jitter) in [("sse", "python")]:
+            # SKIP unsupported
+            continue
         tags = [TAGS[jitter]] if jitter in TAGS else []
         testset += QEMUTest(test_name, jitter, tags=tags)
```
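A quick way to exercise one of the new encodings outside the test harness, assuming the miasm2 disassembly API used by test/arch/x86/arch.py (Python 2):

```python
from miasm2.arch.x86.arch import mn_x86

# Decode the MINPD test vector from the table above
instr = mn_x86.dis("660F5DC4".decode("hex"), 32)
print instr  # expected: MINPD XMM0, XMM4
```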