From 0c35bb71b6b3f32b2a62618f4b67ef87df81958d Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 12:14:59 +0100 Subject: Add PACKSSWB instruction 0F 63 /r PACKSSWB mm1, mm2/m64 66 0F 63 /r PACKSSWB xmm1, xmm2/m128 --- miasm2/arch/x86/sem.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index deebba8c..635206b6 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4173,6 +4173,54 @@ def palignr(ir, instr, dst, src, imm): return [m2_expr.ExprAff(dst, result)], [] +def _signed_saturation(expr, dst_size): + """Saturate the expr @expr for @dst_size bit + Signed saturation return MAX_INT / MIN_INT or value depending on the value + """ + assert expr.size > dst_size + + median = 1 << (dst_size - 1) + min_int = m2_expr.ExprInt(- median, dst_size) + max_int = m2_expr.ExprInt(median - 1, dst_size) + signed = expr.msb() + value_unsigned = expr ^ expr.mask + m2_expr.ExprInt(1, expr.size) + # Re-use the sign bit + value = m2_expr.ExprCompose(expr[:dst_size - 1], signed) + + # Bit hack: to avoid a double signed comparison, use mask + # ie., in unsigned, 0xXY > 0x0f iff X is not null + + # if expr >s 0 + # if expr[dst_size:] > 0: # bigger than max_int + # -> max_int + # else + # -> value + # else # negative + # if expr[dst_size:-1] > 0: # smaller than min_int + # -> value + # else + # -> min_int + + return m2_expr.ExprCond( + signed, + m2_expr.ExprCond(value_unsigned[dst_size:], + min_int, + value), + m2_expr.ExprCond(expr[dst_size:], + max_int, + value), + ) + + +def packsswb(ir, instr, dst, src): + out = [] + for source in [dst, src]: + for start in xrange(0, dst.size, 16): + out.append(_signed_saturation(source[start:start + 16], 8)) + return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] + + + mnemo_func = {'mov': mov, 'xchg': xchg, 'movzx': movzx, @@ -4670,6 +4718,8 @@ mnemo_func = {'mov': mov, "pmovmskb": pmovmskb, + "packsswb": packsswb, + "smsw": smsw, } -- cgit 1.4.1 From 615ee255906c0fe036e9dc87ee65ed27e0b6f88d Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 13:24:29 +0100 Subject: Add PACKUSWB instruction --- miasm2/arch/x86/arch.py | 5 +++++ miasm2/arch/x86/sem.py | 33 +++++++++++++++++++++++++++++++++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 43 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 793d5a47..572c2378 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4463,6 +4463,11 @@ addop("packsswb", [bs8(0x0f), bs8(0x63), no_xmm_pref] + addop("packsswb", [bs8(0x0f), bs8(0x63), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) """ diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 635206b6..6dcc76af 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4212,6 +4212,31 @@ def _signed_saturation(expr, dst_size): ) +def _unsigned_saturation(expr, dst_size): + """Saturate the expr @expr for @dst_size bit + Unsigned saturation return MAX_INT or value depending on the value + """ + assert expr.size > dst_size + + zero = m2_expr.ExprInt(0, dst_size) + max_int = m2_expr.ExprInt(-1, dst_size) + value = expr[:dst_size] + signed = expr.msb() + + + # Bit hack: to avoid a double signed comparison, use mask + # ie., in unsigned, 0xXY > 0x0f iff X is not null + + return m2_expr.ExprCond( + signed, + zero, + m2_expr.ExprCond(expr[dst_size:], + max_int, + value), + ) + + + def packsswb(ir, instr, dst, src): out = [] for source in [dst, src]: @@ -4220,6 +4245,13 @@ def packsswb(ir, instr, dst, src): return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] +def packuswb(ir, instr, dst, src): + out = [] + for source in [dst, src]: + for start in xrange(0, dst.size, 16): + out.append(_unsigned_saturation(source[start:start + 16], 8)) + return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] + mnemo_func = {'mov': mov, 'xchg': xchg, @@ -4719,6 +4751,7 @@ mnemo_func = {'mov': mov, "pmovmskb": pmovmskb, "packsswb": packsswb, + "packuswb": packuswb, "smsw": smsw, diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index b9bfec74..fbcb88f8 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2974,6 +2974,11 @@ reg_tests = [ "0f63f8"), (m32, "00000000 PACKSSWB XMM0, XMM5", "660f63c5"), + + (m32, "00000000 PACKUSWB MM1, MM7", + "0f67cf"), + (m32, "00000000 PACKUSWB XMM0, XMM6", + "660f67c6"), ] -- cgit 1.4.1 From a328d6d33ce0b513bf41883380025ce8284f26d3 Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 13:30:21 +0100 Subject: Add PACKSSDW instruction 0F 6B /r PACKSSDW mm1, mm2/m64 66 0F 6B /r PACKSSDW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 4 ++++ miasm2/arch/x86/sem.py | 9 +++++++++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 18 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 572c2378..71f4409d 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4462,6 +4462,10 @@ addop("packsswb", [bs8(0x0f), bs8(0x63), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) addop("packsswb", [bs8(0x0f), bs8(0x63), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("packssdw", [bs8(0x0f), bs8(0x6b), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("packssdw", [bs8(0x0f), bs8(0x6b), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 6dcc76af..57716447 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4245,6 +4245,14 @@ def packsswb(ir, instr, dst, src): return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] +def packssdw(ir, instr, dst, src): + out = [] + for source in [dst, src]: + for start in xrange(0, dst.size, 32): + out.append(_signed_saturation(source[start:start + 32], 16)) + return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] + + def packuswb(ir, instr, dst, src): out = [] for source in [dst, src]: @@ -4751,6 +4759,7 @@ mnemo_func = {'mov': mov, "pmovmskb": pmovmskb, "packsswb": packsswb, + "packssdw": packssdw, "packuswb": packuswb, "smsw": smsw, diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index fbcb88f8..284bb40c 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2975,6 +2975,11 @@ reg_tests = [ (m32, "00000000 PACKSSWB XMM0, XMM5", "660f63c5"), + (m32, "00000000 PACKSSDW MM2, MM0", + "0f6bd0"), + (m32, "00000000 PACKSSDW XMM0, XMM7", + "660f6bc7"), + (m32, "00000000 PACKUSWB MM1, MM7", "0f67cf"), (m32, "00000000 PACKUSWB XMM0, XMM6", -- cgit 1.4.1 From d533aee1b340f21974dc3c255d04ac0d35a73e84 Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 13:47:43 +0100 Subject: Add PMULLW instruction --- miasm2/arch/x86/arch.py | 6 ++++++ miasm2/arch/x86/sem.py | 15 +++++++++++++++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 26 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 71f4409d..98e29b63 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4472,6 +4472,12 @@ addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] + addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) """ diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 57716447..7c990199 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3398,6 +3398,15 @@ psubw = vec_vertical_instr('-', 16) psubd = vec_vertical_instr('-', 32) psubq = vec_vertical_instr('-', 64) +# Multiplications +# + +# SSE +pmullb = vec_vertical_instr('*', 8) +pmullw = vec_vertical_instr('*', 16) +pmulld = vec_vertical_instr('*', 32) +pmullq = vec_vertical_instr('*', 64) + # Floating-point arithmetic # @@ -4645,6 +4654,12 @@ mnemo_func = {'mov': mov, "psubd": psubd, "psubq": psubq, + # SSE + "pmullb": pmullb, + "pmullw": pmullw, + "pmulld": pmulld, + "pmullq": pmullq, + # Arithmetic (floating-point) # diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 284bb40c..9f3256f3 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2984,6 +2984,11 @@ reg_tests = [ "0f67cf"), (m32, "00000000 PACKUSWB XMM0, XMM6", "660f67c6"), + + (m32, "00000000 PMULLW MM4, MM2", + "0fd5e2"), + (m32, "00000000 PMULLW XMM0, XMM3", + "660fd5c3"), ] -- cgit 1.4.1 From c92be77fa7afa23dd06124325e9dc127ebb67e22 Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 15:57:53 +0100 Subject: Add PSUBSUB/PSUBUSW instr 0F D8 /r PSUBUSB mm, mm/m64 66 0F D8 /r PSUBUSB xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 9 ++++++ miasm2/arch/x86/sem.py | 84 ++++++++++++++++++++++++++++++++++--------------- test/arch/x86/arch.py | 11 +++++++ 3 files changed, 79 insertions(+), 25 deletions(-) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 98e29b63..f4ef7349 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4477,6 +4477,15 @@ addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] + addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubusb", [bs8(0x0f), bs8(0xd8), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 7c990199..4b4f40a1 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3319,62 +3319,77 @@ def vec_op_clip(op, size): # Generic vertical operation -def vec_vertical_sem(op, elt_size, reg_size, dst, src): +def vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output): assert reg_size % elt_size == 0 n = reg_size / elt_size if op == '-': ops = [ - (dst[i * elt_size:(i + 1) * elt_size] - - src[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)] + apply_on_output((dst[i * elt_size:(i + 1) * elt_size] + - src[i * elt_size:(i + 1) * elt_size])) + for i in xrange(0, n) + ] else: - ops = [m2_expr.ExprOp(op, dst[i * elt_size:(i + 1) * elt_size], - src[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)] + ops = [ + apply_on_output(m2_expr.ExprOp(op, dst[i * elt_size:(i + 1) * elt_size], + src[i * elt_size:(i + 1) * elt_size])) + for i in xrange(0, n) + ] return m2_expr.ExprCompose(*ops) -def float_vec_vertical_sem(op, elt_size, reg_size, dst, src): +def float_vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output): assert reg_size % elt_size == 0 n = reg_size / elt_size x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'), 64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size] if op == '-': - ops = [m2_expr.ExprOp(x_to_int % elt_size, - m2_expr.ExprOp(int_to_x % elt_size, dst[i * elt_size:(i + 1) * elt_size]) - - m2_expr.ExprOp( - int_to_x % elt_size, src[i * elt_size:( - i + 1) * elt_size])) for i in xrange(0, n)] + ops = [ + apply_on_output(m2_expr.ExprOp( + x_to_int % elt_size, + m2_expr.ExprOp(int_to_x % elt_size, dst[i * elt_size:(i + 1) * elt_size]) - + m2_expr.ExprOp( + int_to_x % elt_size, src[i * elt_size:( + i + 1) * elt_size]))) + for i in xrange(0, n) + ] else: - ops = [m2_expr.ExprOp(x_to_int % elt_size, - m2_expr.ExprOp(op, - m2_expr.ExprOp( - int_to_x % elt_size, dst[i * elt_size:( - i + 1) * elt_size]), - m2_expr.ExprOp( - int_to_x % elt_size, src[i * elt_size:( - i + 1) * elt_size]))) for i in xrange(0, n)] + ops = [ + apply_on_output(m2_expr.ExprOp( + x_to_int % elt_size, + m2_expr.ExprOp(op, + m2_expr.ExprOp( + int_to_x % elt_size, dst[i * elt_size:( + i + 1) * elt_size]), + m2_expr.ExprOp( + int_to_x % elt_size, src[i * elt_size:( + i + 1) * elt_size])))) + for i in xrange(0, n)] return m2_expr.ExprCompose(*ops) -def __vec_vertical_instr_gen(op, elt_size, sem): +def __vec_vertical_instr_gen(op, elt_size, sem, apply_on_output): def vec_instr(ir, instr, dst, src): e = [] if isinstance(src, m2_expr.ExprMem): src = ir.ExprMem(src.arg, dst.size) reg_size = dst.size - e.append(m2_expr.ExprAff(dst, sem(op, elt_size, reg_size, dst, src))) + e.append(m2_expr.ExprAff(dst, sem(op, elt_size, reg_size, dst, src, + apply_on_output))) return e, [] return vec_instr -def vec_vertical_instr(op, elt_size): - return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem) +def vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x): + return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem, + apply_on_output) -def float_vec_vertical_instr(op, elt_size): - return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem) +def float_vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x): + return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem, + apply_on_output) # Integer arithmetic @@ -4270,6 +4285,22 @@ def packuswb(ir, instr, dst, src): return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] +def _saturation_sub(expr): + assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") + + # Compute the soustraction on one more bit to be able to distinguish cases: + # 0x48 - 0xd7 in 8 bit, should saturate + arg1 = expr.args[0].zeroExtend(expr.size + 1) + arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1) + return _unsigned_saturation(arg1 - arg2, expr.size) + + +# Saturate SSE operations + +psubusb = vec_vertical_instr('-', 8, _saturation_sub) +psubusw = vec_vertical_instr('-', 16, _saturation_sub) + + mnemo_func = {'mov': mov, 'xchg': xchg, 'movzx': movzx, @@ -4777,6 +4808,9 @@ mnemo_func = {'mov': mov, "packssdw": packssdw, "packuswb": packuswb, + "psubusb": psubusb, + "psubusw": psubusw, + "smsw": smsw, } diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 9f3256f3..3e0b9333 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2989,6 +2989,17 @@ reg_tests = [ "0fd5e2"), (m32, "00000000 PMULLW XMM0, XMM3", "660fd5c3"), + + (m32, "00000000 PSUBUSB MM5, MM3", + "0fd8eb"), + (m32, "00000000 PSUBUSB XMM0, XMM5", + "660fd8c5"), + + (m32, "00000000 PSUBUSW MM5, MM3", + "0fd9eb"), + (m32, "00000000 PSUBUSW XMM0, XMM5", + "660fd9c5"), + ] -- cgit 1.4.1 From bee25554ff9c86f81f16e191f09693f336365ad2 Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 16:19:55 +0100 Subject: Add PADDUSB/PADDUSW instruction 0F DC /r PADDUSB mm, mm/m64 66 0F DC /r PADDUSB xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 9 +++++++++ miasm2/arch/x86/sem.py | 21 +++++++++++++++++++++ test/arch/x86/arch.py | 9 +++++++++ 3 files changed, 39 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index f4ef7349..303cad6e 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4486,6 +4486,15 @@ addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] + addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddusb", [bs8(0x0f), bs8(0xdc), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddusb", [bs8(0x0f), bs8(0xdc), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddusw", [bs8(0x0f), bs8(0xdd), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddusw", [bs8(0x0f), bs8(0xdd), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 4b4f40a1..adf5820d 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4294,11 +4294,30 @@ def _saturation_sub(expr): arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1) return _unsigned_saturation(arg1 - arg2, expr.size) +def _saturation_add(expr): + assert expr.is_op("+") and len(expr.args) == 2 + + # Compute the addition on one more bit to be able to distinguish cases: + # 0x48 + 0xd7 in 8 bit, should saturate + + arg1 = expr.args[0].zeroExtend(expr.size + 1) + arg2 = expr.args[1].zeroExtend(expr.size + 1) + + # We can also use _unsigned_saturation with two additionnal bits (to + # distinguish minus and overflow case) + # The resulting expression being more complicated with an impossible case + # (signed=True), we rewrite the rule here + + return m2_expr.ExprCond((arg1 + arg2).msb(), m2_expr.ExprInt(-1, expr.size), + expr) + # Saturate SSE operations psubusb = vec_vertical_instr('-', 8, _saturation_sub) psubusw = vec_vertical_instr('-', 16, _saturation_sub) +paddusb = vec_vertical_instr('+', 8, _saturation_add) +paddusw = vec_vertical_instr('+', 16, _saturation_add) mnemo_func = {'mov': mov, @@ -4810,6 +4829,8 @@ mnemo_func = {'mov': mov, "psubusb": psubusb, "psubusw": psubusw, + "paddusb": paddusb, + "paddusw": paddusw, "smsw": smsw, diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 3e0b9333..9b1a4d25 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3000,6 +3000,15 @@ reg_tests = [ (m32, "00000000 PSUBUSW XMM0, XMM5", "660fd9c5"), + (m32, "00000000 PADDUSB MM5, MM3", + "0fdceb"), + (m32, "00000000 PADDUSB XMM0, XMM6", + "660fdcc6"), + + (m32, "00000000 PADDUSW MM7, MM5", + "0fddfd"), + (m32, "00000000 PADDUSW XMM0, XMM1", + "660fddc1"), ] -- cgit 1.4.1 From 98a0c25a6e914089feb73e8bc4b79ff33a38a86b Mon Sep 17 00:00:00 2001 From: Ajax Date: Thu, 8 Feb 2018 16:43:37 +0100 Subject: Add PMULHW / PMULHUW instruction 0F E5 /r PMULHW mm, mm/m64 66 0F E5 /r PMULHW xmm1, xmm2/m128 NP 0F E4 /r PMULHUW mm1, mm2/m64 66 0F E4 /r PMULHUW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 9 +++++++++ miasm2/arch/x86/sem.py | 29 +++++++++++++++++++++++++++++ test/arch/x86/arch.py | 10 ++++++++++ 3 files changed, 48 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 303cad6e..d58ac64b 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4476,6 +4476,15 @@ addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmulhuw", [bs8(0x0f), bs8(0xe4), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmulhuw", [bs8(0x0f), bs8(0xe4), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmulhw", [bs8(0x0f), bs8(0xe5), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index adf5820d..ba56c91c 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3392,6 +3392,17 @@ def float_vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x): apply_on_output) +def _keep_mul_high(expr, signed=False): + assert expr.is_op("*") and len(expr.args) == 2 + + if signed: + arg1 = expr.args[0].signExtend(expr.size * 2) + arg2 = expr.args[1].signExtend(expr.size * 2) + else: + arg1 = expr.args[0].zeroExtend(expr.size * 2) + arg2 = expr.args[1].zeroExtend(expr.size * 2) + return m2_expr.ExprOp("*", arg1, arg2)[expr.size:] + # Integer arithmetic # @@ -3421,6 +3432,14 @@ pmullb = vec_vertical_instr('*', 8) pmullw = vec_vertical_instr('*', 16) pmulld = vec_vertical_instr('*', 32) pmullq = vec_vertical_instr('*', 64) +pmulhub = vec_vertical_instr('*', 8, _keep_mul_high) +pmulhuw = vec_vertical_instr('*', 16, _keep_mul_high) +pmulhud = vec_vertical_instr('*', 32, _keep_mul_high) +pmulhuq = vec_vertical_instr('*', 64, _keep_mul_high) +pmulhb = vec_vertical_instr('*', 8, lambda x: _keep_mul_high(x, signed=True)) +pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True)) +pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True)) +pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True)) # Floating-point arithmetic # @@ -4704,11 +4723,21 @@ mnemo_func = {'mov': mov, "psubd": psubd, "psubq": psubq, + # Multiplications # SSE "pmullb": pmullb, "pmullw": pmullw, "pmulld": pmulld, "pmullq": pmullq, + "pmulhub": pmulhub, + "pmulhuw": pmulhuw, + "pmulhud": pmulhud, + "pmulhuq": pmulhuq, + "pmulhb": pmulhb, + "pmulhw": pmulhw, + "pmulhd": pmulhd, + "pmulhq": pmulhq, + # Arithmetic (floating-point) # diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 9b1a4d25..82475e51 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3009,6 +3009,16 @@ reg_tests = [ "0fddfd"), (m32, "00000000 PADDUSW XMM0, XMM1", "660fddc1"), + + (m32, "00000000 PMULHUW MM6, MM4", + "0fe4f4"), + (m32, "00000000 PMULHUW XMM0, XMM7", + "660fe4c7"), + + (m32, "00000000 PMULHW MM6, MM4", + "0fe5f4"), + (m32, "00000000 PMULHW XMM0, XMM7", + "660fe5c7"), ] -- cgit 1.4.1 From 426fc42a2b991e0249e73edab304530ba96e8a79 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 09:19:55 +0100 Subject: Add PSUBSB/PSUBSW instruction NP 0F E8 /r PSUBSB mm, mm/m64 66 0F E8 /r PSUBSB xmm1, xmm2/m128 NP 0F E9 /r PSUBSW mm, mm/m64 66 0F E9 /r PSUBSW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 9 +++++++++ miasm2/arch/x86/sem.py | 26 +++++++++++++++++++------- test/arch/x86/arch.py | 10 ++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index d58ac64b..03d7449d 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4494,6 +4494,15 @@ addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubsb", [bs8(0x0f), bs8(0xe8), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubsb", [bs8(0x0f), bs8(0xe8), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubsw", [bs8(0x0f), bs8(0xe9), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubsw", [bs8(0x0f), bs8(0xe9), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + addop("paddusb", [bs8(0x0f), bs8(0xdc), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index ba56c91c..5811502c 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4226,7 +4226,7 @@ def _signed_saturation(expr, dst_size): min_int = m2_expr.ExprInt(- median, dst_size) max_int = m2_expr.ExprInt(median - 1, dst_size) signed = expr.msb() - value_unsigned = expr ^ expr.mask + m2_expr.ExprInt(1, expr.size) + value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size) # Re-use the sign bit value = m2_expr.ExprCompose(expr[:dst_size - 1], signed) @@ -4234,7 +4234,7 @@ def _signed_saturation(expr, dst_size): # ie., in unsigned, 0xXY > 0x0f iff X is not null # if expr >s 0 - # if expr[dst_size:] > 0: # bigger than max_int + # if expr[dst_size - 1:] > 0: # bigger than max_int # -> max_int # else # -> value @@ -4246,10 +4246,10 @@ def _signed_saturation(expr, dst_size): return m2_expr.ExprCond( signed, - m2_expr.ExprCond(value_unsigned[dst_size:], + m2_expr.ExprCond(value_unsigned[dst_size - 1:], min_int, value), - m2_expr.ExprCond(expr[dst_size:], + m2_expr.ExprCond(expr[dst_size - 1:], max_int, value), ) @@ -4304,7 +4304,7 @@ def packuswb(ir, instr, dst, src): return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] -def _saturation_sub(expr): +def _saturation_sub_unsigned(expr): assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") # Compute the soustraction on one more bit to be able to distinguish cases: @@ -4313,6 +4313,14 @@ def _saturation_sub(expr): arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1) return _unsigned_saturation(arg1 - arg2, expr.size) +def _saturation_sub_signed(expr): + assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") + + # Compute the substraction on two more bits, see _saturation_sub_unsigned + arg1 = expr.args[0].signExtend(expr.size + 2) + arg2 = expr.args[1].args[0].signExtend(expr.size + 2) + return _signed_saturation(arg1 - arg2, expr.size) + def _saturation_add(expr): assert expr.is_op("+") and len(expr.args) == 2 @@ -4333,10 +4341,12 @@ def _saturation_add(expr): # Saturate SSE operations -psubusb = vec_vertical_instr('-', 8, _saturation_sub) -psubusw = vec_vertical_instr('-', 16, _saturation_sub) +psubusb = vec_vertical_instr('-', 8, _saturation_sub_unsigned) +psubusw = vec_vertical_instr('-', 16, _saturation_sub_unsigned) paddusb = vec_vertical_instr('+', 8, _saturation_add) paddusw = vec_vertical_instr('+', 16, _saturation_add) +psubsb = vec_vertical_instr('-', 8, _saturation_sub_signed) +psubsw = vec_vertical_instr('-', 16, _saturation_sub_signed) mnemo_func = {'mov': mov, @@ -4860,6 +4870,8 @@ mnemo_func = {'mov': mov, "psubusw": psubusw, "paddusb": paddusb, "paddusw": paddusw, + "psubsb": psubsb, + "psubsw": psubsw, "smsw": smsw, diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 82475e51..fb757b33 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3019,6 +3019,16 @@ reg_tests = [ "0fe5f4"), (m32, "00000000 PMULHW XMM0, XMM7", "660fe5c7"), + + (m32, "00000000 PSUBSB MM2, MM0", + "0fe8d0"), + (m32, "00000000 PSUBSB XMM0, XMM4", + "660fe8c4"), + + (m32, "00000000 PSUBSW MM3, MM1", + "0fe9d9"), + (m32, "00000000 PSUBSW XMM0, XMM6", + "660fe9c6"), ] -- cgit 1.4.1 From df4b2904c00bca0d15062493666ce50ff0b56632 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 09:33:24 +0100 Subject: Fix PMINSW semantic Tested against QEMU --- miasm2/arch/x86/sem.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 5811502c..97373abf 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3403,6 +3403,14 @@ def _keep_mul_high(expr, signed=False): arg2 = expr.args[1].zeroExtend(expr.size * 2) return m2_expr.ExprOp("*", arg1, arg2)[expr.size:] +def _signed_min(expr): + assert expr.is_op("min") and len(expr.args) == 2 + return m2_expr.ExprCond( + m2_expr.expr_is_signed_lower(expr.args[1], expr.args[0]), + expr.args[1], + expr.args[0], + ) + # Integer arithmetic # @@ -3441,6 +3449,13 @@ pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True)) pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True)) pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True)) +# Comparisons +# + +# SSE +pminsw = vec_vertical_instr('min', 16, _signed_min) + + # Floating-point arithmetic # @@ -3491,12 +3506,6 @@ def por(_, instr, dst, src): return e, [] -def pminsw(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAff(dst, m2_expr.ExprCond((dst - src).msb(), dst, src))) - return e, [] - - def cvtdq2pd(_, instr, dst, src): e = [] e.append( -- cgit 1.4.1 From 60a82ffba739af32ffadd56b16e755a5ea410009 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 09:49:15 +0100 Subject: Add PADDSB/PADDSW instruction NP 0F EC /r PADDSB mm, mm/m64 66 0F EC /r PADDSB xmm1, xmm2/m128 NP 0F ED /r PADDSW mm, mm/m64 66 0F ED /r PADDSW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 8 ++++++++ miasm2/arch/x86/sem.py | 14 ++++++++++++++ test/arch/x86/arch.py | 10 ++++++++++ 3 files changed, 32 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 03d7449d..1b181f6f 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4512,6 +4512,14 @@ addop("paddusw", [bs8(0x0f), bs8(0xdd), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) addop("paddusw", [bs8(0x0f), bs8(0xdd), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddsb", [bs8(0x0f), bs8(0xec), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddsb", [bs8(0x0f), bs8(0xec), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddsw", [bs8(0x0f), bs8(0xed), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddsw", [bs8(0x0f), bs8(0xed), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) mn_x86.bintree = factor_one_bit(mn_x86.bintree) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 97373abf..a30bcdc9 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4347,6 +4347,16 @@ def _saturation_add(expr): return m2_expr.ExprCond((arg1 + arg2).msb(), m2_expr.ExprInt(-1, expr.size), expr) +def _saturation_add_signed(expr): + assert expr.is_op("+") and len(expr.args) == 2 + + # Compute the substraction on two more bits, see _saturation_add_unsigned + + arg1 = expr.args[0].signExtend(expr.size + 2) + arg2 = expr.args[1].signExtend(expr.size + 2) + + return _signed_saturation(arg1 + arg2, expr.size) + # Saturate SSE operations @@ -4356,6 +4366,8 @@ paddusb = vec_vertical_instr('+', 8, _saturation_add) paddusw = vec_vertical_instr('+', 16, _saturation_add) psubsb = vec_vertical_instr('-', 8, _saturation_sub_signed) psubsw = vec_vertical_instr('-', 16, _saturation_sub_signed) +paddsb = vec_vertical_instr('+', 8, _saturation_add_signed) +paddsw = vec_vertical_instr('+', 16, _saturation_add_signed) mnemo_func = {'mov': mov, @@ -4881,6 +4893,8 @@ mnemo_func = {'mov': mov, "paddusw": paddusw, "psubsb": psubsb, "psubsw": psubsw, + "paddsb": paddsb, + "paddsw": paddsw, "smsw": smsw, diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index fb757b33..8c191aad 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3029,6 +3029,16 @@ reg_tests = [ "0fe9d9"), (m32, "00000000 PSUBSW XMM0, XMM6", "660fe9c6"), + + (m32, "00000000 PADDSB MM2, MM0", + "0fecd0"), + (m32, "00000000 PADDSB XMM0, XMM4", + "660fecc4"), + + (m32, "00000000 PADDSW MM3, MM1", + "0fedd9"), + (m32, "00000000 PADDSW XMM0, XMM6", + "660fedc6"), ] -- cgit 1.4.1 From deda8791ecbaa3cd541667b04d44759b91a14372 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 10:03:43 +0100 Subject: Unify the way PMIN / PMAX works --- miasm2/arch/x86/sem.py | 78 +++++++++++--------------------------------------- 1 file changed, 17 insertions(+), 61 deletions(-) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index a30bcdc9..0bb534e5 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3403,10 +3403,17 @@ def _keep_mul_high(expr, signed=False): arg2 = expr.args[1].zeroExtend(expr.size * 2) return m2_expr.ExprOp("*", arg1, arg2)[expr.size:] -def _signed_min(expr): - assert expr.is_op("min") and len(expr.args) == 2 +# Op, signed => associated comparison +_min_max_func = { + ("min", False): m2_expr.expr_is_unsigned_lower, + ("min", True): m2_expr.expr_is_signed_lower, + ("max", False): m2_expr.expr_is_unsigned_greater, + ("max", True): m2_expr.expr_is_signed_greater, +} +def _min_max(expr, signed): + assert (expr.is_op("min") or expr.is_op("max")) and len(expr.args) == 2 return m2_expr.ExprCond( - m2_expr.expr_is_signed_lower(expr.args[1], expr.args[0]), + _min_max_func[(expr.op, signed)](expr.args[1], expr.args[0]), expr.args[1], expr.args[0], ) @@ -3453,8 +3460,13 @@ pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True)) # # SSE -pminsw = vec_vertical_instr('min', 16, _signed_min) - +pminsw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=True)) +pminub = vec_vertical_instr('min', 8, lambda x: _min_max(x, signed=False)) +pminuw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=False)) +pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False)) +pmaxub = vec_vertical_instr('max', 8, lambda x: _min_max(x, signed=False)) +pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False)) +pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False)) # Floating-point arithmetic # @@ -3871,62 +3883,6 @@ def iret(ir, instr): return exprs, [] -def pmaxu(_, instr, dst, src, size): - e = [] - for i in xrange(0, dst.size, size): - op1 = dst[i:i + size] - op2 = src[i:i + size] - res = op1 - op2 - # Compote CF in @res = @op1 - @op2 - ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb() - - e.append(m2_expr.ExprAff(dst[i:i + size], - m2_expr.ExprCond(ret, - src[i:i + size], - dst[i:i + size]))) - return e, [] - - -def pmaxub(ir, instr, dst, src): - return pmaxu(ir, instr, dst, src, 8) - - -def pmaxuw(ir, instr, dst, src): - return pmaxu(ir, instr, dst, src, 16) - - -def pmaxud(ir, instr, dst, src): - return pmaxu(ir, instr, dst, src, 32) - - -def pminu(_, instr, dst, src, size): - e = [] - for i in xrange(0, dst.size, size): - op1 = dst[i:i + size] - op2 = src[i:i + size] - res = op1 - op2 - # Compote CF in @res = @op1 - @op2 - ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb() - - e.append(m2_expr.ExprAff(dst[i:i + size], - m2_expr.ExprCond(ret, - dst[i:i + size], - src[i:i + size]))) - return e, [] - - -def pminub(ir, instr, dst, src): - return pminu(ir, instr, dst, src, 8) - - -def pminuw(ir, instr, dst, src): - return pminu(ir, instr, dst, src, 16) - - -def pminud(ir, instr, dst, src): - return pminu(ir, instr, dst, src, 32) - - def pcmpeq(_, instr, dst, src, size): e = [] for i in xrange(0, dst.size, size): -- cgit 1.4.1 From 950bb44e32c5bed4dba7ef77949db86b4d36c5ca Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 10:09:21 +0100 Subject: Add PMAXSW instruction 0F EE /r PMAXSW mm1, mm2/m64 66 0F EE /r PMAXSW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 4 ++++ miasm2/arch/x86/sem.py | 2 ++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 1b181f6f..aaf877fe 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4306,6 +4306,10 @@ addop("pmaxuw", [bs8(0x0f), bs8(0x38), bs8(0x3e), pref_66] + addop("pmaxud", [bs8(0x0f), bs8(0x38), bs8(0x3f), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("pmaxsw", [bs8(0x0f), bs8(0xee), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmaxsw", [bs8(0x0f), bs8(0xee), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) addop("pminub", [bs8(0x0f), bs8(0xda), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 0bb534e5..5beedede 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3467,6 +3467,7 @@ pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False)) pmaxub = vec_vertical_instr('max', 8, lambda x: _min_max(x, signed=False)) pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False)) pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False)) +pmaxsw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=True)) # Floating-point arithmetic # @@ -4783,6 +4784,7 @@ mnemo_func = {'mov': mov, "pmaxub": pmaxub, "pmaxuw": pmaxuw, "pmaxud": pmaxud, + "pmaxsw": pmaxsw, "pminub": pminub, "pminuw": pminuw, diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 8c191aad..cc0a0a93 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3039,6 +3039,11 @@ reg_tests = [ "0fedd9"), (m32, "00000000 PADDSW XMM0, XMM6", "660fedc6"), + + (m32, "00000000 PMAXSW MM3, MM1", + "0feed9"), + (m32, "00000000 PMAXSW XMM0, XMM6", + "660feec6"), ] -- cgit 1.4.1 From b8bd5c0f24b786616b6f372f7f6dfad43438ab01 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 10:23:14 +0100 Subject: Add PMULUDQ instruction NP 0F F4 /r PMULUDQ mm1, mm2/m64 66 0F F4 /r PMULUDQ xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 4 ++++ miasm2/arch/x86/sem.py | 23 +++++++++++++++++++++++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 32 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index aaf877fe..839487e8 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4488,6 +4488,10 @@ addop("pmulhw", [bs8(0x0f), bs8(0xe5), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm_m64)) addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmuludq", [bs8(0x0f), bs8(0xf4), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmuludq", [bs8(0x0f), bs8(0xf4), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] + diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 5beedede..d73eac96 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3418,6 +3418,7 @@ def _min_max(expr, signed): expr.args[0], ) + # Integer arithmetic # @@ -3456,6 +3457,27 @@ pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True)) pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True)) pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True)) +def pmuludq(ir, instr, dst, src): + e = [] + if dst.size == 64: + e.append(m2_expr.ExprAff( + dst, + src[:32].zeroExtend(64) * dst[:32].zeroExtend(64) + )) + elif dst.size == 128: + e.append(m2_expr.ExprAff( + dst[:64], + src[:32].zeroExtend(64) * dst[:32].zeroExtend(64) + )) + e.append(m2_expr.ExprAff( + dst[64:], + src[64:96].zeroExtend(64) * dst[64:96].zeroExtend(64) + )) + else: + raise RuntimeError("Unsupported size %d" % dst.size) + return e, [] + + # Comparisons # @@ -4725,6 +4747,7 @@ mnemo_func = {'mov': mov, "pmulhw": pmulhw, "pmulhd": pmulhd, "pmulhq": pmulhq, + "pmuludq": pmuludq, # Arithmetic (floating-point) diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index cc0a0a93..93ab4a48 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3044,6 +3044,11 @@ reg_tests = [ "0feed9"), (m32, "00000000 PMAXSW XMM0, XMM6", "660feec6"), + + (m32, "00000000 PMULUDQ MM3, MM1", + "0ff4d9"), + (m32, "00000000 PMULUDQ XMM0, XMM6", + "660ff4c6"), ] -- cgit 1.4.1 From 4a94e84923d8ac059fc2c41a5876835613204ad2 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 10:43:48 +0100 Subject: Add PMADDWD instruction 0F F5 /r PMADDWD mm, mm/m64 66 0F F5 /r PMADDWD xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 4 ++++ miasm2/arch/x86/sem.py | 19 +++++++++++++++++++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 28 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 839487e8..ae5f3fd7 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4529,6 +4529,10 @@ addop("paddsw", [bs8(0x0f), bs8(0xed), no_xmm_pref] + addop("paddsw", [bs8(0x0f), bs8(0xed), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmaddwd", [bs8(0x0f), bs8(0xf5), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmaddwd", [bs8(0x0f), bs8(0xf5), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index d73eac96..3880ed67 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3477,6 +3477,22 @@ def pmuludq(ir, instr, dst, src): raise RuntimeError("Unsupported size %d" % dst.size) return e, [] +# Mix +# + +# SSE +def pmaddwd(ir, instr, dst, src): + sizedst = 32 + sizesrc = 16 + out = [] + for start in xrange(0, dst.size, sizedst): + base = start + mul1 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst) + base += sizesrc + mul2 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst) + out.append(mul1 + mul2) + return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] + # Comparisons # @@ -4749,6 +4765,9 @@ mnemo_func = {'mov': mov, "pmulhq": pmulhq, "pmuludq": pmuludq, + # Mix + # SSE + "pmaddwd": pmaddwd, # Arithmetic (floating-point) # diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 93ab4a48..3d9fd31f 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3049,6 +3049,11 @@ reg_tests = [ "0ff4d9"), (m32, "00000000 PMULUDQ XMM0, XMM6", "660ff4c6"), + + (m32, "00000000 PMADDWD MM3, MM1", + "0ff5d9"), + (m32, "00000000 PMADDWD XMM0, XMM6", + "660ff5c6"), ] -- cgit 1.4.1 From 649c7b519fc93e9ef5750d03dfcc3e91c2968a36 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 11:15:14 +0100 Subject: Add PSADBW instruction 0F F6 /r PSADBW mm1, mm2/m64 66 0F F6 /r PSADBW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 5 +++++ miasm2/arch/x86/sem.py | 26 ++++++++++++++++++++++++++ test/arch/x86/arch.py | 5 +++++ 3 files changed, 36 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index ae5f3fd7..a9a59a08 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4534,6 +4534,11 @@ addop("pmaddwd", [bs8(0x0f), bs8(0xf5), no_xmm_pref] + addop("pmaddwd", [bs8(0x0f), bs8(0xf5), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psadbw", [bs8(0x0f), bs8(0xf6), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psadbw", [bs8(0x0f), bs8(0xf6), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) """ diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 3880ed67..51fcbe05 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3494,6 +3494,31 @@ def pmaddwd(ir, instr, dst, src): return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], [] +def _absolute(expr): + """Return abs(@expr)""" + signed = expr.msb() + value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size) + return m2_expr.ExprCond(signed, value_unsigned, expr) + + +def psadbw(ir, instr, dst, src): + sizedst = 16 + sizesrc = 8 + out_dst = [] + for start in xrange(0, dst.size, 64): + out = [] + for src_start in xrange(0, 64, sizesrc): + beg = start + src_start + end = beg + sizesrc + # Not clear in the doc equations, but in the text, src and dst are: + # "8 unsigned byte integers" + out.append(_absolute(dst[beg: end].zeroExtend(sizedst) - src[beg: end].zeroExtend(sizedst))) + out_dst.append(m2_expr.ExprOp("+", *out)) + out_dst.append(m2_expr.ExprInt(0, 64 - sizedst)) + + return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out_dst))], [] + + # Comparisons # @@ -4768,6 +4793,7 @@ mnemo_func = {'mov': mov, # Mix # SSE "pmaddwd": pmaddwd, + "psadbw": psadbw, # Arithmetic (floating-point) # diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 3d9fd31f..0e6ffdd9 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3054,6 +3054,11 @@ reg_tests = [ "0ff5d9"), (m32, "00000000 PMADDWD XMM0, XMM6", "660ff5c6"), + + (m32, "00000000 PSADBW MM3, MM1", + "0ff6d9"), + (m32, "00000000 PSADBW XMM0, XMM6", + "660ff6c6"), ] -- cgit 1.4.1 From cb95c1f581cfded596cc38d8832361c053f3e4cd Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 13:30:58 +0100 Subject: Add PAVGB/PAVGW instruction 0F E0 /r PAVGB mm1, mm2/m64 66 0F E0, /r PAVGB xmm1, xmm2/m128 0F E3 /r PAVGW mm1, mm2/m64 66 0F E3 /r PAVGW xmm1, xmm2/m128 --- miasm2/arch/x86/arch.py | 9 +++++++++ miasm2/arch/x86/sem.py | 13 +++++++++++++ test/arch/x86/arch.py | 10 ++++++++++ 3 files changed, 32 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index a9a59a08..8188cf49 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4539,6 +4539,15 @@ addop("psadbw", [bs8(0x0f), bs8(0xf6), no_xmm_pref] + addop("psadbw", [bs8(0x0f), bs8(0xf6), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pavgb", [bs8(0x0f), bs8(0xe0), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pavgb", [bs8(0x0f), bs8(0xe0), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pavgw", [bs8(0x0f), bs8(0xe3), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pavgw", [bs8(0x0f), bs8(0xe3), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) """ diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 51fcbe05..5a0f1b6b 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3518,6 +3518,17 @@ def psadbw(ir, instr, dst, src): return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out_dst))], [] +def _average(expr): + assert expr.is_op("avg") and len(expr.args) == 2 + + arg1 = expr.args[0].zeroExtend(expr.size * 2) + arg2 = expr.args[1].zeroExtend(expr.size * 2) + one = m2_expr.ExprInt(1, arg1.size) + # avg(unsigned) = (a + b + 1) >> 1, addition beeing at least on one more bit + return ((arg1 + arg2 + one) >> one)[:expr.size] + +pavgb = vec_vertical_instr('avg', 8, _average) +pavgw = vec_vertical_instr('avg', 16, _average) # Comparisons # @@ -4794,6 +4805,8 @@ mnemo_func = {'mov': mov, # SSE "pmaddwd": pmaddwd, "psadbw": psadbw, + "pavgb": pavgb, + "pavgw": pavgw, # Arithmetic (floating-point) # diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 0e6ffdd9..f1f95d46 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3059,6 +3059,16 @@ reg_tests = [ "0ff6d9"), (m32, "00000000 PSADBW XMM0, XMM6", "660ff6c6"), + + (m32, "00000000 PAVGB MM3, MM1", + "0fe0d9"), + (m32, "00000000 PAVGB XMM0, XMM6", + "660fe0c6"), + + (m32, "00000000 PAVGW MM3, MM1", + "0fe3d9"), + (m32, "00000000 PAVGW XMM0, XMM6", + "660fe3c6"), ] -- cgit 1.4.1 From 971b683a5f068068a2d775d5807deacd13918cf9 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 14:51:30 +0100 Subject: Add MASKMOVQ/MASKMOVDQU instruction --- miasm2/arch/x86/arch.py | 6 ++++++ miasm2/arch/x86/sem.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- test/arch/x86/arch.py | 5 +++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 4707fde3..40cd4e9c 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4548,6 +4548,12 @@ addop("pavgw", [bs8(0x0f), bs8(0xe3), no_xmm_pref] + addop("pavgw", [bs8(0x0f), bs8(0xe3), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("maskmovq", [bs8(0x0f), bs8(0xf7), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_reg)) +addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_reg)) + + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) """ diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 5a0f1b6b..becee84e 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4401,6 +4401,52 @@ paddsb = vec_vertical_instr('+', 8, _saturation_add_signed) paddsw = vec_vertical_instr('+', 16, _saturation_add_signed) +# Others SSE operations + +def maskmovq(ir, instr, src, mask): + lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + blks = [] + + # For each possibility, check if a write is necessary + check_labels = [m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) + for _ in xrange(0, mask.size, 8)] + # If the write has to be done, do it (otherwise, nothing happen) + write_labels = [m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) + for _ in xrange(0, mask.size, 8)] + + # Build check blocks + for i, start in enumerate(xrange(0, mask.size, 8)): + bit = mask[start + 7: start + 8] + cur_label = check_labels[i] + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next + write_label = write_labels[i] + check = m2_expr.ExprAff(ir.IRDst, + m2_expr.ExprCond(bit, + write_label, + next_check_label)) + blks.append(IRBlock(cur_label.name, [AssignBlock([check], instr)])) + + # Build write blocks + dst_addr = mRDI[instr.mode] + for i, start in enumerate(xrange(0, mask.size, 8)): + bit = mask[start + 7: start + 8] + cur_label = write_labels[i] + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next + write_addr = dst_addr + m2_expr.ExprInt(i, dst_addr.size) + + # @8[DI/EDI/RDI + i] = src[byte i] + write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8), + src[start: start + 8]) + jump = m2_expr.ExprAff(ir.IRDst, next_check_label) + blks.append(IRBlock(cur_label.name, [AssignBlock([write_mem, jump], instr)])) + + # If mask is null, bypass all + e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(mask, + check_labels[0], + lbl_next))] + return e, blks + + mnemo_func = {'mov': mov, 'xchg': xchg, 'movzx': movzx, @@ -4936,7 +4982,8 @@ mnemo_func = {'mov': mov, "paddsw": paddsw, "smsw": smsw, - + "maskmovq": maskmovq, + "maskmovdqu": maskmovq, } diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 68bc1304..f491c19a 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3069,6 +3069,11 @@ reg_tests = [ "0fe3d9"), (m32, "00000000 PAVGW XMM0, XMM6", "660fe3c6"), + + (m32, "00000000 MASKMOVQ MM2, MM3", + "0ff7d3"), + (m32, "00000000 MASKMOVDQU XMM4, XMM5", + "660ff7e5"), ] -- cgit 1.4.1 From 104d1425792e95a3df64aede5d46b43c324ca125 Mon Sep 17 00:00:00 2001 From: Ajax Date: Fri, 9 Feb 2018 14:54:18 +0100 Subject: Add EMMS, implemtend as a NOP --- miasm2/arch/x86/arch.py | 2 ++ miasm2/arch/x86/sem.py | 6 ++++++ test/arch/x86/arch.py | 3 +++ 3 files changed, 11 insertions(+) (limited to 'miasm2/arch/x86/sem.py') diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 40cd4e9c..72ed3309 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -4553,6 +4553,8 @@ addop("maskmovq", [bs8(0x0f), bs8(0xf7), no_xmm_pref] + addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] + rmmod(xmm_reg, rm_arg_xmm_reg)) +addop("emms", [bs8(0x0f), bs8(0x77)]) + mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index becee84e..ddc8aaf9 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -4447,6 +4447,11 @@ def maskmovq(ir, instr, src, mask): return e, blks +def emms(ir, instr): + # Implemented as a NOP + return [], [] + + mnemo_func = {'mov': mov, 'xchg': xchg, 'movzx': movzx, @@ -4984,6 +4989,7 @@ mnemo_func = {'mov': mov, "smsw": smsw, "maskmovq": maskmovq, "maskmovdqu": maskmovq, + "emms": emms, } diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index f491c19a..2af90c8a 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3074,6 +3074,9 @@ reg_tests = [ "0ff7d3"), (m32, "00000000 MASKMOVDQU XMM4, XMM5", "660ff7e5"), + + (m32, "00000000 EMMS", + "0f77"), ] -- cgit 1.4.1