From 0c35bb71b6b3f32b2a62618f4b67ef87df81958d Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 12:14:59 +0100
Subject: Add PACKSSWB instruction

0F 63 /r PACKSSWB mm1, mm2/m64
66 0F 63 /r PACKSSWB xmm1, xmm2/m128
---
 miasm2/arch/x86/sem.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index deebba8c..635206b6 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4173,6 +4173,54 @@ def palignr(ir, instr, dst, src, imm):
     return [m2_expr.ExprAff(dst, result)], []
 
 
+def _signed_saturation(expr, dst_size):
+    """Saturate the expr @expr for @dst_size bit
+    Signed saturation return MAX_INT / MIN_INT or value depending on the value
+    """
+    assert expr.size > dst_size
+
+    median = 1 << (dst_size - 1)
+    min_int = m2_expr.ExprInt(- median, dst_size)
+    max_int = m2_expr.ExprInt(median - 1, dst_size)
+    signed = expr.msb()
+    value_unsigned = expr ^ expr.mask + m2_expr.ExprInt(1, expr.size)
+    # Re-use the sign bit
+    value = m2_expr.ExprCompose(expr[:dst_size - 1], signed)
+
+    # Bit hack: to avoid a double signed comparison, use mask
+    # ie., in unsigned, 0xXY > 0x0f iff X is not null
+
+    # if expr >s 0
+    #    if expr[dst_size:] > 0: # bigger than max_int
+    #        -> max_int
+    #    else
+    #        -> value
+    # else # negative
+    #    if expr[dst_size:-1] > 0: # smaller than min_int
+    #        -> value
+    #    else
+    #        -> min_int
+
+    return m2_expr.ExprCond(
+        signed,
+        m2_expr.ExprCond(value_unsigned[dst_size:],
+                         min_int,
+                         value),
+        m2_expr.ExprCond(expr[dst_size:],
+                         max_int,
+                         value),
+    )
+
+
+def packsswb(ir, instr, dst, src):
+    out = []
+    for source in [dst, src]:
+        for start in xrange(0, dst.size, 16):
+            out.append(_signed_saturation(source[start:start + 16], 8))
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
+
+
+
 mnemo_func = {'mov': mov,
               'xchg': xchg,
               'movzx': movzx,
@@ -4670,6 +4718,8 @@ mnemo_func = {'mov': mov,
 
               "pmovmskb": pmovmskb,
 
+              "packsswb": packsswb,
+
               "smsw": smsw,
 
               }
-- 
cgit 1.4.1


From 615ee255906c0fe036e9dc87ee65ed27e0b6f88d Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 13:24:29 +0100
Subject: Add PACKUSWB instruction

---
 miasm2/arch/x86/arch.py |  5 +++++
 miasm2/arch/x86/sem.py  | 33 +++++++++++++++++++++++++++++++++
 test/arch/x86/arch.py   |  5 +++++
 3 files changed, 43 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 793d5a47..572c2378 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4463,6 +4463,11 @@ addop("packsswb", [bs8(0x0f), bs8(0x63), no_xmm_pref] +
 addop("packsswb", [bs8(0x0f), bs8(0x63), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
 """
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 635206b6..6dcc76af 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4212,6 +4212,31 @@ def _signed_saturation(expr, dst_size):
     )
 
 
+def _unsigned_saturation(expr, dst_size):
+    """Saturate the expr @expr for @dst_size bit
+    Unsigned saturation return MAX_INT or value depending on the value
+    """
+    assert expr.size > dst_size
+
+    zero = m2_expr.ExprInt(0, dst_size)
+    max_int = m2_expr.ExprInt(-1, dst_size)
+    value = expr[:dst_size]
+    signed = expr.msb()
+
+
+    # Bit hack: to avoid a double signed comparison, use mask
+    # ie., in unsigned, 0xXY > 0x0f iff X is not null
+
+    return m2_expr.ExprCond(
+        signed,
+        zero,
+        m2_expr.ExprCond(expr[dst_size:],
+                         max_int,
+                         value),
+    )
+
+
+
 def packsswb(ir, instr, dst, src):
     out = []
     for source in [dst, src]:
@@ -4220,6 +4245,13 @@ def packsswb(ir, instr, dst, src):
     return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
 
+def packuswb(ir, instr, dst, src):
+    out = []
+    for source in [dst, src]:
+        for start in xrange(0, dst.size, 16):
+            out.append(_unsigned_saturation(source[start:start + 16], 8))
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
+
 
 mnemo_func = {'mov': mov,
               'xchg': xchg,
@@ -4719,6 +4751,7 @@ mnemo_func = {'mov': mov,
               "pmovmskb": pmovmskb,
 
               "packsswb": packsswb,
+              "packuswb": packuswb,
 
               "smsw": smsw,
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index b9bfec74..fbcb88f8 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -2974,6 +2974,11 @@ reg_tests = [
      "0f63f8"),
     (m32, "00000000    PACKSSWB   XMM0, XMM5",
      "660f63c5"),
+
+    (m32, "00000000    PACKUSWB   MM1, MM7",
+     "0f67cf"),
+    (m32, "00000000    PACKUSWB   XMM0, XMM6",
+     "660f67c6"),
 ]
 
 
-- 
cgit 1.4.1


From a328d6d33ce0b513bf41883380025ce8284f26d3 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 13:30:21 +0100
Subject: Add PACKSSDW instruction

0F 6B /r PACKSSDW mm1, mm2/m64
66 0F 6B /r PACKSSDW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py | 4 ++++
 miasm2/arch/x86/sem.py  | 9 +++++++++
 test/arch/x86/arch.py   | 5 +++++
 3 files changed, 18 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 572c2378..71f4409d 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4462,6 +4462,10 @@ addop("packsswb", [bs8(0x0f), bs8(0x63), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
 addop("packsswb", [bs8(0x0f), bs8(0x63), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("packssdw", [bs8(0x0f), bs8(0x6b), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("packssdw", [bs8(0x0f), bs8(0x6b), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
 
 addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 6dcc76af..57716447 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4245,6 +4245,14 @@ def packsswb(ir, instr, dst, src):
     return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
 
+def packssdw(ir, instr, dst, src):
+    out = []
+    for source in [dst, src]:
+        for start in xrange(0, dst.size, 32):
+            out.append(_signed_saturation(source[start:start + 32], 16))
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
+
+
 def packuswb(ir, instr, dst, src):
     out = []
     for source in [dst, src]:
@@ -4751,6 +4759,7 @@ mnemo_func = {'mov': mov,
               "pmovmskb": pmovmskb,
 
               "packsswb": packsswb,
+              "packssdw": packssdw,
               "packuswb": packuswb,
 
               "smsw": smsw,
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index fbcb88f8..284bb40c 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -2975,6 +2975,11 @@ reg_tests = [
     (m32, "00000000    PACKSSWB   XMM0, XMM5",
      "660f63c5"),
 
+    (m32, "00000000    PACKSSDW   MM2, MM0",
+     "0f6bd0"),
+    (m32, "00000000    PACKSSDW   XMM0, XMM7",
+     "660f6bc7"),
+
     (m32, "00000000    PACKUSWB   MM1, MM7",
      "0f67cf"),
     (m32, "00000000    PACKUSWB   XMM0, XMM6",
-- 
cgit 1.4.1


From d533aee1b340f21974dc3c255d04ac0d35a73e84 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 13:47:43 +0100
Subject: Add PMULLW instruction

---
 miasm2/arch/x86/arch.py |  6 ++++++
 miasm2/arch/x86/sem.py  | 15 +++++++++++++++
 test/arch/x86/arch.py   |  5 +++++
 3 files changed, 26 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 71f4409d..98e29b63 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4472,6 +4472,12 @@ addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] +
 addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
+
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
 """
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 57716447..7c990199 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3398,6 +3398,15 @@ psubw = vec_vertical_instr('-', 16)
 psubd = vec_vertical_instr('-', 32)
 psubq = vec_vertical_instr('-', 64)
 
+# Multiplications
+#
+
+# SSE
+pmullb = vec_vertical_instr('*', 8)
+pmullw = vec_vertical_instr('*', 16)
+pmulld = vec_vertical_instr('*', 32)
+pmullq = vec_vertical_instr('*', 64)
+
 # Floating-point arithmetic
 #
 
@@ -4645,6 +4654,12 @@ mnemo_func = {'mov': mov,
               "psubd": psubd,
               "psubq": psubq,
 
+              # SSE
+              "pmullb": pmullb,
+              "pmullw": pmullw,
+              "pmulld": pmulld,
+              "pmullq": pmullq,
+
               # Arithmetic (floating-point)
               #
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 284bb40c..9f3256f3 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -2984,6 +2984,11 @@ reg_tests = [
      "0f67cf"),
     (m32, "00000000    PACKUSWB   XMM0, XMM6",
      "660f67c6"),
+
+    (m32, "00000000    PMULLW     MM4, MM2",
+     "0fd5e2"),
+    (m32, "00000000    PMULLW     XMM0, XMM3",
+     "660fd5c3"),
 ]
 
 
-- 
cgit 1.4.1


From c92be77fa7afa23dd06124325e9dc127ebb67e22 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 15:57:53 +0100
Subject: Add PSUBUSB/PSUBUSW instr

0F D8 /r PSUBUSB mm, mm/m64
66 0F D8 /r PSUBUSB xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  9 ++++++
 miasm2/arch/x86/sem.py  | 84 ++++++++++++++++++++++++++++++++++---------------
 test/arch/x86/arch.py   | 11 +++++++
 3 files changed, 79 insertions(+), 25 deletions(-)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 98e29b63..f4ef7349 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4477,6 +4477,15 @@ addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] +
 addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("psubusb", [bs8(0x0f), bs8(0xd8), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 7c990199..4b4f40a1 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3319,62 +3319,77 @@ def vec_op_clip(op, size):
 # Generic vertical operation
 
 
-def vec_vertical_sem(op, elt_size, reg_size, dst, src):
+def vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output):
     assert reg_size % elt_size == 0
     n = reg_size / elt_size
     if op == '-':
         ops = [
-            (dst[i * elt_size:(i + 1) * elt_size]
-             - src[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)]
+            apply_on_output((dst[i * elt_size:(i + 1) * elt_size]
+                             - src[i * elt_size:(i + 1) * elt_size]))
+            for i in xrange(0, n)
+        ]
     else:
-        ops = [m2_expr.ExprOp(op, dst[i * elt_size:(i + 1) * elt_size],
-                              src[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)]
+        ops = [
+            apply_on_output(m2_expr.ExprOp(op, dst[i * elt_size:(i + 1) * elt_size],
+                                           src[i * elt_size:(i + 1) * elt_size]))
+            for i in xrange(0, n)
+        ]
 
     return m2_expr.ExprCompose(*ops)
 
 
-def float_vec_vertical_sem(op, elt_size, reg_size, dst, src):
+def float_vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output):
     assert reg_size % elt_size == 0
     n = reg_size / elt_size
 
     x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'),
                           64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size]
     if op == '-':
-        ops = [m2_expr.ExprOp(x_to_int % elt_size,
-                              m2_expr.ExprOp(int_to_x % elt_size, dst[i * elt_size:(i + 1) * elt_size]) -
-                              m2_expr.ExprOp(
-                                  int_to_x % elt_size, src[i * elt_size:(
-                                      i + 1) * elt_size])) for i in xrange(0, n)]
+        ops = [
+            apply_on_output(m2_expr.ExprOp(
+                x_to_int % elt_size,
+                m2_expr.ExprOp(int_to_x % elt_size, dst[i * elt_size:(i + 1) * elt_size]) -
+                m2_expr.ExprOp(
+                    int_to_x % elt_size, src[i * elt_size:(
+                        i + 1) * elt_size])))
+            for i in xrange(0, n)
+        ]
     else:
-        ops = [m2_expr.ExprOp(x_to_int % elt_size,
-                              m2_expr.ExprOp(op,
-                                             m2_expr.ExprOp(
-                                                 int_to_x % elt_size, dst[i * elt_size:(
-                                                     i + 1) * elt_size]),
-                                             m2_expr.ExprOp(
-                                                 int_to_x % elt_size, src[i * elt_size:(
-                                                     i + 1) * elt_size]))) for i in xrange(0, n)]
+        ops = [
+            apply_on_output(m2_expr.ExprOp(
+                x_to_int % elt_size,
+                m2_expr.ExprOp(op,
+                               m2_expr.ExprOp(
+                                   int_to_x % elt_size, dst[i * elt_size:(
+                                       i + 1) * elt_size]),
+                               m2_expr.ExprOp(
+                                   int_to_x % elt_size, src[i * elt_size:(
+                                       i + 1) * elt_size]))))
+            for i in xrange(0, n)]
 
     return m2_expr.ExprCompose(*ops)
 
 
-def __vec_vertical_instr_gen(op, elt_size, sem):
+def __vec_vertical_instr_gen(op, elt_size, sem, apply_on_output):
     def vec_instr(ir, instr, dst, src):
         e = []
         if isinstance(src, m2_expr.ExprMem):
             src = ir.ExprMem(src.arg, dst.size)
         reg_size = dst.size
-        e.append(m2_expr.ExprAff(dst, sem(op, elt_size, reg_size, dst, src)))
+        e.append(m2_expr.ExprAff(dst, sem(op, elt_size, reg_size, dst, src,
+                                          apply_on_output)))
         return e, []
     return vec_instr
 
 
-def vec_vertical_instr(op, elt_size):
-    return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem)
+def vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x):
+    return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem,
+                                    apply_on_output)
 
 
-def float_vec_vertical_instr(op, elt_size):
-    return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem)
+def float_vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x):
+    return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem,
+                                    apply_on_output)
 
 
 # Integer arithmetic
@@ -4270,6 +4285,22 @@ def packuswb(ir, instr, dst, src):
     return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
 
+def _saturation_sub(expr):
+    assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-")
+
+    # Compute the soustraction on one more bit to be able to distinguish cases:
+    # 0x48 - 0xd7 in 8 bit, should saturate
+    arg1 = expr.args[0].zeroExtend(expr.size + 1)
+    arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1)
+    return _unsigned_saturation(arg1 - arg2, expr.size)
+
+
+# Saturate SSE operations
+
+psubusb = vec_vertical_instr('-', 8, _saturation_sub)
+psubusw = vec_vertical_instr('-', 16, _saturation_sub)
+
+
 mnemo_func = {'mov': mov,
               'xchg': xchg,
               'movzx': movzx,
@@ -4777,6 +4808,9 @@ mnemo_func = {'mov': mov,
               "packssdw": packssdw,
               "packuswb": packuswb,
 
+              "psubusb": psubusb,
+              "psubusw": psubusw,
+
               "smsw": smsw,
 
               }
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 9f3256f3..3e0b9333 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -2989,6 +2989,17 @@ reg_tests = [
      "0fd5e2"),
     (m32, "00000000    PMULLW     XMM0, XMM3",
      "660fd5c3"),
+
+    (m32, "00000000    PSUBUSB    MM5, MM3",
+     "0fd8eb"),
+    (m32, "00000000    PSUBUSB    XMM0, XMM5",
+     "660fd8c5"),
+
+    (m32, "00000000    PSUBUSW    MM5, MM3",
+     "0fd9eb"),
+    (m32, "00000000    PSUBUSW    XMM0, XMM5",
+     "660fd9c5"),
+
 ]
 
 
-- 
cgit 1.4.1


From bee25554ff9c86f81f16e191f09693f336365ad2 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 16:19:55 +0100
Subject: Add PADDUSB/PADDUSW instruction

0F DC /r PADDUSB mm, mm/m64
66 0F DC /r PADDUSB xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  9 +++++++++
 miasm2/arch/x86/sem.py  | 21 +++++++++++++++++++++
 test/arch/x86/arch.py   |  9 +++++++++
 3 files changed, 39 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index f4ef7349..303cad6e 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4486,6 +4486,15 @@ addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] +
 addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("paddusb", [bs8(0x0f), bs8(0xdc), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("paddusb", [bs8(0x0f), bs8(0xdc), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("paddusw", [bs8(0x0f), bs8(0xdd), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("paddusw", [bs8(0x0f), bs8(0xdd), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 4b4f40a1..adf5820d 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4294,11 +4294,30 @@ def _saturation_sub(expr):
     arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1)
     return _unsigned_saturation(arg1 - arg2, expr.size)
 
+def _saturation_add(expr):
+    assert expr.is_op("+") and len(expr.args) == 2
+
+    # Compute the addition on one more bit to be able to distinguish cases:
+    # 0x48 + 0xd7 in 8 bit, should saturate
+
+    arg1 = expr.args[0].zeroExtend(expr.size + 1)
+    arg2 = expr.args[1].zeroExtend(expr.size + 1)
+
+    # We can also use _unsigned_saturation with two additional bits (to
+    # distinguish minus and overflow case)
+    # The resulting expression being more complicated with an impossible case
+    # (signed=True), we rewrite the rule here
+
+    return m2_expr.ExprCond((arg1 + arg2).msb(), m2_expr.ExprInt(-1, expr.size),
+                            expr)
+
 
 # Saturate SSE operations
 
 psubusb = vec_vertical_instr('-', 8, _saturation_sub)
 psubusw = vec_vertical_instr('-', 16, _saturation_sub)
+paddusb = vec_vertical_instr('+', 8, _saturation_add)
+paddusw = vec_vertical_instr('+', 16, _saturation_add)
 
 
 mnemo_func = {'mov': mov,
@@ -4810,6 +4829,8 @@ mnemo_func = {'mov': mov,
 
               "psubusb": psubusb,
               "psubusw": psubusw,
+              "paddusb": paddusb,
+              "paddusw": paddusw,
 
               "smsw": smsw,
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 3e0b9333..9b1a4d25 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3000,6 +3000,15 @@ reg_tests = [
     (m32, "00000000    PSUBUSW    XMM0, XMM5",
      "660fd9c5"),
 
+    (m32, "00000000    PADDUSB    MM5, MM3",
+     "0fdceb"),
+    (m32, "00000000    PADDUSB    XMM0, XMM6",
+     "660fdcc6"),
+
+    (m32, "00000000    PADDUSW    MM7, MM5",
+     "0fddfd"),
+    (m32, "00000000    PADDUSW    XMM0, XMM1",
+     "660fddc1"),
 ]
 
 
-- 
cgit 1.4.1


From 98a0c25a6e914089feb73e8bc4b79ff33a38a86b Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Thu, 8 Feb 2018 16:43:37 +0100
Subject: Add PMULHW / PMULHUW instruction

0F E5 /r 	PMULHW mm, mm/m64
66 0F E5 /r 	PMULHW xmm1, xmm2/m128
NP 0F E4 /r PMULHUW mm1, mm2/m64
66 0F E4 /r PMULHUW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  9 +++++++++
 miasm2/arch/x86/sem.py  | 29 +++++++++++++++++++++++++++++
 test/arch/x86/arch.py   | 10 ++++++++++
 3 files changed, 48 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 303cad6e..d58ac64b 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4476,6 +4476,15 @@ addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
 addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("pmulhuw", [bs8(0x0f), bs8(0xe4), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pmulhuw", [bs8(0x0f), bs8(0xe4), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("pmulhw", [bs8(0x0f), bs8(0xe5), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 
 addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index adf5820d..ba56c91c 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3392,6 +3392,17 @@ def float_vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x):
                                     apply_on_output)
 
 
+def _keep_mul_high(expr, signed=False):
+    assert expr.is_op("*") and len(expr.args) == 2
+
+    if signed:
+        arg1 = expr.args[0].signExtend(expr.size * 2)
+        arg2 = expr.args[1].signExtend(expr.size * 2)
+    else:
+        arg1 = expr.args[0].zeroExtend(expr.size * 2)
+        arg2 = expr.args[1].zeroExtend(expr.size * 2)
+    return m2_expr.ExprOp("*", arg1, arg2)[expr.size:]
+
 # Integer arithmetic
 #
 
@@ -3421,6 +3432,14 @@ pmullb = vec_vertical_instr('*', 8)
 pmullw = vec_vertical_instr('*', 16)
 pmulld = vec_vertical_instr('*', 32)
 pmullq = vec_vertical_instr('*', 64)
+pmulhub = vec_vertical_instr('*', 8, _keep_mul_high)
+pmulhuw = vec_vertical_instr('*', 16, _keep_mul_high)
+pmulhud = vec_vertical_instr('*', 32, _keep_mul_high)
+pmulhuq = vec_vertical_instr('*', 64, _keep_mul_high)
+pmulhb = vec_vertical_instr('*', 8, lambda x: _keep_mul_high(x, signed=True))
+pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True))
+pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True))
+pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True))
 
 # Floating-point arithmetic
 #
@@ -4704,11 +4723,21 @@ mnemo_func = {'mov': mov,
               "psubd": psubd,
               "psubq": psubq,
 
+              # Multiplications
               # SSE
               "pmullb": pmullb,
               "pmullw": pmullw,
               "pmulld": pmulld,
               "pmullq": pmullq,
+              "pmulhub": pmulhub,
+              "pmulhuw": pmulhuw,
+              "pmulhud": pmulhud,
+              "pmulhuq": pmulhuq,
+              "pmulhb": pmulhb,
+              "pmulhw": pmulhw,
+              "pmulhd": pmulhd,
+              "pmulhq": pmulhq,
+
 
               # Arithmetic (floating-point)
               #
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 9b1a4d25..82475e51 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3009,6 +3009,16 @@ reg_tests = [
      "0fddfd"),
     (m32, "00000000    PADDUSW    XMM0, XMM1",
      "660fddc1"),
+
+    (m32, "00000000    PMULHUW    MM6, MM4",
+     "0fe4f4"),
+    (m32, "00000000    PMULHUW    XMM0, XMM7",
+     "660fe4c7"),
+
+    (m32, "00000000    PMULHW     MM6, MM4",
+     "0fe5f4"),
+    (m32, "00000000    PMULHW     XMM0, XMM7",
+     "660fe5c7"),
 ]
 
 
-- 
cgit 1.4.1


From 426fc42a2b991e0249e73edab304530ba96e8a79 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 09:19:55 +0100
Subject: Add PSUBSB/PSUBSW instruction

NP 0F E8 /r PSUBSB mm, mm/m64
66 0F E8 /r PSUBSB xmm1, xmm2/m128
NP 0F E9 /r PSUBSW mm, mm/m64
66 0F E9 /r PSUBSW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  9 +++++++++
 miasm2/arch/x86/sem.py  | 26 +++++++++++++++++++-------
 test/arch/x86/arch.py   | 10 ++++++++++
 3 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index d58ac64b..03d7449d 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4494,6 +4494,15 @@ addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
 addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("psubsb", [bs8(0x0f), bs8(0xe8), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("psubsb", [bs8(0x0f), bs8(0xe8), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("psubsw", [bs8(0x0f), bs8(0xe9), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("psubsw", [bs8(0x0f), bs8(0xe9), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 
 addop("paddusb", [bs8(0x0f), bs8(0xdc), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index ba56c91c..5811502c 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4226,7 +4226,7 @@ def _signed_saturation(expr, dst_size):
     min_int = m2_expr.ExprInt(- median, dst_size)
     max_int = m2_expr.ExprInt(median - 1, dst_size)
     signed = expr.msb()
-    value_unsigned = expr ^ expr.mask + m2_expr.ExprInt(1, expr.size)
+    value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size)
     # Re-use the sign bit
     value = m2_expr.ExprCompose(expr[:dst_size - 1], signed)
 
@@ -4234,7 +4234,7 @@ def _signed_saturation(expr, dst_size):
     # ie., in unsigned, 0xXY > 0x0f iff X is not null
 
     # if expr >s 0
-    #    if expr[dst_size:] > 0: # bigger than max_int
+    #    if expr[dst_size - 1:] > 0: # bigger than max_int
     #        -> max_int
     #    else
     #        -> value
@@ -4246,10 +4246,10 @@ def _signed_saturation(expr, dst_size):
 
     return m2_expr.ExprCond(
         signed,
-        m2_expr.ExprCond(value_unsigned[dst_size:],
+        m2_expr.ExprCond(value_unsigned[dst_size - 1:],
                          min_int,
                          value),
-        m2_expr.ExprCond(expr[dst_size:],
+        m2_expr.ExprCond(expr[dst_size - 1:],
                          max_int,
                          value),
     )
@@ -4304,7 +4304,7 @@ def packuswb(ir, instr, dst, src):
     return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
 
-def _saturation_sub(expr):
+def _saturation_sub_unsigned(expr):
     assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-")
 
     # Compute the soustraction on one more bit to be able to distinguish cases:
@@ -4313,6 +4313,14 @@ def _saturation_sub(expr):
     arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1)
     return _unsigned_saturation(arg1 - arg2, expr.size)
 
+def _saturation_sub_signed(expr):
+    assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-")
+
+    # Compute the subtraction on two more bits, see _saturation_sub_unsigned
+    arg1 = expr.args[0].signExtend(expr.size + 2)
+    arg2 = expr.args[1].args[0].signExtend(expr.size + 2)
+    return _signed_saturation(arg1 - arg2, expr.size)
+
 def _saturation_add(expr):
     assert expr.is_op("+") and len(expr.args) == 2
 
@@ -4333,10 +4341,12 @@ def _saturation_add(expr):
 
 # Saturate SSE operations
 
-psubusb = vec_vertical_instr('-', 8, _saturation_sub)
-psubusw = vec_vertical_instr('-', 16, _saturation_sub)
+psubusb = vec_vertical_instr('-', 8, _saturation_sub_unsigned)
+psubusw = vec_vertical_instr('-', 16, _saturation_sub_unsigned)
 paddusb = vec_vertical_instr('+', 8, _saturation_add)
 paddusw = vec_vertical_instr('+', 16, _saturation_add)
+psubsb = vec_vertical_instr('-', 8, _saturation_sub_signed)
+psubsw = vec_vertical_instr('-', 16, _saturation_sub_signed)
 
 
 mnemo_func = {'mov': mov,
@@ -4860,6 +4870,8 @@ mnemo_func = {'mov': mov,
               "psubusw": psubusw,
               "paddusb": paddusb,
               "paddusw": paddusw,
+              "psubsb": psubsb,
+              "psubsw": psubsw,
 
               "smsw": smsw,
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 82475e51..fb757b33 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3019,6 +3019,16 @@ reg_tests = [
      "0fe5f4"),
     (m32, "00000000    PMULHW     XMM0, XMM7",
      "660fe5c7"),
+
+    (m32, "00000000    PSUBSB     MM2, MM0",
+     "0fe8d0"),
+    (m32, "00000000    PSUBSB     XMM0, XMM4",
+     "660fe8c4"),
+
+    (m32, "00000000    PSUBSW     MM3, MM1",
+     "0fe9d9"),
+    (m32, "00000000    PSUBSW     XMM0, XMM6",
+     "660fe9c6"),
 ]
 
 
-- 
cgit 1.4.1


From df4b2904c00bca0d15062493666ce50ff0b56632 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 09:33:24 +0100
Subject: Fix PMINSW semantic

Tested against QEMU
---
 miasm2/arch/x86/sem.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 5811502c..97373abf 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3403,6 +3403,14 @@ def _keep_mul_high(expr, signed=False):
         arg2 = expr.args[1].zeroExtend(expr.size * 2)
     return m2_expr.ExprOp("*", arg1, arg2)[expr.size:]
 
+def _signed_min(expr):
+    assert expr.is_op("min") and len(expr.args) == 2
+    return m2_expr.ExprCond(
+        m2_expr.expr_is_signed_lower(expr.args[1], expr.args[0]),
+        expr.args[1],
+        expr.args[0],
+    )
+
 # Integer arithmetic
 #
 
@@ -3441,6 +3449,13 @@ pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True))
 pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True))
 pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True))
 
+# Comparisons
+#
+
+# SSE
+pminsw = vec_vertical_instr('min', 16, _signed_min)
+
+
 # Floating-point arithmetic
 #
 
@@ -3491,12 +3506,6 @@ def por(_, instr, dst, src):
     return e, []
 
 
-def pminsw(_, instr, dst, src):
-    e = []
-    e.append(m2_expr.ExprAff(dst, m2_expr.ExprCond((dst - src).msb(), dst, src)))
-    return e, []
-
-
 def cvtdq2pd(_, instr, dst, src):
     e = []
     e.append(
-- 
cgit 1.4.1


From 60a82ffba739af32ffadd56b16e755a5ea410009 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 09:49:15 +0100
Subject: Add PADDSB/PADDSW instruction

NP 0F EC /r PADDSB mm, mm/m64
66 0F EC /r PADDSB xmm1, xmm2/m128
NP 0F ED /r PADDSW mm, mm/m64
66 0F ED /r PADDSW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  8 ++++++++
 miasm2/arch/x86/sem.py  | 14 ++++++++++++++
 test/arch/x86/arch.py   | 10 ++++++++++
 3 files changed, 32 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 03d7449d..1b181f6f 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4512,6 +4512,14 @@ addop("paddusw", [bs8(0x0f), bs8(0xdd), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
 addop("paddusw", [bs8(0x0f), bs8(0xdd), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("paddsb", [bs8(0x0f), bs8(0xec), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("paddsb", [bs8(0x0f), bs8(0xec), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("paddsw", [bs8(0x0f), bs8(0xed), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("paddsw", [bs8(0x0f), bs8(0xed), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
 
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 97373abf..a30bcdc9 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4347,6 +4347,16 @@ def _saturation_add(expr):
     return m2_expr.ExprCond((arg1 + arg2).msb(), m2_expr.ExprInt(-1, expr.size),
                             expr)
 
+def _saturation_add_signed(expr):
+    assert expr.is_op("+") and len(expr.args) == 2
+
+    # Compute the addition on two more bits, see _saturation_add
+
+    arg1 = expr.args[0].signExtend(expr.size + 2)
+    arg2 = expr.args[1].signExtend(expr.size + 2)
+
+    return _signed_saturation(arg1 + arg2, expr.size)
+
 
 # Saturate SSE operations
 
@@ -4356,6 +4366,8 @@ paddusb = vec_vertical_instr('+', 8, _saturation_add)
 paddusw = vec_vertical_instr('+', 16, _saturation_add)
 psubsb = vec_vertical_instr('-', 8, _saturation_sub_signed)
 psubsw = vec_vertical_instr('-', 16, _saturation_sub_signed)
+paddsb = vec_vertical_instr('+', 8, _saturation_add_signed)
+paddsw = vec_vertical_instr('+', 16, _saturation_add_signed)
 
 
 mnemo_func = {'mov': mov,
@@ -4881,6 +4893,8 @@ mnemo_func = {'mov': mov,
               "paddusw": paddusw,
               "psubsb": psubsb,
               "psubsw": psubsw,
+              "paddsb": paddsb,
+              "paddsw": paddsw,
 
               "smsw": smsw,
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index fb757b33..8c191aad 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3029,6 +3029,16 @@ reg_tests = [
      "0fe9d9"),
     (m32, "00000000    PSUBSW     XMM0, XMM6",
      "660fe9c6"),
+
+    (m32, "00000000    PADDSB     MM2, MM0",
+     "0fecd0"),
+    (m32, "00000000    PADDSB     XMM0, XMM4",
+     "660fecc4"),
+
+    (m32, "00000000    PADDSW     MM3, MM1",
+     "0fedd9"),
+    (m32, "00000000    PADDSW     XMM0, XMM6",
+     "660fedc6"),
 ]
 
 
-- 
cgit 1.4.1


From deda8791ecbaa3cd541667b04d44759b91a14372 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 10:03:43 +0100
Subject: Unify the way PMIN / PMAX works

---
 miasm2/arch/x86/sem.py | 78 +++++++++++---------------------------------------
 1 file changed, 17 insertions(+), 61 deletions(-)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index a30bcdc9..0bb534e5 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3403,10 +3403,17 @@ def _keep_mul_high(expr, signed=False):
         arg2 = expr.args[1].zeroExtend(expr.size * 2)
     return m2_expr.ExprOp("*", arg1, arg2)[expr.size:]
 
-def _signed_min(expr):
-    assert expr.is_op("min") and len(expr.args) == 2
+# Op, signed => associated comparison
+_min_max_func = {
+    ("min", False): m2_expr.expr_is_unsigned_lower,
+    ("min", True): m2_expr.expr_is_signed_lower,
+    ("max", False): m2_expr.expr_is_unsigned_greater,
+    ("max", True): m2_expr.expr_is_signed_greater,
+}
+def _min_max(expr, signed):
+    assert (expr.is_op("min") or expr.is_op("max")) and len(expr.args) == 2
     return m2_expr.ExprCond(
-        m2_expr.expr_is_signed_lower(expr.args[1], expr.args[0]),
+        _min_max_func[(expr.op, signed)](expr.args[1], expr.args[0]),
         expr.args[1],
         expr.args[0],
     )
@@ -3453,8 +3460,13 @@ pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True))
 #
 
 # SSE
-pminsw = vec_vertical_instr('min', 16, _signed_min)
-
+pminsw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=True))
+pminub = vec_vertical_instr('min', 8, lambda x: _min_max(x, signed=False))
+pminuw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=False))
+pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False))
+pmaxub = vec_vertical_instr('max', 8, lambda x: _min_max(x, signed=False))
+pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False))
+pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False))
 
 # Floating-point arithmetic
 #
@@ -3871,62 +3883,6 @@ def iret(ir, instr):
     return exprs, []
 
 
-def pmaxu(_, instr, dst, src, size):
-    e = []
-    for i in xrange(0, dst.size, size):
-        op1 = dst[i:i + size]
-        op2 = src[i:i + size]
-        res = op1 - op2
-        # Compote CF in @res = @op1 - @op2
-        ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb()
-
-        e.append(m2_expr.ExprAff(dst[i:i + size],
-                                 m2_expr.ExprCond(ret,
-                                                  src[i:i + size],
-                                                  dst[i:i + size])))
-    return e, []
-
-
-def pmaxub(ir, instr, dst, src):
-    return pmaxu(ir, instr, dst, src, 8)
-
-
-def pmaxuw(ir, instr, dst, src):
-    return pmaxu(ir, instr, dst, src, 16)
-
-
-def pmaxud(ir, instr, dst, src):
-    return pmaxu(ir, instr, dst, src, 32)
-
-
-def pminu(_, instr, dst, src, size):
-    e = []
-    for i in xrange(0, dst.size, size):
-        op1 = dst[i:i + size]
-        op2 = src[i:i + size]
-        res = op1 - op2
-        # Compote CF in @res = @op1 - @op2
-        ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb()
-
-        e.append(m2_expr.ExprAff(dst[i:i + size],
-                                 m2_expr.ExprCond(ret,
-                                                  dst[i:i + size],
-                                                  src[i:i + size])))
-    return e, []
-
-
-def pminub(ir, instr, dst, src):
-    return pminu(ir, instr, dst, src, 8)
-
-
-def pminuw(ir, instr, dst, src):
-    return pminu(ir, instr, dst, src, 16)
-
-
-def pminud(ir, instr, dst, src):
-    return pminu(ir, instr, dst, src, 32)
-
-
 def pcmpeq(_, instr, dst, src, size):
     e = []
     for i in xrange(0, dst.size, size):
-- 
cgit 1.4.1


From 950bb44e32c5bed4dba7ef77949db86b4d36c5ca Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 10:09:21 +0100
Subject: Add PMAXSW instruction

0F EE /r 	PMAXSW mm1, mm2/m64
66 0F EE /r 	PMAXSW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py | 4 ++++
 miasm2/arch/x86/sem.py  | 2 ++
 test/arch/x86/arch.py   | 5 +++++
 3 files changed, 11 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 1b181f6f..aaf877fe 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4306,6 +4306,10 @@ addop("pmaxuw", [bs8(0x0f), bs8(0x38), bs8(0x3e), pref_66] +
 addop("pmaxud", [bs8(0x0f), bs8(0x38), bs8(0x3f), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm))
 
+addop("pmaxsw", [bs8(0x0f), bs8(0xee), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pmaxsw", [bs8(0x0f), bs8(0xee), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
 
 addop("pminub", [bs8(0x0f), bs8(0xda), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm))
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 0bb534e5..5beedede 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3467,6 +3467,7 @@ pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False))
 pmaxub = vec_vertical_instr('max', 8, lambda x: _min_max(x, signed=False))
 pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False))
 pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False))
+pmaxsw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=True))
 
 # Floating-point arithmetic
 #
@@ -4783,6 +4784,7 @@ mnemo_func = {'mov': mov,
               "pmaxub": pmaxub,
               "pmaxuw": pmaxuw,
               "pmaxud": pmaxud,
+              "pmaxsw": pmaxsw,
 
               "pminub": pminub,
               "pminuw": pminuw,
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 8c191aad..cc0a0a93 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3039,6 +3039,11 @@ reg_tests = [
      "0fedd9"),
     (m32, "00000000    PADDSW     XMM0, XMM6",
      "660fedc6"),
+
+    (m32, "00000000    PMAXSW     MM3, MM1",
+     "0feed9"),
+    (m32, "00000000    PMAXSW     XMM0, XMM6",
+     "660feec6"),
 ]
 
 
-- 
cgit 1.4.1


From b8bd5c0f24b786616b6f372f7f6dfad43438ab01 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 10:23:14 +0100
Subject: Add PMULUDQ instruction

NP 0F F4 /r PMULUDQ mm1, mm2/m64
66 0F F4 /r PMULUDQ xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  4 ++++
 miasm2/arch/x86/sem.py  | 23 +++++++++++++++++++++++
 test/arch/x86/arch.py   |  5 +++++
 3 files changed, 32 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index aaf877fe..839487e8 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4488,6 +4488,10 @@ addop("pmulhw", [bs8(0x0f), bs8(0xe5), no_xmm_pref] +
       rmmod(mm_reg, rm_arg_mm_m64))
 addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("pmuludq", [bs8(0x0f), bs8(0xf4), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pmuludq", [bs8(0x0f), bs8(0xf4), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
 
 
 addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] +
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 5beedede..d73eac96 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3418,6 +3418,7 @@ def _min_max(expr, signed):
         expr.args[0],
     )
 
+
 # Integer arithmetic
 #
 
@@ -3456,6 +3457,27 @@ pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True))
 pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True))
 pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True))
 
+def pmuludq(ir, instr, dst, src):
+    e = []
+    if dst.size == 64:
+        e.append(m2_expr.ExprAff(
+            dst,
+            src[:32].zeroExtend(64) * dst[:32].zeroExtend(64)
+        ))
+    elif dst.size == 128:
+        e.append(m2_expr.ExprAff(
+            dst[:64],
+            src[:32].zeroExtend(64) * dst[:32].zeroExtend(64)
+        ))
+        e.append(m2_expr.ExprAff(
+            dst[64:],
+            src[64:96].zeroExtend(64) * dst[64:96].zeroExtend(64)
+        ))
+    else:
+        raise RuntimeError("Unsupported size %d" % dst.size)
+    return e, []
+
+
 # Comparisons
 #
 
@@ -4725,6 +4747,7 @@ mnemo_func = {'mov': mov,
               "pmulhw": pmulhw,
               "pmulhd": pmulhd,
               "pmulhq": pmulhq,
+              "pmuludq": pmuludq,
 
 
               # Arithmetic (floating-point)
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index cc0a0a93..93ab4a48 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3044,6 +3044,11 @@ reg_tests = [
      "0feed9"),
     (m32, "00000000    PMAXSW     XMM0, XMM6",
      "660feec6"),
+
+    (m32, "00000000    PMULUDQ    MM3, MM1",
+     "0ff4d9"),
+    (m32, "00000000    PMULUDQ    XMM0, XMM6",
+     "660ff4c6"),
 ]
 
 
-- 
cgit 1.4.1


From 4a94e84923d8ac059fc2c41a5876835613204ad2 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 10:43:48 +0100
Subject: Add PMADDWD instruction

0F F5 /r 	PMADDWD mm, mm/m64
66 0F F5 /r 	PMADDWD xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  4 ++++
 miasm2/arch/x86/sem.py  | 19 +++++++++++++++++++
 test/arch/x86/arch.py   |  5 +++++
 3 files changed, 28 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 839487e8..ae5f3fd7 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4529,6 +4529,10 @@ addop("paddsw", [bs8(0x0f), bs8(0xed), no_xmm_pref] +
 addop("paddsw", [bs8(0x0f), bs8(0xed), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("pmaddwd", [bs8(0x0f), bs8(0xf5), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pmaddwd", [bs8(0x0f), bs8(0xf5), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index d73eac96..3880ed67 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3477,6 +3477,22 @@ def pmuludq(ir, instr, dst, src):
         raise RuntimeError("Unsupported size %d" % dst.size)
     return e, []
 
+# Mix
+#
+
+# SSE
+def pmaddwd(ir, instr, dst, src):
+    sizedst = 32
+    sizesrc = 16
+    out = []
+    for start in xrange(0, dst.size, sizedst):
+        base = start
+        mul1 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst)
+        base += sizesrc
+        mul2 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst)
+        out.append(mul1 + mul2)
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
+
 
 # Comparisons
 #
@@ -4749,6 +4765,9 @@ mnemo_func = {'mov': mov,
               "pmulhq": pmulhq,
               "pmuludq": pmuludq,
 
+              # Mix
+              # SSE
+              "pmaddwd": pmaddwd,
 
               # Arithmetic (floating-point)
               #
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 93ab4a48..3d9fd31f 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3049,6 +3049,11 @@ reg_tests = [
      "0ff4d9"),
     (m32, "00000000    PMULUDQ    XMM0, XMM6",
      "660ff4c6"),
+
+    (m32, "00000000    PMADDWD    MM3, MM1",
+     "0ff5d9"),
+    (m32, "00000000    PMADDWD    XMM0, XMM6",
+     "660ff5c6"),
 ]
 
 
-- 
cgit 1.4.1


From 649c7b519fc93e9ef5750d03dfcc3e91c2968a36 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 11:15:14 +0100
Subject: Add PSADBW instruction

0F F6 /r 	PSADBW mm1, mm2/m64
66 0F F6 /r 	PSADBW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  5 +++++
 miasm2/arch/x86/sem.py  | 26 ++++++++++++++++++++++++++
 test/arch/x86/arch.py   |  5 +++++
 3 files changed, 36 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index ae5f3fd7..a9a59a08 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4534,6 +4534,11 @@ addop("pmaddwd", [bs8(0x0f), bs8(0xf5), no_xmm_pref] +
 addop("pmaddwd", [bs8(0x0f), bs8(0xf5), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("psadbw", [bs8(0x0f), bs8(0xf6), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("psadbw", [bs8(0x0f), bs8(0xf6), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
 """
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 3880ed67..51fcbe05 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3494,6 +3494,31 @@ def pmaddwd(ir, instr, dst, src):
     return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out))], []
 
 
+def _absolute(expr):
+    """Return abs(@expr)"""
+    signed = expr.msb()
+    value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size)
+    return m2_expr.ExprCond(signed, value_unsigned, expr)
+
+
+def psadbw(ir, instr, dst, src):
+    sizedst = 16
+    sizesrc = 8
+    out_dst = []
+    for start in xrange(0, dst.size, 64):
+        out = []
+        for src_start in xrange(0, 64, sizesrc):
+            beg = start + src_start
+            end = beg + sizesrc
+            # Not clear in the doc equations, but in the text, src and dst are:
+            # "8 unsigned byte integers"
+            out.append(_absolute(dst[beg: end].zeroExtend(sizedst) - src[beg: end].zeroExtend(sizedst)))
+        out_dst.append(m2_expr.ExprOp("+", *out))
+        out_dst.append(m2_expr.ExprInt(0, 64 - sizedst))
+
+    return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out_dst))], []
+
+
 # Comparisons
 #
 
@@ -4768,6 +4793,7 @@ mnemo_func = {'mov': mov,
               # Mix
               # SSE
               "pmaddwd": pmaddwd,
+              "psadbw": psadbw,
 
               # Arithmetic (floating-point)
               #
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 3d9fd31f..0e6ffdd9 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3054,6 +3054,11 @@ reg_tests = [
      "0ff5d9"),
     (m32, "00000000    PMADDWD    XMM0, XMM6",
      "660ff5c6"),
+
+    (m32, "00000000    PSADBW     MM3, MM1",
+     "0ff6d9"),
+    (m32, "00000000    PSADBW     XMM0, XMM6",
+     "660ff6c6"),
 ]
 
 
-- 
cgit 1.4.1


From cb95c1f581cfded596cc38d8832361c053f3e4cd Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 13:30:58 +0100
Subject: Add PAVGB/PAVGW instruction

0F E0 /r PAVGB mm1, mm2/m64
66 0F E0 /r PAVGB xmm1, xmm2/m128
0F E3 /r PAVGW mm1, mm2/m64
66 0F E3 /r PAVGW xmm1, xmm2/m128
---
 miasm2/arch/x86/arch.py |  9 +++++++++
 miasm2/arch/x86/sem.py  | 13 +++++++++++++
 test/arch/x86/arch.py   | 10 ++++++++++
 3 files changed, 32 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index a9a59a08..8188cf49 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4539,6 +4539,15 @@ addop("psadbw", [bs8(0x0f), bs8(0xf6), no_xmm_pref] +
 addop("psadbw", [bs8(0x0f), bs8(0xf6), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("pavgb", [bs8(0x0f), bs8(0xe0), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pavgb", [bs8(0x0f), bs8(0xe0), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+addop("pavgw", [bs8(0x0f), bs8(0xe3), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_m64))
+addop("pavgw", [bs8(0x0f), bs8(0xe3), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m128))
+
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
 """
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 51fcbe05..5a0f1b6b 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -3518,6 +3518,17 @@ def psadbw(ir, instr, dst, src):
 
     return [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*out_dst))], []
 
+def _average(expr):
+    assert expr.is_op("avg") and len(expr.args) == 2
+
+    arg1 = expr.args[0].zeroExtend(expr.size * 2)
+    arg2 = expr.args[1].zeroExtend(expr.size * 2)
+    one = m2_expr.ExprInt(1, arg1.size)
+    # avg(unsigned) = (a + b + 1) >> 1, addition being on at least one more bit
+    return ((arg1 + arg2 + one) >> one)[:expr.size]
+
+pavgb = vec_vertical_instr('avg', 8, _average)
+pavgw = vec_vertical_instr('avg', 16, _average)
 
 # Comparisons
 #
@@ -4794,6 +4805,8 @@ mnemo_func = {'mov': mov,
               # SSE
               "pmaddwd": pmaddwd,
               "psadbw": psadbw,
+              "pavgb": pavgb,
+              "pavgw": pavgw,
 
               # Arithmetic (floating-point)
               #
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 0e6ffdd9..f1f95d46 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3059,6 +3059,16 @@ reg_tests = [
      "0ff6d9"),
     (m32, "00000000    PSADBW     XMM0, XMM6",
      "660ff6c6"),
+
+    (m32, "00000000    PAVGB      MM3, MM1",
+     "0fe0d9"),
+    (m32, "00000000    PAVGB      XMM0, XMM6",
+     "660fe0c6"),
+
+    (m32, "00000000    PAVGW      MM3, MM1",
+     "0fe3d9"),
+    (m32, "00000000    PAVGW      XMM0, XMM6",
+     "660fe3c6"),
 ]
 
 
-- 
cgit 1.4.1


From 971b683a5f068068a2d775d5807deacd13918cf9 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 14:51:30 +0100
Subject: Add MASKMOVQ/MASKMOVDQU instruction

---
 miasm2/arch/x86/arch.py |  6 ++++++
 miasm2/arch/x86/sem.py  | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 test/arch/x86/arch.py   |  5 +++++
 3 files changed, 59 insertions(+), 1 deletion(-)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 4707fde3..40cd4e9c 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4548,6 +4548,12 @@ addop("pavgw", [bs8(0x0f), bs8(0xe3), no_xmm_pref] +
 addop("pavgw", [bs8(0x0f), bs8(0xe3), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_m128))
 
+addop("maskmovq", [bs8(0x0f), bs8(0xf7), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm_reg))
+addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_reg))
+
+
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
 """
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 5a0f1b6b..becee84e 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4401,6 +4401,52 @@ paddsb = vec_vertical_instr('+', 8, _saturation_add_signed)
 paddsw = vec_vertical_instr('+', 16, _saturation_add_signed)
 
 
+# Other SSE operations
+
+def maskmovq(ir, instr, src, mask):
+    lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size)
+    blks = []
+
+    # For each possibility, check if a write is necessary
+    check_labels = [m2_expr.ExprId(ir.gen_label(), ir.IRDst.size)
+                    for _ in xrange(0, mask.size, 8)]
+    # If the write has to be done, do it (otherwise, nothing happens)
+    write_labels = [m2_expr.ExprId(ir.gen_label(), ir.IRDst.size)
+                    for _ in xrange(0, mask.size, 8)]
+
+    # Build check blocks
+    for i, start in enumerate(xrange(0, mask.size, 8)):
+        bit = mask[start + 7: start + 8]
+        cur_label = check_labels[i]
+        next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next
+        write_label = write_labels[i]
+        check = m2_expr.ExprAff(ir.IRDst,
+                                m2_expr.ExprCond(bit,
+                                                 write_label,
+                                                 next_check_label))
+        blks.append(IRBlock(cur_label.name, [AssignBlock([check], instr)]))
+
+    # Build write blocks
+    dst_addr = mRDI[instr.mode]
+    for i, start in enumerate(xrange(0, mask.size, 8)):
+        bit = mask[start + 7: start + 8]
+        cur_label = write_labels[i]
+        next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next
+        write_addr = dst_addr + m2_expr.ExprInt(i, dst_addr.size)
+
+        # @8[DI/EDI/RDI + i] = src[byte i]
+        write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8),
+                                    src[start: start + 8])
+        jump = m2_expr.ExprAff(ir.IRDst, next_check_label)
+        blks.append(IRBlock(cur_label.name, [AssignBlock([write_mem, jump], instr)]))
+
+    # If mask is null, bypass all
+    e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(mask,
+                                                    check_labels[0],
+                                                    lbl_next))]
+    return e, blks
+
+
 mnemo_func = {'mov': mov,
               'xchg': xchg,
               'movzx': movzx,
@@ -4936,7 +4982,8 @@ mnemo_func = {'mov': mov,
               "paddsw": paddsw,
 
               "smsw": smsw,
-
+              "maskmovq": maskmovq,
+              "maskmovdqu": maskmovq,
               }
 
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 68bc1304..f491c19a 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3069,6 +3069,11 @@ reg_tests = [
      "0fe3d9"),
     (m32, "00000000    PAVGW      XMM0, XMM6",
      "660fe3c6"),
+
+    (m32, "00000000    MASKMOVQ   MM2, MM3",
+     "0ff7d3"),
+    (m32, "00000000    MASKMOVDQU XMM4, XMM5",
+     "660ff7e5"),
 ]
 
 
-- 
cgit 1.4.1


From 104d1425792e95a3df64aede5d46b43c324ca125 Mon Sep 17 00:00:00 2001
From: Ajax <commial@gmail.com>
Date: Fri, 9 Feb 2018 14:54:18 +0100
Subject: Add EMMS, implemented as a NOP

---
 miasm2/arch/x86/arch.py | 2 ++
 miasm2/arch/x86/sem.py  | 6 ++++++
 test/arch/x86/arch.py   | 3 +++
 3 files changed, 11 insertions(+)

(limited to 'miasm2/arch/x86/sem.py')

diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 40cd4e9c..72ed3309 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -4553,6 +4553,8 @@ addop("maskmovq", [bs8(0x0f), bs8(0xf7), no_xmm_pref] +
 addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm_reg))
 
+addop("emms", [bs8(0x0f), bs8(0x77)])
+
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index becee84e..ddc8aaf9 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -4447,6 +4447,11 @@ def maskmovq(ir, instr, src, mask):
     return e, blks
 
 
+def emms(ir, instr):
+    # Implemented as a NOP
+    return [], []
+
+
 mnemo_func = {'mov': mov,
               'xchg': xchg,
               'movzx': movzx,
@@ -4984,6 +4989,7 @@ mnemo_func = {'mov': mov,
               "smsw": smsw,
               "maskmovq": maskmovq,
               "maskmovdqu": maskmovq,
+              "emms": emms,
               }
 
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index f491c19a..2af90c8a 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -3074,6 +3074,9 @@ reg_tests = [
      "0ff7d3"),
     (m32, "00000000    MASKMOVDQU XMM4, XMM5",
      "660ff7e5"),
+
+    (m32, "00000000    EMMS",
+     "0f77"),
 ]
 
 
-- 
cgit 1.4.1