about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--example/jitter/x86_32.py1
-rw-r--r--miasm2/arch/x86/arch.py315
-rw-r--r--miasm2/arch/x86/sem.py1127
-rw-r--r--miasm2/expression/smt2_helper.py296
-rw-r--r--miasm2/ir/translators/smt2.py283
-rw-r--r--miasm2/jitter/arch/JitCore_x86.c37
-rw-r--r--miasm2/jitter/vm_mngr.c1
-rw-r--r--test/arch/x86/arch.py283
-rw-r--r--test/arch/x86/unit/mn_pcmpeq.py64
-rw-r--r--test/arch/x86/unit/mn_pextr.py25
-rw-r--r--test/arch/x86/unit/mn_pinsr.py25
-rw-r--r--test/arch/x86/unit/mn_pmaxu.py25
-rw-r--r--test/arch/x86/unit/mn_pminu.py25
-rw-r--r--test/arch/x86/unit/mn_pmovmskb.py26
-rw-r--r--test/arch/x86/unit/mn_pshufb.py25
-rw-r--r--test/arch/x86/unit/mn_psrl_psll.py55
-rw-r--r--test/arch/x86/unit/mn_punpck.py124
-rw-r--r--test/ir/translators/smt2.py40
-rw-r--r--test/test_all.py11
19 files changed, 2515 insertions, 273 deletions
diff --git a/example/jitter/x86_32.py b/example/jitter/x86_32.py
index 1b2aa012..2eee1742 100644
--- a/example/jitter/x86_32.py
+++ b/example/jitter/x86_32.py
@@ -38,4 +38,3 @@ myjit.add_breakpoint(0x1337beef, code_sentinelle)
 
 myjit.init_run(run_addr)
 myjit.continue_run()
-del(myjit)
diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 74ac0939..36e2e3b7 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -2151,6 +2151,54 @@ class x86_rm_wd(x86_rm_sd):
             yield x
 
 
+class x86_rm_08(x86_rm_arg):
+    msize = 8
+
+    def decode(self, v):
+        p = self.parent
+        xx = self.get_modrm()
+        expr = modrm2expr(xx, p, 0)
+        if not isinstance(expr, ExprMem):
+            self.expr = expr
+            return True
+        self.expr = ExprMem(expr.arg, self.msize)
+        return self.expr is not None
+
+    def encode(self):
+        if isinstance(self.expr, ExprInt):
+            raise StopIteration
+        p = self.parent
+        v_cand, segm, ok = expr2modrm(self.expr, p, 0, 0, 0, 0)
+        for x in self.gen_cand(v_cand, p.v_admode()):
+            yield x
+
+class x86_rm_reg_m08(x86_rm_arg):
+    msize = 8
+
+    def decode(self, v):
+        ret = x86_rm_arg.decode(self, v)
+        if not ret:
+            return ret
+        if not isinstance(self.expr, ExprMem):
+            return True
+        self.expr = ExprMem(self.expr.arg, self.msize)
+        return self.expr is not None
+
+    def encode(self):
+        if isinstance(self.expr, ExprInt):
+            raise StopIteration
+        p = self.parent
+        if isinstance(self.expr, ExprMem):
+            expr = ExprMem(self.expr.arg, 32)
+        else:
+            expr = self.expr
+        v_cand, segm, ok = expr2modrm(expr, p, 1, 0, 0, 0)
+        for x in self.gen_cand(v_cand, p.v_admode()):
+            yield x
+
+class x86_rm_reg_m16(x86_rm_reg_m08):
+    msize = 16
+
 class x86_rm_m64(x86_rm_arg):
     msize = 64
 
@@ -2222,8 +2270,11 @@ class x86_rm_mm(x86_rm_m80):
         p = self.parent
         xx = self.get_modrm()
         expr = modrm2expr(xx, p, 0, 0, self.is_xmm, self.is_mm)
-        if isinstance(expr, ExprMem) and expr.size != self.msize:
-            expr = ExprMem(expr.arg, self.msize)
+        if isinstance(expr, ExprMem):
+            if self.msize is None:
+                return False
+            if expr.size != self.msize:
+                expr = ExprMem(expr.arg, self.msize)
         self.expr = expr
         return True
 
@@ -2271,6 +2322,16 @@ class x86_rm_xmm_m64(x86_rm_mm):
     is_xmm = True
 
 
+class x86_rm_xmm_reg(x86_rm_mm):
+    msize = None
+    is_mm = False
+    is_xmm = True
+
+class x86_rm_mm_reg(x86_rm_mm):
+    msize = None
+    is_mm = True
+    is_xmm = False
+
 class x86_rm_reg_noarg(object):
     prio = default_prio + 1
 
@@ -3068,17 +3129,22 @@ rm_arg_sx = bs(l=0, cls=(x86_rm_sx,), fname='rmarg')
 rm_arg_sxd = bs(l=0, cls=(x86_rm_sxd,), fname='rmarg')
 rm_arg_sd = bs(l=0, cls=(x86_rm_sd,), fname='rmarg')
 rm_arg_wd = bs(l=0, cls=(x86_rm_wd,), fname='rmarg')
+rm_arg_08 = bs(l=0, cls=(x86_rm_08,), fname='rmarg')
+rm_arg_reg_m08 = bs(l=0, cls=(x86_rm_reg_m08,), fname='rmarg')
+rm_arg_reg_m16 = bs(l=0, cls=(x86_rm_reg_m16,), fname='rmarg')
+rm_arg_m08 = bs(l=0, cls=(x86_rm_m08,), fname='rmarg')
 rm_arg_m64 = bs(l=0, cls=(x86_rm_m64,), fname='rmarg')
 rm_arg_m80 = bs(l=0, cls=(x86_rm_m80,), fname='rmarg')
-rm_arg_m08 = bs(l=0, cls=(x86_rm_m08,), fname='rmarg')
 rm_arg_m16 = bs(l=0, cls=(x86_rm_m16,), fname='rmarg')
 
 rm_arg_mm = bs(l=0, cls=(x86_rm_mm,), fname='rmarg')
 rm_arg_mm_m64 = bs(l=0, cls=(x86_rm_mm_m64,), fname='rmarg')
+rm_arg_mm_reg = bs(l=0, cls=(x86_rm_mm_reg,), fname='rmarg')
 
 rm_arg_xmm = bs(l=0, cls=(x86_rm_xmm,), fname='rmarg')
 rm_arg_xmm_m32 = bs(l=0, cls=(x86_rm_xmm_m32,), fname='rmarg')
 rm_arg_xmm_m64 = bs(l=0, cls=(x86_rm_xmm_m64,), fname='rmarg')
+rm_arg_xmm_reg = bs(l=0, cls=(x86_rm_xmm_reg,), fname='rmarg')
 
 swapargs = bs_swapargs(l=1, fname="swap", mn_mod=range(1 << 1))
 
@@ -3725,7 +3791,7 @@ addop("sbb", [bs("100000"), se, w8] + rmmod(d3, rm_arg_w8) + [d_imm])
 addop("sbb", [bs("000110"), swapargs, w8] +
       rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])
 
-addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_m08))
+addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_08))
 addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0, modrm=mod_mem))
 addop("shld", [bs8(0x0f), bs8(0xa4)] +
       rmmod(rmreg) + [u08], [rm_arg, rmreg, u08])
@@ -3807,6 +3873,22 @@ addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_f3]
 addop("movdqa", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_66]
       + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
 
+addop("movhpd", [bs8(0x0f), bs("0001011"), swapargs, pref_66] +
+      rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64])
+addop("movhps", [bs8(0x0f), bs("0001011"), swapargs, no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64])
+addop("movlpd", [bs8(0x0f), bs("0001001"), swapargs, pref_66] +
+      rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64])
+addop("movlps", [bs8(0x0f), bs("0001001"), swapargs, no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64])
+
+addop("movhlps", [bs8(0x0f), bs8(0x12), no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_xmm_reg), [xmm_reg, rm_arg_xmm_reg])
+addop("movlhps", [bs8(0x0f), bs8(0x16), no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_xmm_reg), [xmm_reg, rm_arg_xmm_reg])
+
+addop("movdq2q", [bs8(0x0f), bs8(0xd6), pref_f2] +
+      rmmod(mm_reg, rm_arg_xmm_reg), [mm_reg, rm_arg_xmm_reg])
 
 
 ## Additions
@@ -3867,6 +3949,9 @@ addop("xorpd", [bs8(0x0f), bs8(0x57), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 addop("andps", [bs8(0x0f), bs8(0x54), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
 addop("andpd", [bs8(0x0f), bs8(0x54), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 
+addop("andnps", [bs8(0x0f), bs8(0x55), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
+addop("andnpd", [bs8(0x0f), bs8(0x55), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
+
 ## OR
 addop("orps", [bs8(0x0f), bs8(0x56), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
 addop("orpd", [bs8(0x0f), bs8(0x56), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
@@ -3879,6 +3964,14 @@ addop("pand", [bs8(0x0f), bs8(0xdb), no_xmm_pref] +
 addop("pand", [bs8(0x0f), bs8(0xdb), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
 
+## ANDN
+# MMX
+addop("pandn", [bs8(0x0f), bs8(0xdf), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+# SSE
+addop("pandn", [bs8(0x0f), bs8(0xdf), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
 ## OR
 # MMX
 addop("por", [bs8(0x0f), bs8(0xeb), no_xmm_pref] +
@@ -3895,6 +3988,15 @@ addop("pxor", [bs8(0x0f), bs8(0xef), no_xmm_pref] +
 addop("pxor", [bs8(0x0f), bs8(0xef), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm))
 
+addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+addop("pshufd", [bs8(0x0f), bs8(0x70), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm) + [u08])
+
+
+
 ### Convert
 ### SS = single precision
 ### SD = double precision
@@ -3948,6 +4050,211 @@ addop("cvttsd2si",[bs8(0x0f), bs8(0x2c), pref_f2]
 addop("cvttss2si",[bs8(0x0f), bs8(0x2c), pref_f3]
       + rmmod(reg, rm_arg_xmm_m32))
 
+addop("psrlq", [bs8(0x0f), bs8(0x73), no_xmm_pref] +
+      rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08])
+addop("psrlq", [bs8(0x0f), bs8(0x73), pref_66] +
+      rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
+
+addop("psrlq", [bs8(0x0f), bs8(0xd3), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+addop("psrlq", [bs8(0x0f), bs8(0xd3), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
+
+addop("psrld", [bs8(0x0f), bs8(0x72), no_xmm_pref] +
+      rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08])
+addop("psrld", [bs8(0x0f), bs8(0x72), pref_66] +
+      rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
+
+addop("psrld", [bs8(0x0f), bs8(0xd2), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+addop("psrld", [bs8(0x0f), bs8(0xd2), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
+
+addop("psrlw", [bs8(0x0f), bs8(0x71), no_xmm_pref] +
+      rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08])
+addop("psrlw", [bs8(0x0f), bs8(0x71), pref_66] +
+      rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
+
+addop("psrlw", [bs8(0x0f), bs8(0xd1), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+addop("psrlw", [bs8(0x0f), bs8(0xd1), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
+
+
+addop("psllq", [bs8(0x0f), bs8(0x73), no_xmm_pref] +
+      rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08])
+addop("psllq", [bs8(0x0f), bs8(0x73), pref_66] +
+      rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
+
+addop("psllq", [bs8(0x0f), bs8(0xf3), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+addop("psllq", [bs8(0x0f), bs8(0xf3), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
+
+addop("pslld", [bs8(0x0f), bs8(0x72), no_xmm_pref] +
+      rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08])
+addop("pslld", [bs8(0x0f), bs8(0x72), pref_66] +
+      rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
+
+addop("pslld", [bs8(0x0f), bs8(0xf2), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+addop("pslld", [bs8(0x0f), bs8(0xf2), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
+
+addop("psllw", [bs8(0x0f), bs8(0x71), no_xmm_pref] +
+      rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08])
+addop("psllw", [bs8(0x0f), bs8(0x71), pref_66] +
+      rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08])
+
+addop("psllw", [bs8(0x0f), bs8(0xf1), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm])
+addop("psllw", [bs8(0x0f), bs8(0xf1), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
+
+
+addop("pmaxub", [bs8(0x0f), bs8(0xde), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("pmaxub", [bs8(0x0f), bs8(0xde), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("pmaxuw", [bs8(0x0f), bs8(0x38), bs8(0x3e), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("pmaxud", [bs8(0x0f), bs8(0x38), bs8(0x3f), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+addop("pminub", [bs8(0x0f), bs8(0xda), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("pminub", [bs8(0x0f), bs8(0xda), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("pminuw", [bs8(0x0f), bs8(0x38), bs8(0x3a), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("pminud", [bs8(0x0f), bs8(0x38), bs8(0x3b), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+addop("pcmpeqb", [bs8(0x0f), bs8(0x74), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("pcmpeqb", [bs8(0x0f), bs8(0x74), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("pcmpeqw", [bs8(0x0f), bs8(0x75), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("pcmpeqw", [bs8(0x0f), bs8(0x75), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("pcmpeqd", [bs8(0x0f), bs8(0x76), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("pcmpeqd", [bs8(0x0f), bs8(0x76), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+addop("punpckhbw", [bs8(0x0f), bs8(0x68), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("punpckhbw", [bs8(0x0f), bs8(0x68), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("punpckhwd", [bs8(0x0f), bs8(0x69), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("punpckhwd", [bs8(0x0f), bs8(0x69), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("punpckhdq", [bs8(0x0f), bs8(0x6a), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("punpckhdq", [bs8(0x0f), bs8(0x6a), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("punpckhqdq", [bs8(0x0f), bs8(0x6d), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+
+addop("punpcklbw", [bs8(0x0f), bs8(0x60), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("punpcklbw", [bs8(0x0f), bs8(0x60), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("punpcklwd", [bs8(0x0f), bs8(0x61), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("punpcklwd", [bs8(0x0f), bs8(0x61), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("punpckldq", [bs8(0x0f), bs8(0x62), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+addop("punpckldq", [bs8(0x0f), bs8(0x62), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+addop("punpcklqdq", [bs8(0x0f), bs8(0x6c), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+addop("unpckhps", [bs8(0x0f), bs8(0x15), no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_xmm))
+addop("unpckhpd", [bs8(0x0f), bs8(0x15), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+addop("unpcklps", [bs8(0x0f), bs8(0x14), no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_xmm))
+addop("unpcklpd", [bs8(0x0f), bs8(0x14), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
+
+
+addop("pinsrb", [bs8(0x0f), bs8(0x3a), bs8(0x20), pref_66] +
+      rmmod(xmm_reg, rm_arg_reg_m08) + [u08])
+addop("pinsrd", [bs8(0x0f), bs8(0x3a), bs8(0x22), pref_66, bs_opmode32] +
+      rmmod(xmm_reg, rm_arg) + [u08])
+addop("pinsrq", [bs8(0x0f), bs8(0x3a), bs8(0x22), pref_66] +
+      rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08])
+
+addop("pinsrw", [bs8(0x0f), bs8(0xc4), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_reg_m16) + [u08])
+addop("pinsrw", [bs8(0x0f), bs8(0xc4), pref_66] +
+      rmmod(xmm_reg, rm_arg_reg_m16) + [u08])
+
+
+addop("pextrb", [bs8(0x0f), bs8(0x3a), bs8(0x14), pref_66] +
+      rmmod(xmm_reg, rm_arg_reg_m08) + [u08], [rm_arg_reg_m08, xmm_reg, u08])
+addop("pextrd", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66, bs_opmode32] +
+      rmmod(xmm_reg, rm_arg) + [u08], [rm_arg, xmm_reg, u08])
+addop("pextrq", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66] +
+      rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08], [rm_arg_m64, xmm_reg, u08])
+
+
+addop("pextrw", [bs8(0x0f), bs8(0x3a), bs8(0x15), pref_66] +
+      rmmod(xmm_reg, rm_arg_m16) + [u08], [rm_arg_m16, xmm_reg, u08])
+#addop("pextrw", [bs8(0x0f), bs8(0x3a), bs8(0x15), no_xmm_pref] +
+#      rmmod(mm_reg, rm_arg_m16) + [u08], [rm_arg_m16, mm_reg, u08])
+
+addop("pextrw", [bs8(0x0f), bs8(0xc5), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_reg_m16) + [u08], [rm_arg_reg_m16, mm_reg, u08])
+addop("pextrw", [bs8(0x0f), bs8(0xc5), pref_66] +
+      rmmod(xmm_reg, rm_arg_reg_m16) + [u08], [rm_arg_reg_m16, xmm_reg, u08])
+
+
+addop("sqrtpd", [bs8(0x0f), bs8(0x51), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+addop("sqrtps", [bs8(0x0f), bs8(0x51), no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_xmm))
+addop("sqrtsd", [bs8(0x0f), bs8(0x51), pref_f2] +
+      rmmod(xmm_reg, rm_arg_xmm_m64))
+addop("sqrtss", [bs8(0x0f), bs8(0x51), pref_f3] +
+      rmmod(xmm_reg, rm_arg_xmm_m32))
+
+addop("pmovmskb", [bs8(0x0f), bs8(0xd7), no_xmm_pref] +
+      rmmod(reg, rm_arg_mm_reg))
+addop("pmovmskb", [bs8(0x0f), bs8(0xd7), pref_66] +
+      rmmod(reg, rm_arg_xmm_reg))
+
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
 # mn_x86.bintree = factor_fields_all(mn_x86.bintree)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 802b6283..ff27937f 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -65,13 +65,13 @@ OF(A-B) = ((A XOR D) AND (A XOR B)) < 0
 """
 
 
-
 # XXX TODO make default check against 0 or not 0 (same eq as in C)
 
 
 def update_flag_zf(a):
-    return [m2_expr.ExprAff(zf, m2_expr.ExprCond(a, m2_expr.ExprInt_from(zf, 0),
-                                                 m2_expr.ExprInt_from(zf, 1)))]
+    return [m2_expr.ExprAff(
+        zf, m2_expr.ExprCond(a, m2_expr.ExprInt_from(zf, 0),
+                             m2_expr.ExprInt_from(zf, 1)))]
 
 
 def update_flag_nf(a):
@@ -123,6 +123,7 @@ def arith_flag(a, b, c):
 
 # checked: ok for adc add because b & c before +cf
 
+
 def update_flag_add_cf(op1, op2, res):
     "Compute cf in @res = @op1 + @op2"
     ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (~(op1 ^ op2)))).msb()
@@ -172,6 +173,7 @@ def set_float_cs_eip(instr):
     e.append(m2_expr.ExprAff(float_cs, CS))
     return e
 
+
 def mem2double(arg):
     """
     Add float convertion if argument is an ExprMem
@@ -185,6 +187,7 @@ def mem2double(arg):
     else:
         return arg
 
+
 def float_implicit_st0(arg1, arg2):
     """
     Generate full float operators if one argument is implicit (float_st0)
@@ -279,6 +282,7 @@ def movzx(ir, instr, a, b):
     e = [m2_expr.ExprAff(a, b.zeroExtend(a.size))]
     return e, []
 
+
 def movsx(ir, instr, a, b):
     e = [m2_expr.ExprAff(a, b.signExtend(a.size))]
     return e, []
@@ -317,7 +321,7 @@ def adc(ir, instr, a, b):
     e = []
     c = a + (b + m2_expr.ExprCompose([(m2_expr.ExprInt(0, a.size - 1),
                                        1, a.size),
-                              (cf, 0, 1)]))
+                                      (cf, 0, 1)]))
     e += update_flag_arith(c)
     e += update_flag_af(a, b, c)
     e += update_flag_add(a, b, c)
@@ -341,7 +345,7 @@ def sbb(ir, instr, a, b):
     e = []
     c = a - (b + m2_expr.ExprCompose([(m2_expr.ExprInt(0, a.size - 1),
                                        1, a.size),
-                              (cf, 0, 1)]))
+                                      (cf, 0, 1)]))
     e += update_flag_arith(c)
     e += update_flag_af(a, b, c)
     e += update_flag_sub(a, b, c)
@@ -391,6 +395,7 @@ def pxor(ir, instr, a, b):
     e.append(m2_expr.ExprAff(a, c))
     return e, []
 
+
 def l_or(ir, instr, a, b):
     e = []
     c = a | b
@@ -414,7 +419,6 @@ def l_test(ir, instr, a, b):
     return e, []
 
 
-
 def get_shift(a, b):
     # b.size must match a
     b = b.zeroExtend(a.size)
@@ -425,6 +429,7 @@ def get_shift(a, b):
     shift = expr_simp(shift)
     return shift
 
+
 def _rotate_tpl(ir, instr, a, b, op, left=False, include_cf=False):
     """Template to generate a rotater with operation @op
     A temporary basic block is generated to handle 0-rotate
@@ -515,7 +520,8 @@ def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False,
 
         # An overflow can occured, emulate the 'undefined behavior'
         # Overflow behavior if (shift / size % 2)
-        base_cond_overflow = c if left else (c - m2_expr.ExprInt(1, size=c.size))
+        base_cond_overflow = c if left else (
+            c - m2_expr.ExprInt(1, size=c.size))
         cond_overflow = base_cond_overflow & m2_expr.ExprInt(a.size, c.size)
         if left:
             # Overflow occurs one round before right
@@ -584,9 +590,9 @@ def shrd_cl(ir, instr, a, b):
     c = (a >> shifter) | (b << (m2_expr.ExprInt_from(a, a.size) - shifter))
     new_cf = (a >> (shifter - m2_expr.ExprInt_from(a, 1)))[:1]
     e.append(m2_expr.ExprAff(cf, m2_expr.ExprCond(shifter,
-                                  new_cf,
-                                  cf)
-                     )
+                                                  new_cf,
+                                                  cf)
+                             )
              )
     e.append(m2_expr.ExprAff(of, a.msb()))
     e += update_flag_znp(c)
@@ -604,9 +610,9 @@ def sal(ir, instr, a, b):
     c = m2_expr.ExprOp('a<<', a, shifter)
     new_cf = (a >> (m2_expr.ExprInt_from(a, a.size) - shifter))[:1]
     e.append(m2_expr.ExprAff(cf, m2_expr.ExprCond(shifter,
-                                  new_cf,
-                                  cf)
-                     )
+                                                  new_cf,
+                                                  cf)
+                             )
              )
     e += update_flag_znp(c)
     e.append(m2_expr.ExprAff(of, c.msb() ^ new_cf))
@@ -674,6 +680,7 @@ def inc(ir, instr, a):
     e.append(m2_expr.ExprAff(a, c))
     return e, []
 
+
 def dec(ir, instr, a):
     e = []
     b = m2_expr.ExprInt_from(a, -1)
@@ -705,9 +712,11 @@ def push_gen(ir, instr, a, size):
     e.append(m2_expr.ExprAff(m2_expr.ExprMem(new_sp, size), a))
     return e, []
 
+
 def push(ir, instr, a):
     return push_gen(ir, instr, a, instr.mode)
 
+
 def pushw(ir, instr, a):
     return push_gen(ir, instr, a, 16)
 
@@ -731,32 +740,37 @@ def pop_gen(ir, instr, a, size):
     e.append(m2_expr.ExprAff(a, m2_expr.ExprMem(c, a.size)))
     return e, []
 
+
 def pop(ir, instr, a):
     return pop_gen(ir, instr, a, instr.mode)
 
+
 def popw(ir, instr, a):
     return pop_gen(ir, instr, a, 16)
 
 
 def sete(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(zf, m2_expr.ExprInt_from(a, 1),
-                                                 m2_expr.ExprInt_from(a, 0))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(zf, m2_expr.ExprInt_from(a, 1),
+                                            m2_expr.ExprInt_from(a, 0))))
     return e, []
 
 
 def setnz(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(zf, m2_expr.ExprInt_from(a, 0),
-                                                 m2_expr.ExprInt_from(a, 1))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(zf, m2_expr.ExprInt_from(a, 0),
+                                            m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
 def setl(ir, instr, a):
     e = []
     e.append(
-        m2_expr.ExprAff(a, m2_expr.ExprCond(nf - of, m2_expr.ExprInt_from(a, 1),
-                                            m2_expr.ExprInt_from(a, 0))))
+        m2_expr.ExprAff(
+            a, m2_expr.ExprCond(nf - of, m2_expr.ExprInt_from(a, 1),
+                                m2_expr.ExprInt_from(a, 0))))
     return e, []
 
 
@@ -772,75 +786,83 @@ def setg(ir, instr, a):
 def setge(ir, instr, a):
     e = []
     e.append(
-        m2_expr.ExprAff(a, m2_expr.ExprCond(nf - of, m2_expr.ExprInt_from(a, 0),
-                                            m2_expr.ExprInt_from(a, 1))))
+        m2_expr.ExprAff(
+            a, m2_expr.ExprCond(nf - of, m2_expr.ExprInt_from(a, 0),
+                                m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
 def seta(ir, instr, a):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(cf | zf,
-                                 m2_expr.ExprInt_from(a, 0),
-                                 m2_expr.ExprInt_from(a, 1))))
+                                                 m2_expr.ExprInt_from(a, 0),
+                                                 m2_expr.ExprInt_from(a, 1))))
 
     return e, []
 
 
 def setae(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(cf, m2_expr.ExprInt_from(a, 0),
-                                                 m2_expr.ExprInt_from(a, 1))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(cf, m2_expr.ExprInt_from(a, 0),
+                                            m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
 def setb(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(cf, m2_expr.ExprInt_from(a, 1),
-                                                 m2_expr.ExprInt_from(a, 0))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(cf, m2_expr.ExprInt_from(a, 1),
+                                            m2_expr.ExprInt_from(a, 0))))
     return e, []
 
 
 def setbe(ir, instr, a):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(cf | zf,
-                                 m2_expr.ExprInt_from(a, 1),
-                                 m2_expr.ExprInt_from(a, 0)))
+                                                 m2_expr.ExprInt_from(a, 1),
+                                                 m2_expr.ExprInt_from(a, 0)))
              )
     return e, []
 
 
 def setns(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(nf, m2_expr.ExprInt_from(a, 0),
-                                                 m2_expr.ExprInt_from(a, 1))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(nf, m2_expr.ExprInt_from(a, 0),
+                                            m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
 def sets(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(nf, m2_expr.ExprInt_from(a, 1),
-                                                 m2_expr.ExprInt_from(a, 0))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(nf, m2_expr.ExprInt_from(a, 1),
+                                            m2_expr.ExprInt_from(a, 0))))
     return e, []
 
 
 def seto(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(of, m2_expr.ExprInt_from(a, 1),
-                                                 m2_expr.ExprInt_from(a, 0))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(of, m2_expr.ExprInt_from(a, 1),
+                                            m2_expr.ExprInt_from(a, 0))))
     return e, []
 
 
 def setp(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(pf, m2_expr.ExprInt_from(a, 1),
-                                                 m2_expr.ExprInt_from(a, 0))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(pf, m2_expr.ExprInt_from(a, 1),
+                                            m2_expr.ExprInt_from(a, 0))))
     return e, []
 
 
 def setnp(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(pf, m2_expr.ExprInt_from(a, 0),
-                                                 m2_expr.ExprInt_from(a, 1))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(pf, m2_expr.ExprInt_from(a, 0),
+                                            m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
@@ -865,23 +887,25 @@ def setna(ir, instr, a):
 def setnbe(ir, instr, a):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(cf | zf,
-                                 m2_expr.ExprInt_from(a, 0),
-                                 m2_expr.ExprInt_from(a, 1)))
+                                                 m2_expr.ExprInt_from(a, 0),
+                                                 m2_expr.ExprInt_from(a, 1)))
              )
     return e, []
 
 
 def setno(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(of, m2_expr.ExprInt_from(a, 0),
-                                                 m2_expr.ExprInt_from(a, 1))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(of, m2_expr.ExprInt_from(a, 0),
+                                            m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
 def setnb(ir, instr, a):
     e = []
-    e.append(m2_expr.ExprAff(a, m2_expr.ExprCond(cf, m2_expr.ExprInt_from(a, 0),
-                                                 m2_expr.ExprInt_from(a, 1))))
+    e.append(
+        m2_expr.ExprAff(a, m2_expr.ExprCond(cf, m2_expr.ExprInt_from(a, 0),
+                                            m2_expr.ExprInt_from(a, 1))))
     return e, []
 
 
@@ -898,24 +922,24 @@ def bswap(ir, instr, a):
     e = []
     if a.size == 16:
         c = m2_expr.ExprCompose([(a[:8],        8, 16),
-                         (a[8:16],      0,  8),
-                         ])
+                                 (a[8:16],      0,  8),
+                                 ])
     elif a.size == 32:
         c = m2_expr.ExprCompose([(a[:8],      24, 32),
-                         (a[8:16],    16, 24),
-                         (a[16:24],   8, 16),
-                         (a[24:32],   0, 8),
-                         ])
+                                 (a[8:16],    16, 24),
+                                 (a[16:24],   8, 16),
+                                 (a[24:32],   0, 8),
+                                 ])
     elif a.size == 64:
         c = m2_expr.ExprCompose([(a[:8],      56, 64),
-                         (a[8:16],    48, 56),
-                         (a[16:24],   40, 48),
-                         (a[24:32],   32, 40),
-                         (a[32:40],   24, 32),
-                         (a[40:48],   16, 24),
-                         (a[48:56],    8, 16),
-                         (a[56:64],    0, 8),
-                         ])
+                                 (a[8:16],    48, 56),
+                                 (a[16:24],   40, 48),
+                                 (a[24:32],   32, 40),
+                                 (a[32:40],   24, 32),
+                                 (a[40:48],   16, 24),
+                                 (a[48:56],    8, 16),
+                                 (a[56:64],    0, 8),
+                                 ])
     else:
         raise ValueError('the size DOES matter')
     e.append(m2_expr.ExprAff(a, c))
@@ -1008,9 +1032,11 @@ def compose_eflag(s=32):
 def pushfd(ir, instr):
     return push(ir, instr, compose_eflag())
 
+
 def pushfq(ir, instr):
     return push(ir, instr, compose_eflag().zeroExtend(64))
 
+
 def pushfw(ir, instr):
     return pushw(ir, instr, compose_eflag(16))
 
@@ -1036,13 +1062,14 @@ def popfd(ir, instr):
     e.append(m2_expr.ExprAff(vip, m2_expr.ExprSlice(tmp, 20, 21)))
     e.append(m2_expr.ExprAff(i_d, m2_expr.ExprSlice(tmp, 21, 22)))
     e.append(m2_expr.ExprAff(mRSP[instr.mode],
-                             mRSP[instr.mode] + m2_expr.ExprInt_from(mRSP[instr.mode], instr.mode/8)))
+                             mRSP[instr.mode] + m2_expr.ExprInt_from(mRSP[instr.mode], instr.mode / 8)))
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprCond(m2_expr.ExprSlice(tmp, 8, 9),
-                              m2_expr.ExprInt32(EXCEPT_SOFT_BP),
-                              exception_flags
-                              )
-                     )
+                             m2_expr.ExprCond(m2_expr.ExprSlice(tmp, 8, 9),
+                                              m2_expr.ExprInt32(
+                                                  EXCEPT_SOFT_BP),
+                                              exception_flags
+                                              )
+                             )
              )
     return e, []
 
@@ -1060,7 +1087,8 @@ def _tpl_eflags(tmp):
 def popfw(ir, instr):
     tmp = m2_expr.ExprMem(mRSP[instr.mode])
     e = _tpl_eflags(tmp)
-    e.append(m2_expr.ExprAff(mRSP[instr.mode], mRSP[instr.mode] + m2_expr.ExprInt(2, mRSP[instr.mode].size)))
+    e.append(
+        m2_expr.ExprAff(mRSP[instr.mode], mRSP[instr.mode] + m2_expr.ExprInt(2, mRSP[instr.mode].size)))
     return e, []
 
 
@@ -1118,7 +1146,6 @@ def call(ir, instr, dst):
     myesp = mRSP[instr.mode][:opmode]
     n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size)
 
-
     if (isinstance(dst, m2_expr.ExprOp) and dst.op == "segm"):
         # call far
         if instr.mode != 16:
@@ -1132,19 +1159,18 @@ def call(ir, instr, dst):
 
         e.append(m2_expr.ExprAff(ir.IRDst, m2))
 
-        c = myesp + m2_expr.ExprInt(-s/8, s)
+        c = myesp + m2_expr.ExprInt(-s / 8, s)
         e.append(m2_expr.ExprAff(m2_expr.ExprMem(c, size=s).zeroExtend(s),
                                  CS.zeroExtend(s)))
 
-        c = myesp + m2_expr.ExprInt(-2*s/8, s)
+        c = myesp + m2_expr.ExprInt(-2 * s / 8, s)
         e.append(m2_expr.ExprAff(m2_expr.ExprMem(c, size=s).zeroExtend(s),
                                  meip.zeroExtend(s)))
 
-        c = myesp + m2_expr.ExprInt((-2*s) / 8, s)
+        c = myesp + m2_expr.ExprInt((-2 * s) / 8, s)
         e.append(m2_expr.ExprAff(myesp, c))
         return e, []
 
-
     c = myesp + m2_expr.ExprInt((-s / 8), s)
     e.append(m2_expr.ExprAff(myesp, c))
     if ir.do_stk_segm:
@@ -1152,7 +1178,7 @@ def call(ir, instr, dst):
     e.append(m2_expr.ExprAff(m2_expr.ExprMem(c, size=s), n))
     e.append(m2_expr.ExprAff(meip, dst.zeroExtend(ir.IRDst.size)))
     e.append(m2_expr.ExprAff(ir.IRDst, dst.zeroExtend(ir.IRDst.size)))
-    #if not expr_is_int_or_label(dst):
+    # if not expr_is_int_or_label(dst):
     #    dst = meip
     return e, []
 
@@ -1167,10 +1193,10 @@ def ret(ir, instr, a=None):
 
     if a is None:
         a = m2_expr.ExprInt(0, s)
-        value =  (myesp + (m2_expr.ExprInt((s / 8), s)))
+        value = (myesp + (m2_expr.ExprInt((s / 8), s)))
     else:
         a = a.zeroExtend(s)
-        value =  (myesp + (m2_expr.ExprInt((s / 8), s) + a))
+        value = (myesp + (m2_expr.ExprInt((s / 8), s) + a))
 
     e.append(m2_expr.ExprAff(myesp, value))
     c = myesp
@@ -1206,7 +1232,7 @@ def retf(ir, instr, a=None):
         c = m2_expr.ExprOp('segm', SS, c)
     e.append(m2_expr.ExprAff(CS, m2_expr.ExprMem(c, size=16)))
 
-    value =  myesp + (m2_expr.ExprInt((2*s) / 8, s) + a)
+    value = myesp + (m2_expr.ExprInt((2 * s) / 8, s) + a)
     e.append(m2_expr.ExprAff(myesp, value))
     return e, []
 
@@ -1235,7 +1261,7 @@ def enter(ir, instr, a, b):
     esp_tmp = myesp - m2_expr.ExprInt(s / 8, s)
     e.append(m2_expr.ExprAff(m2_expr.ExprMem(esp_tmp,
                              size=s),
-                     myebp))
+                             myebp))
     e.append(m2_expr.ExprAff(myebp, esp_tmp))
     e.append(m2_expr.ExprAff(myesp,
                              myesp - (a + m2_expr.ExprInt(s / 8, s))))
@@ -1260,8 +1286,11 @@ def jmpf(ir, instr, a):
     if (isinstance(a, m2_expr.ExprOp) and a.op == "segm"):
         segm = a.args[0]
         base = a.args[1]
-        m1 = segm.zeroExtend(CS.size)#m2_expr.ExprMem(m2_expr.ExprOp('segm', segm, base), 16)
-        m2 = base.zeroExtend(meip.size)#m2_expr.ExprMem(m2_expr.ExprOp('segm', segm, base + m2_expr.ExprInt_from(base, 2)), s)
+        m1 = segm.zeroExtend(
+            CS.size)  # m2_expr.ExprMem(m2_expr.ExprOp('segm', segm, base), 16)
+        m2 = base.zeroExtend(meip.size)
+                             # m2_expr.ExprMem(m2_expr.ExprOp('segm', segm,
+                             # base + m2_expr.ExprInt_from(base, 2)), s)
     else:
         m1 = m2_expr.ExprMem(a, 16)
         m2 = m2_expr.ExprMem(a + m2_expr.ExprInt_from(a, 2), meip.size)
@@ -1301,7 +1330,7 @@ def jnp(ir, instr, dst):
 
 
 def ja(ir, instr, dst):
-    return gen_jcc(ir, instr, cf|zf, dst, False)
+    return gen_jcc(ir, instr, cf | zf, dst, False)
 
 
 def jae(ir, instr, dst):
@@ -1313,23 +1342,23 @@ def jb(ir, instr, dst):
 
 
 def jbe(ir, instr, dst):
-    return gen_jcc(ir, instr, cf|zf, dst, True)
+    return gen_jcc(ir, instr, cf | zf, dst, True)
 
 
 def jge(ir, instr, dst):
-    return gen_jcc(ir, instr, nf-of, dst, False)
+    return gen_jcc(ir, instr, nf - of, dst, False)
 
 
 def jg(ir, instr, dst):
-    return gen_jcc(ir, instr, zf|(nf-of), dst, False)
+    return gen_jcc(ir, instr, zf | (nf - of), dst, False)
 
 
 def jl(ir, instr, dst):
-    return gen_jcc(ir, instr, nf-of, dst, True)
+    return gen_jcc(ir, instr, nf - of, dst, True)
 
 
 def jle(ir, instr, dst):
-    return gen_jcc(ir, instr, zf|(nf-of), dst, True)
+    return gen_jcc(ir, instr, zf | (nf - of), dst, True)
 
 
 def js(ir, instr, dst):
@@ -1374,8 +1403,8 @@ def loopne(ir, instr, dst):
     n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size)
 
     c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size),
-                 m2_expr.ExprInt1(1),
-                 m2_expr.ExprInt1(0))
+                         m2_expr.ExprInt1(1),
+                         m2_expr.ExprInt1(0))
     c &= zf ^ m2_expr.ExprInt1(1)
 
     e.append(m2_expr.ExprAff(myecx, myecx - m2_expr.ExprInt_from(myecx, 1)))
@@ -1395,8 +1424,8 @@ def loope(ir, instr, dst):
 
     n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size)
     c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size),
-                 m2_expr.ExprInt1(1),
-                 m2_expr.ExprInt1(0))
+                         m2_expr.ExprInt1(1),
+                         m2_expr.ExprInt1(0))
     c &= zf
     e.append(m2_expr.ExprAff(myecx, myecx - m2_expr.ExprInt_from(myecx, 1)))
     dst_o = m2_expr.ExprCond(c,
@@ -1416,7 +1445,7 @@ def div(ir, instr, a):
     elif size in [16, 32, 64]:
         s1, s2 = mRDX[size], mRAX[size]
         b = m2_expr.ExprCompose([(s2, 0, size),
-                         (s1, size, size*2)])
+                                 (s1, size, size * 2)])
     else:
         raise ValueError('div arg not impl', a)
 
@@ -1426,7 +1455,7 @@ def div(ir, instr, a):
     # if 8 bit div, only ax is affected
     if size == 8:
         e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose([(c_d[:8], 0, 8),
-                                         (c_r[:8], 8, 16)])))
+                                                         (c_r[:8], 8, 16)])))
     else:
         e.append(m2_expr.ExprAff(s1, c_r[:size]))
         e.append(m2_expr.ExprAff(s2, c_d[:size]))
@@ -1444,7 +1473,7 @@ def idiv(ir, instr, a):
     elif size in [16, 32]:
         s1, s2 = mRDX[size], mRAX[size]
         b = m2_expr.ExprCompose([(s2, 0, size),
-                         (s1, size, size*2)])
+                                 (s1, size, size * 2)])
     else:
         raise ValueError('div arg not impl', a)
 
@@ -1454,7 +1483,7 @@ def idiv(ir, instr, a):
     # if 8 bit div, only ax is affected
     if size == 8:
         e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose([(c_d[:8], 0, 8),
-                                         (c_r[:8], 8, 16)])))
+                                                         (c_r[:8], 8, 16)])))
     else:
         e.append(m2_expr.ExprAff(s1, c_r[:size]))
         e.append(m2_expr.ExprAff(s2, c_d[:size]))
@@ -1468,25 +1497,25 @@ def mul(ir, instr, a):
     size = a.size
     if a.size in [16, 32, 64]:
         result = m2_expr.ExprOp('*',
-                        mRAX[size].zeroExtend(size * 2),
-                        a.zeroExtend(size * 2))
+                                mRAX[size].zeroExtend(size * 2),
+                                a.zeroExtend(size * 2))
         e.append(m2_expr.ExprAff(mRAX[size], result[:size]))
         e.append(m2_expr.ExprAff(mRDX[size], result[size:size * 2]))
 
     elif a.size == 8:
         result = m2_expr.ExprOp('*',
-                        mRAX[instr.mode][:8].zeroExtend(16),
-                        a.zeroExtend(16))
+                                mRAX[instr.mode][:8].zeroExtend(16),
+                                a.zeroExtend(16))
         e.append(m2_expr.ExprAff(mRAX[instr.mode][:16], result))
     else:
         raise ValueError('unknow size')
 
     e.append(m2_expr.ExprAff(of, m2_expr.ExprCond(result[size:size * 2],
-                                  m2_expr.ExprInt1(1),
-                                  m2_expr.ExprInt1(0))))
+                                                  m2_expr.ExprInt1(1),
+                                                  m2_expr.ExprInt1(0))))
     e.append(m2_expr.ExprAff(cf, m2_expr.ExprCond(result[size:size * 2],
-                                  m2_expr.ExprInt1(1),
-                                  m2_expr.ExprInt1(0))))
+                                                  m2_expr.ExprInt1(1),
+                                                  m2_expr.ExprInt1(0))))
 
     return e, []
 
@@ -1497,15 +1526,15 @@ def imul(ir, instr, a, b=None, c=None):
     if b is None:
         if size in [16, 32, 64]:
             result = m2_expr.ExprOp('*',
-                            mRAX[size].signExtend(size * 2),
-                            a.signExtend(size * 2))
+                                    mRAX[size].signExtend(size * 2),
+                                    a.signExtend(size * 2))
             e.append(m2_expr.ExprAff(mRAX[size], result[:size]))
             e.append(m2_expr.ExprAff(mRDX[size], result[size:size * 2]))
         elif size == 8:
             dst = mRAX[instr.mode][:16]
             result = m2_expr.ExprOp('*',
-                            mRAX[instr.mode][:8].signExtend(16),
-                            a.signExtend(16))
+                                    mRAX[instr.mode][:8].signExtend(16),
+                                    a.signExtend(16))
 
             e.append(m2_expr.ExprAff(dst, result))
         value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2),
@@ -1522,17 +1551,17 @@ def imul(ir, instr, a, b=None, c=None):
             c = b
             b = a
         result = m2_expr.ExprOp('*',
-                        b.signExtend(size * 2),
-                        c.signExtend(size * 2))
+                                b.signExtend(size * 2),
+                                c.signExtend(size * 2))
         e.append(m2_expr.ExprAff(a, result[:size]))
 
         value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2),
                                  m2_expr.ExprInt1(1),
                                  m2_expr.ExprInt1(0))
         e.append(m2_expr.ExprAff(cf, value))
-        value =  m2_expr.ExprCond(result - result[:size].signExtend(size * 2),
-                                  m2_expr.ExprInt1(1),
-                                  m2_expr.ExprInt1(0))
+        value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2),
+                                 m2_expr.ExprInt1(1),
+                                 m2_expr.ExprInt1(0))
         e.append(m2_expr.ExprAff(of, value))
     return e, []
 
@@ -1703,6 +1732,7 @@ def movs(ir, instr, size):
                              m2_expr.ExprCond(df, lbl_df_1, lbl_df_0)))
     return e, [e0, e1]
 
+
 def movsd(ir, instr, a, b):
     e = []
     if isinstance(a, m2_expr.ExprId) and isinstance(b, m2_expr.ExprMem):
@@ -1713,7 +1743,8 @@ def movsd(ir, instr, a, b):
     e.append(m2_expr.ExprAff(a, b))
     return e, []
 
-def movsd_dispatch(ir, instr, a = None, b = None):
+
+def movsd_dispatch(ir, instr, a=None, b=None):
     if a is None and b is None:
         return movs(ir, instr, 32)
     else:
@@ -1739,11 +1770,11 @@ def float_pop(avoid_flt=None, popcount=1):
     """
     avoid_flt = float_prev(avoid_flt, popcount)
     e = []
-    for i in xrange(8-popcount):
+    for i in xrange(8 - popcount):
         if avoid_flt != float_list[i]:
             e.append(m2_expr.ExprAff(float_list[i],
-                                     float_list[i+popcount]))
-    for i in xrange(8-popcount, 8):
+                                     float_list[i + popcount]))
+    for i in xrange(8 - popcount, 8):
         e.append(m2_expr.ExprAff(float_list[i],
                                  m2_expr.ExprInt_from(float_list[i], 0)))
     e.append(
@@ -1800,7 +1831,7 @@ def fxam(ir, instr):
     return e, []
 
 
-def ficom(ir, instr, a, b = None):
+def ficom(ir, instr, a, b=None):
 
     a, b = float_implicit_st0(a, b)
 
@@ -1823,7 +1854,6 @@ def ficom(ir, instr, a, b = None):
     return e, []
 
 
-
 def fcomi(ir, instr, a=None, b=None):
     # TODO unordered float
     if a is None and b is None:
@@ -1857,6 +1887,7 @@ def fucomi(ir, instr, a=None, b=None):
     # TODO unordered float
     return fcomi(ir, instr, a, b)
 
+
 def fucomip(ir, instr, a=None, b=None):
     # TODO unordered float
     return fcomip(ir, instr, a, b)
@@ -1876,7 +1907,7 @@ def fcompp(ir, instr, a=None, b=None):
     return e, extra
 
 
-def ficomp(ir, instr, a, b = None):
+def ficomp(ir, instr, a, b=None):
     e, extra = ficom(ir, instr, a, b)
     e += float_pop()
     e += set_float_cs_eip(instr)
@@ -2003,11 +2034,13 @@ def fist(ir, instr, a):
     e += set_float_cs_eip(instr)
     return e, []
 
+
 def fistp(ir, instr, a):
     e, extra = fist(ir, instr, a)
     e += float_pop(a)
     return e, extra
 
+
 def fisttp(ir, instr, a):
     e = []
     e.append(m2_expr.ExprAff(a,
@@ -2040,7 +2073,7 @@ def fld1(ir, instr):
 
 
 def fldl2t(ir, instr):
-    value_f = math.log(10)/math.log(2)
+    value_f = math.log(10) / math.log(2)
     value = struct.unpack('I', struct.pack('f', value_f))[0]
     return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
                                          m2_expr.ExprInt32(value)))
@@ -2083,6 +2116,7 @@ def fadd(ir, instr, a, b=None):
     e += set_float_cs_eip(instr)
     return e, []
 
+
 def fiadd(ir, instr, a, b=None):
     a, b = float_implicit_st0(a, b)
     e = []
@@ -2122,7 +2156,8 @@ def fpatan(ir, instr):
 
 def fprem(ir, instr):
     e = []
-    e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fprem', float_st0, float_st1)))
+    e.append(
+        m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fprem', float_st0, float_st1)))
     # Remaining bits (ex: used in argument reduction in tan)
     remain = m2_expr.ExprOp('fprem_lsb', float_st0, float_st1)
     e += [m2_expr.ExprAff(float_c0, remain[2:3]),
@@ -2130,14 +2165,15 @@ def fprem(ir, instr):
           m2_expr.ExprAff(float_c1, remain[0:1]),
           # Consider the reduction is always completed
           m2_expr.ExprAff(float_c2, m2_expr.ExprInt1(0)),
-    ]
+          ]
     e += set_float_cs_eip(instr)
     return e, []
 
 
 def fprem1(ir, instr):
     e = []
-    e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fprem1', float_st0, float_st1)))
+    e.append(
+        m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fprem1', float_st0, float_st1)))
     e += set_float_cs_eip(instr)
     return e, []
 
@@ -2161,7 +2197,8 @@ def fninit(ir, instr):
 def fyl2x(ir, instr):
     e = []
     a = float_st1
-    e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fyl2x', float_st0, float_st1)))
+    e.append(
+        m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fyl2x', float_st0, float_st1)))
     e += set_float_cs_eip(instr)
     e += float_pop(a)
     return e, []
@@ -2171,13 +2208,13 @@ def fnstenv(ir, instr, a):
     e = []
     # XXX TODO tag word, ...
     status_word = m2_expr.ExprCompose([(m2_expr.ExprInt8(0), 0, 8),
-                               (float_c0,           8, 9),
-                               (float_c1,           9, 10),
-                               (float_c2,           10, 11),
-                               (float_stack_ptr,    11, 14),
-                               (float_c3,           14, 15),
-                               (m2_expr.ExprInt1(0), 15, 16),
-                               ])
+                                       (float_c0,           8, 9),
+                                       (float_c1,           9, 10),
+                                       (float_c2,           10, 11),
+                                       (float_stack_ptr,    11, 14),
+                                       (float_c3,           14, 15),
+                                       (m2_expr.ExprInt1(0), 15, 16),
+                                       ])
 
     s = instr.mode
     # The behaviour in 64bit is identical to 32 bit
@@ -2202,6 +2239,7 @@ def fnstenv(ir, instr, a):
     e.append(m2_expr.ExprAff(ad, float_ds))
     return e, []
 
+
 def fldenv(ir, instr, a):
     e = []
     # Inspired from fnstenv (same TODOs / issues)
@@ -2211,11 +2249,11 @@ def fldenv(ir, instr, a):
     # This will truncate addresses
     s = min(32, s)
 
-    ## Float control
+    # Float control
     ad = m2_expr.ExprMem(a.arg, size=16)
     e.append(m2_expr.ExprAff(float_control, ad))
 
-    ## Status word
+    # Status word
     ad = m2_expr.ExprMem(a.arg + m2_expr.ExprInt(s / 8 * 1, size=a.arg.size),
                          size=16)
     e += [m2_expr.ExprAff(x, y) for x, y in ((float_c0, ad[8:9]),
@@ -2225,7 +2263,7 @@ def fldenv(ir, instr, a):
                                              (float_c3, ad[14:15]))
           ]
 
-    ## EIP, CS, Address, DS
+    # EIP, CS, Address, DS
     for offset, target in ((3, float_eip[:s]),
                            (4, float_cs),
                            (5, float_address[:s]),
@@ -2247,6 +2285,7 @@ def fsub(ir, instr, a, b=None):
     e += set_float_cs_eip(instr)
     return e, []
 
+
 def fsubp(ir, instr, a, b=None):
     a, b = float_implicit_st0(a, b)
     e = []
@@ -2284,6 +2323,7 @@ def fmul(ir, instr, a, b=None):
     e += set_float_cs_eip(instr)
     return e, []
 
+
 def fimul(ir, instr, a, b=None):
     a, b = float_implicit_st0(a, b)
     e = []
@@ -2301,6 +2341,7 @@ def fdiv(ir, instr, a, b=None):
     e += set_float_cs_eip(instr)
     return e, []
 
+
 def fdivr(ir, instr, a, b=None):
     a, b = float_implicit_st0(a, b)
     e = []
@@ -2446,6 +2487,7 @@ def f2xm1(ir, instr):
     e += set_float_cs_eip(instr)
     return e, []
 
+
 def fchs(ir, instr):
     e = []
     e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fchs', float_st0)))
@@ -2470,7 +2512,7 @@ def fabs(ir, instr):
 def fnstsw(ir, instr, dst):
     args = [
         # Exceptions -> 0
-        (m2_expr.ExprInt8(0),0, 8),
+        (m2_expr.ExprInt8(0), 0, 8),
         (float_c0,           8, 9),
         (float_c1,           9, 10),
         (float_c2,           10, 11),
@@ -2507,7 +2549,7 @@ def fcmove(ir, instr, arg1, arg2):
 
 
 def fcmovbe(ir, instr, arg1, arg2):
-    return gen_fcmov(ir, instr, cf|zf, arg1, arg2, True)
+    return gen_fcmov(ir, instr, cf | zf, arg1, arg2, True)
 
 
 def fcmovu(ir, instr, arg1, arg2):
@@ -2523,7 +2565,7 @@ def fcmovne(ir, instr, arg1, arg2):
 
 
 def fcmovnbe(ir, instr, arg1, arg2):
-    return gen_fcmov(ir, instr, cf|zf, arg1, arg2, False)
+    return gen_fcmov(ir, instr, cf | zf, arg1, arg2, False)
 
 
 def fcmovnu(ir, instr, arg1, arg2):
@@ -2545,7 +2587,8 @@ def rdtsc(ir, instr):
     e = []
     e.append(m2_expr.ExprAff(tsc1, tsc1 + m2_expr.ExprInt32(1)))
     e.append(m2_expr.ExprAff(tsc2, tsc2 + m2_expr.ExprCond(tsc1 - tsc1.mask,
-                                                           m2_expr.ExprInt32(0),
+                                                           m2_expr.ExprInt32(
+                                                               0),
                                                            m2_expr.ExprInt32(1))))
     e.append(m2_expr.ExprAff(mRAX[32], tsc1))
     e.append(m2_expr.ExprAff(mRDX[32], tsc2))
@@ -2559,11 +2602,9 @@ def daa(ir, instr):
     cond1 = expr_cmpu(r_al[:4], m2_expr.ExprInt(0x9, 4)) | af
     e.append(m2_expr.ExprAff(af, cond1))
 
-
     cond2 = expr_cmpu(m2_expr.ExprInt8(6), r_al)
     cond3 = expr_cmpu(r_al, m2_expr.ExprInt8(0x99)) | cf
 
-
     cf_c1 = m2_expr.ExprCond(cond1,
                              cf | (cond2),
                              m2_expr.ExprInt1(0))
@@ -2583,6 +2624,7 @@ def daa(ir, instr):
     e += update_flag_znp(new_al)
     return e, []
 
+
 def das(ir, instr):
     e = []
     r_al = mRAX[instr.mode][:8]
@@ -2590,11 +2632,9 @@ def das(ir, instr):
     cond1 = expr_cmpu(r_al[:4], m2_expr.ExprInt(0x9, 4)) | af
     e.append(m2_expr.ExprAff(af, cond1))
 
-
     cond2 = expr_cmpu(m2_expr.ExprInt8(6), r_al)
     cond3 = expr_cmpu(r_al, m2_expr.ExprInt8(0x99)) | cf
 
-
     cf_c1 = m2_expr.ExprCond(cond1,
                              cf | (cond2),
                              m2_expr.ExprInt1(0))
@@ -2619,10 +2659,10 @@ def aam(ir, instr, a):
     e = []
     tempAL = mRAX[instr.mode][0:8]
     newEAX = m2_expr.ExprCompose([
-                         (tempAL % a,           0,  8),
+        (tempAL % a,           0,  8),
                         (tempAL / a,           8,  16),
                         (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size),
-                         ])
+    ])
     e += [m2_expr.ExprAff(mRAX[instr.mode], newEAX)]
     e += update_flag_arith(newEAX)
     e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0)))
@@ -2634,11 +2674,11 @@ def aad(ir, instr, a):
     tempAL = mRAX[instr.mode][0:8]
     tempAH = mRAX[instr.mode][8:16]
     newEAX = m2_expr.ExprCompose([
-            ((tempAL + (tempAH * a)) & m2_expr.ExprInt8(0xFF), 0,  8),
+        ((tempAL + (tempAH * a)) & m2_expr.ExprInt8(0xFF), 0,  8),
             (m2_expr.ExprInt8(0),                              8,  16),
             (mRAX[instr.mode][16:],
              16, mRAX[instr.mode].size),
-            ])
+    ])
     e += [m2_expr.ExprAff(mRAX[instr.mode], newEAX)]
     e += update_flag_arith(newEAX)
     e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0)))
@@ -2708,9 +2748,11 @@ def bsr_bsf(ir, instr, a, b, op_name):
     return e, [irbloc(lbl_src_null.name, [e_src_null]),
                irbloc(lbl_src_not_null.name, [e_src_not_null])]
 
+
 def bsf(ir, instr, a, b):
     return bsr_bsf(ir, instr, a, b, "bsf")
 
+
 def bsr(ir, instr, a, b):
     return bsr_bsf(ir, instr, a, b, "bsr")
 
@@ -2737,7 +2779,7 @@ def sidt(ir, instr, a):
                              m2_expr.ExprInt32(0xe40007ff)))
     e.append(
         m2_expr.ExprAff(m2_expr.ExprMem(m2_expr.ExprOp("+", b,
-        m2_expr.ExprInt_from(b, 4)), 16), m2_expr.ExprInt16(0x8245)))
+                                                       m2_expr.ExprInt_from(b, 4)), 16), m2_expr.ExprInt16(0x8245)))
     return e, []
 
 
@@ -2750,6 +2792,7 @@ def sldt(ir, instr, a):
 def cmovz(ir, instr, arg1, arg2):
     return gen_cmov(ir, instr, zf, arg1, arg2, True)
 
+
 def cmovnz(ir, instr, arg1, arg2):
     return gen_cmov(ir, instr, zf, arg1, arg2, False)
 
@@ -2763,23 +2806,23 @@ def cmovnp(ir, instr, arg1, arg2):
 
 
 def cmovge(ir, instr, arg1, arg2):
-    return gen_cmov(ir, instr, nf^of, arg1, arg2, False)
+    return gen_cmov(ir, instr, nf ^ of, arg1, arg2, False)
 
 
 def cmovg(ir, instr, arg1, arg2):
-    return gen_cmov(ir, instr, zf|(nf^of), arg1, arg2, False)
+    return gen_cmov(ir, instr, zf | (nf ^ of), arg1, arg2, False)
 
 
 def cmovl(ir, instr, arg1, arg2):
-    return gen_cmov(ir, instr, nf^of, arg1, arg2, True)
+    return gen_cmov(ir, instr, nf ^ of, arg1, arg2, True)
 
 
 def cmovle(ir, instr, arg1, arg2):
-    return gen_cmov(ir, instr, zf|(nf^of), arg1, arg2, True)
+    return gen_cmov(ir, instr, zf | (nf ^ of), arg1, arg2, True)
 
 
 def cmova(ir, instr, arg1, arg2):
-    return gen_cmov(ir, instr, cf|zf, arg1, arg2, False)
+    return gen_cmov(ir, instr, cf | zf, arg1, arg2, False)
 
 
 def cmovae(ir, instr, arg1, arg2):
@@ -2787,7 +2830,7 @@ def cmovae(ir, instr, arg1, arg2):
 
 
 def cmovbe(ir, instr, arg1, arg2):
-    return gen_cmov(ir, instr, cf|zf, arg1, arg2, True)
+    return gen_cmov(ir, instr, cf | zf, arg1, arg2, True)
 
 
 def cmovb(ir, instr, arg1, arg2):
@@ -2813,7 +2856,7 @@ def cmovns(ir, instr, arg1, arg2):
 def icebp(ir, instr):
     e = []
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
+                             m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
     return e, []
 # XXX
 
@@ -2826,7 +2869,7 @@ def l_int(ir, instr, a):
     else:
         except_int = EXCEPT_INT_XX
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprInt32(except_int)))
+                             m2_expr.ExprInt32(except_int)))
     e.append(m2_expr.ExprAff(interrupt_num, a))
     return e, []
 
@@ -2834,7 +2877,7 @@ def l_int(ir, instr, a):
 def l_sysenter(ir, instr):
     e = []
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
+                             m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
     return e, []
 
 # XXX
@@ -2843,7 +2886,7 @@ def l_sysenter(ir, instr):
 def l_out(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
+                             m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
     return e, []
 
 # XXX
@@ -2852,7 +2895,7 @@ def l_out(ir, instr, a, b):
 def l_outs(ir, instr, size):
     e = []
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
+                             m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
     return e, []
 
 # XXX actually, xlat performs al = (ds:[e]bx + ZeroExtend(al))
@@ -2861,7 +2904,7 @@ def l_outs(ir, instr, size):
 def xlat(ir, instr):
     e = []
     a = m2_expr.ExprCompose([(m2_expr.ExprInt(0, 24), 8, 32),
-                     (mRAX[instr.mode][0:8], 0, 8)])
+                             (mRAX[instr.mode][0:8], 0, 8)])
     b = m2_expr.ExprMem(m2_expr.ExprOp('+', mRBX[instr.mode], a), 8)
     e.append(m2_expr.ExprAff(mRAX[instr.mode][0:8], b))
     return e, []
@@ -2871,30 +2914,31 @@ def cpuid(ir, instr):
     e = []
     e.append(
         m2_expr.ExprAff(mRAX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(0, instr.mode))))
+                        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(0, instr.mode))))
     e.append(
         m2_expr.ExprAff(mRBX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(1, instr.mode))))
+                        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(1, instr.mode))))
     e.append(
         m2_expr.ExprAff(mRCX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(2, instr.mode))))
+                        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(2, instr.mode))))
     e.append(
         m2_expr.ExprAff(mRDX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(3, instr.mode))))
+                        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt(3, instr.mode))))
     return e, []
 
 
 def bittest_get(a, b):
     b = b.zeroExtend(a.size)
     if isinstance(a, m2_expr.ExprMem):
-        b_mask = {16:4, 32:5, 64:6}
-        b_decal = {16:1, 32:3, 64:7}
+        b_mask = {16: 4, 32: 5, 64: 6}
+        b_decal = {16: 1, 32: 3, 64: 7}
         ptr = a.arg
-        off_bit = b.zeroExtend(a.size) & m2_expr.ExprInt((1<<b_mask[a.size])-1,
-                                                         a.size)
+        off_bit = b.zeroExtend(
+            a.size) & m2_expr.ExprInt((1 << b_mask[a.size]) - 1,
+                                      a.size)
         off_byte = ((b.zeroExtend(ptr.size) >> m2_expr.ExprInt_from(ptr, 3)) &
                     m2_expr.ExprInt_from(ptr,
-                                         ((1<<a.size)-1) ^ b_decal[a.size]))
+                                         ((1 << a.size) - 1) ^ b_decal[a.size]))
 
         d = m2_expr.ExprMem(ptr + off_byte, a.size)
     else:
@@ -2950,7 +2994,7 @@ def into(ir, instr):
 def l_in(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(exception_flags,
-                     m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
+                             m2_expr.ExprInt32(EXCEPT_PRIV_INSN)))
     return e, []
 
 
@@ -2980,7 +3024,7 @@ def cmpxchg8b(arg1):
 def lds(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprMem(b.arg, size=a.size)))
-    DS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size/8),
+    DS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size / 8),
                                size=16)
     e.append(m2_expr.ExprAff(DS, DS_value))
     return e, []
@@ -2989,7 +3033,7 @@ def lds(ir, instr, a, b):
 def les(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprMem(b.arg, size=a.size)))
-    ES_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size/8),
+    ES_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size / 8),
                                size=16)
     e.append(m2_expr.ExprAff(ES, ES_value))
     return e, []
@@ -2998,23 +3042,25 @@ def les(ir, instr, a, b):
 def lss(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprMem(b.arg, size=a.size)))
-    SS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size/8),
+    SS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size / 8),
                                size=16)
     e.append(m2_expr.ExprAff(SS, SS_value))
     return e, []
 
+
 def lfs(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprMem(b.arg, size=a.size)))
-    FS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size/8),
+    FS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size / 8),
                                size=16)
     e.append(m2_expr.ExprAff(FS, FS_value))
     return e, []
 
+
 def lgs(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprMem(b.arg, size=a.size)))
-    GS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size/8),
+    GS_value = m2_expr.ExprMem(b.arg + m2_expr.ExprInt_from(b.arg, a.size / 8),
                                size=16)
     e.append(m2_expr.ExprAff(GS, GS_value))
     return e, []
@@ -3027,7 +3073,8 @@ def lahf(ir, instr):
             m2_expr.ExprInt1(0), zf, nf]
     for i in xrange(len(regs)):
         args.append((regs[i], i, i + 1))
-    e.append(m2_expr.ExprAff(mRAX[instr.mode][8:16], m2_expr.ExprCompose(args)))
+    e.append(
+        m2_expr.ExprAff(mRAX[instr.mode][8:16], m2_expr.ExprCompose(args)))
     return e, []
 
 
@@ -3085,6 +3132,7 @@ def movd(ir, instr, a, b):
         e.append(m2_expr.ExprAff(a, b[:32]))
     return e, []
 
+
 def movdqu(ir, instr, a, b):
     # XXX TODO alignement check
     return [m2_expr.ExprAff(a, b)], []
@@ -3101,6 +3149,12 @@ def andps(ir, instr, a, b):
     return e, []
 
 
+def andnps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('&', a ^ a.mask, b)))
+    return e, []
+
+
 def orps(ir, instr, a, b):
     e = []
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('|', a, b)))
@@ -3114,22 +3168,28 @@ def xorps(ir, instr, a, b):
 
 
 def rdmsr(ir, instr):
-    msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32(8) * mRCX[instr.mode][:32]
+    msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32(
+        8) * mRCX[instr.mode][:32]
     e = []
-    e.append(m2_expr.ExprAff(mRAX[instr.mode][:32], m2_expr.ExprMem(msr_addr, 32)))
-    e.append(m2_expr.ExprAff(mRDX[instr.mode][:32], m2_expr.ExprMem(msr_addr + m2_expr.ExprInt_from(msr_addr, 4), 32)))
+    e.append(
+        m2_expr.ExprAff(mRAX[instr.mode][:32], m2_expr.ExprMem(msr_addr, 32)))
+    e.append(m2_expr.ExprAff(mRDX[instr.mode][:32], m2_expr.ExprMem(
+        msr_addr + m2_expr.ExprInt_from(msr_addr, 4), 32)))
     return e, []
 
+
 def wrmsr(ir, instr):
-    msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32(8) * mRCX[instr.mode][:32]
+    msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32(
+        8) * mRCX[instr.mode][:32]
     e = []
     src = m2_expr.ExprCompose([(mRAX[instr.mode][:32], 0, 32),
                                (mRDX[instr.mode][:32], 32, 64)])
     e.append(m2_expr.ExprAff(m2_expr.ExprMem(msr_addr, 64), src))
     return e, []
 
-### MMX/SSE/AVX operations
-###
+# MMX/SSE/AVX operations
+#
+
 
 def vec_op_clip(op, size):
     """
@@ -3145,40 +3205,52 @@ def vec_op_clip(op, size):
     return vec_op_clip_instr
 
 # Generic vertical operation
+
+
 def vec_vertical_sem(op, elt_size, reg_size, a, b):
     assert(reg_size % elt_size == 0)
-    n = reg_size/elt_size
+    n = reg_size / elt_size
     if op == '-':
-        ops = [((a[i*elt_size:(i+1)*elt_size] - b[i*elt_size:(i+1)*elt_size]),
-               i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+        ops = [(
+            (a[i * elt_size:(i + 1) * elt_size]
+             - b[i * elt_size:(i + 1) * elt_size]),
+               i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)]
     else:
-        ops = [(m2_expr.ExprOp(op, a[i*elt_size:(i+1)*elt_size],
-                               b[i*elt_size:(i+1)*elt_size]),
-                i*elt_size,
-                (i+1)*elt_size) for i in xrange(0, n)]
+        ops = [(m2_expr.ExprOp(op, a[i * elt_size:(i + 1) * elt_size],
+                               b[i * elt_size:(i + 1) * elt_size]),
+                i * elt_size,
+                (i + 1) * elt_size) for i in xrange(0, n)]
 
     return m2_expr.ExprCompose(ops)
 
+
 def float_vec_vertical_sem(op, elt_size, reg_size, a, b):
     assert(reg_size % elt_size == 0)
-    n = reg_size/elt_size
+    n = reg_size / elt_size
 
     x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'),
                           64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size]
     if op == '-':
         ops = [(m2_expr.ExprOp(x_to_int % elt_size,
-                               m2_expr.ExprOp(int_to_x % elt_size, a[i*elt_size:(i+1)*elt_size]) -
-                               m2_expr.ExprOp(int_to_x % elt_size, b[i*elt_size:(i+1)*elt_size])),
-                i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+                               m2_expr.ExprOp(int_to_x % elt_size, a[i * elt_size:(i + 1) * elt_size]) -
+                               m2_expr.ExprOp(
+                                   int_to_x % elt_size, b[i * elt_size:(
+                                       i + 1) * elt_size])),
+                i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)]
     else:
         ops = [(m2_expr.ExprOp(x_to_int % elt_size,
                                m2_expr.ExprOp(op,
-                                              m2_expr.ExprOp(int_to_x % elt_size, a[i*elt_size:(i+1)*elt_size]),
-                                              m2_expr.ExprOp(int_to_x % elt_size, b[i*elt_size:(i+1)*elt_size]))),
-                i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+                                              m2_expr.ExprOp(
+                                                  int_to_x % elt_size, a[i * elt_size:(
+                                                      i + 1) * elt_size]),
+                                              m2_expr.ExprOp(
+                                                  int_to_x % elt_size, b[i * elt_size:(
+                                                      i + 1) * elt_size]))),
+                i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)]
 
     return m2_expr.ExprCompose(ops)
 
+
 def __vec_vertical_instr_gen(op, elt_size, sem):
     def vec_instr(ir, instr, a, b):
         e = []
@@ -3189,17 +3261,19 @@ def __vec_vertical_instr_gen(op, elt_size, sem):
         return e, []
     return vec_instr
 
+
 def vec_vertical_instr(op, elt_size):
     return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem)
 
+
 def float_vec_vertical_instr(op, elt_size):
     return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem)
 
-### Integer arithmetic
-###
+# Integer arithmetic
+#
 
-## Additions
-##
+# Additions
+#
 
 # SSE
 paddb = vec_vertical_instr('+', 8)
@@ -3207,8 +3281,8 @@ paddw = vec_vertical_instr('+', 16)
 paddd = vec_vertical_instr('+', 32)
 paddq = vec_vertical_instr('+', 64)
 
-## Substractions
-##
+# Substractions
+#
 
 # SSE
 psubb = vec_vertical_instr('-', 8)
@@ -3216,8 +3290,8 @@ psubw = vec_vertical_instr('-', 16)
 psubd = vec_vertical_instr('-', 32)
 psubq = vec_vertical_instr('-', 64)
 
-### Floating-point arithmetic
-###
+# Floating-point arithmetic
+#
 
 # SSE
 addss = vec_op_clip('+', 32)
@@ -3237,10 +3311,12 @@ divsd = vec_op_clip('/', 64)
 divps = float_vec_vertical_instr('/', 32)
 divpd = float_vec_vertical_instr('/', 64)
 
-### Logical (floating-point)
-###
+# Logical (floating-point)
+#
 
 # MMX/SSE/AVX
+
+
 def pand(ir, instr, a, b):
     e = []
     c = a & b
@@ -3249,6 +3325,14 @@ def pand(ir, instr, a, b):
     return e, []
 
 
+def pandn(ir, instr, a, b):
+    e = []
+    c = (a ^ a.mask) & b
+    # No flag affected
+    e.append(m2_expr.ExprAff(a, c))
+    return e, []
+
+
 def por(ir, instr, a, b):
     e = []
     c = a | b
@@ -3261,139 +3345,204 @@ def pminsw(ir, instr, a, b):
     e.append(m2_expr.ExprAff(a, m2_expr.ExprCond((a - b).msb(), a, b)))
     return e, []
 
+
 def cvtdq2pd(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
-    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('int_32_to_double', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('int_32_to_double', b[32:64])))
     return e, []
 
+
 def cvtdq2ps(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('int_32_to_float', b[32:64])))
-    e.append(m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('int_32_to_float', b[64:96])))
-    e.append(m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('int_32_to_float', b[96:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('int_32_to_float', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('int_32_to_float', b[64:96])))
+    e.append(
+        m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('int_32_to_float', b[96:128])))
     return e, []
 
+
 def cvtpd2dq(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_int_32', b[64:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_int_32', b[64:128])))
     e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprInt64(0)))
     return e, []
 
+
 def cvtpd2pi(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_int_32', b[64:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_int_32', b[64:128])))
     return e, []
 
+
 def cvtpd2ps(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_float', b[:64])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_float', b[64:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_float', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_float', b[64:128])))
     e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprInt64(0)))
     return e, []
 
+
 def cvtpi2pd(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
-    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('int_32_to_double', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('int_32_to_double', b[32:64])))
     return e, []
 
+
 def cvtpi2ps(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('int_32_to_float', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('int_32_to_float', b[32:64])))
     return e, []
 
+
 def cvtps2dq(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_to_int_32', b[32:64])))
-    e.append(m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('float_to_int_32', b[64:96])))
-    e.append(m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('float_to_int_32', b[96:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_to_int_32', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('float_to_int_32', b[64:96])))
+    e.append(
+        m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('float_to_int_32', b[96:128])))
     return e, []
 
+
 def cvtps2pd(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('float_to_double', b[:32])))
-    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('float_to_double', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[:64], m2_expr.ExprOp('float_to_double', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('float_to_double', b[32:64])))
     return e, []
 
+
 def cvtps2pi(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_to_int_32', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_to_int_32', b[32:64])))
     return e, []
 
+
 def cvtsd2si(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
     return e, []
 
+
 def cvtsd2ss(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_float', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_float', b[:64])))
     return e, []
 
+
 def cvtsi2sd(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
     return e, []
 
+
 def cvtsi2ss(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
     return e, []
 
+
 def cvtss2sd(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('float_to_double', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[:64], m2_expr.ExprOp('float_to_double', b[:32])))
     return e, []
 
+
 def cvtss2si(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
     return e, []
 
+
 def cvttpd2pi(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_trunc_to_int_32', b[64:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_trunc_to_int_32', b[64:128])))
     return e, []
 
+
 def cvttpd2dq(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_trunc_to_int_32', b[64:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_trunc_to_int_32', b[64:128])))
     e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprInt64(0)))
     return e, []
 
+
 def cvttps2dq(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_trunc_to_int_32', b[32:64])))
-    e.append(m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('float_trunc_to_int_32', b[64:96])))
-    e.append(m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('float_trunc_to_int_32', b[96:128])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_trunc_to_int_32', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('float_trunc_to_int_32', b[64:96])))
+    e.append(
+        m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('float_trunc_to_int_32', b[96:128])))
     return e, []
 
+
 def cvttps2pi(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
-    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_trunc_to_int_32', b[32:64])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_trunc_to_int_32', b[32:64])))
     return e, []
 
+
 def cvttsd2si(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
     return e, []
 
+
 def cvttss2si(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
+    e.append(
+        m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
     return e, []
 
+
 def movss(ir, instr, a, b):
     e = []
     if not isinstance(a, m2_expr.ExprMem) and not isinstance(b, m2_expr.ExprMem):
@@ -3422,6 +3571,89 @@ def ucomiss(ir, instr, a, b):
     return e, []
 
 
+def pshufb(ir, instr, a, b):
+    e = []
+    if a.size == 64:
+        bit_l = 3
+    elif a.size == 128:
+        bit_l = 4
+    else:
+        raise NotImplementedError("bad size")
+    for i in xrange(0, b.size, 8):
+        index = b[i:i + bit_l].zeroExtend(a.size) << m2_expr.ExprInt(3, a.size)
+        value = (a >> index)[:8]
+        e.append(m2_expr.ExprAff(a[i:i + 8],
+                                 m2_expr.ExprCond(b[i + 7:i + 8],
+                                                  m2_expr.ExprInt8(0),
+                                                  value)))
+    return e, []
+
+
+def pshufd(ir, instr, a, b, c):
+    e = []
+    for i in xrange(4):
+        index = c[2 * i:2 * (i + 1)].zeroExtend(a.size)
+        index <<= m2_expr.ExprInt(5, a.size)
+        value = (a >> index)[:32]
+        e.append(m2_expr.ExprAff(a[32 * i:32 * (i + 1)], value))
+    return e, []
+
+
+def ps_rl_ll(ir, instr, a, b, op, size):
+    lbl_zero = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size)
+    lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size)
+    lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size)
+
+    if b.size == 8:
+        count = b.zeroExtend(a.size)
+    else:
+        count = b.zeroExtend(a.size)
+
+    mask = {16: 0xF,
+            32: 0x1F,
+            64: 0x3F}[size]
+    test = count & m2_expr.ExprInt(((1 << a.size) - 1) ^ mask, a.size)
+    e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(test,
+                                                    lbl_zero,
+                                                    lbl_do))]
+
+    e_zero = [m2_expr.ExprAff(a, m2_expr.ExprInt(0, a.size)),
+              m2_expr.ExprAff(ir.IRDst, lbl_next)]
+
+    e_do = []
+    for i in xrange(0, a.size, size):
+        e.append(m2_expr.ExprAff(a[i:i + size], m2_expr.ExprOp(op,
+                                                               a[i:i + size],
+                                                               count[:size])))
+
+    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next))
+    return e, [irbloc(lbl_do.name, [e_do]), irbloc(lbl_zero.name, [e_zero])]
+
+
+def psrlw(ir, instr, a, b):
+    return ps_rl_ll(ir, instr, a, b, ">>", 16)
+
+
+def psrld(ir, instr, a, b):
+    return ps_rl_ll(ir, instr, a, b, ">>", 32)
+
+
+def psrlq(ir, instr, a, b):
+    return ps_rl_ll(ir, instr, a, b, ">>", 64)
+
+
+def psllw(ir, instr, a, b):
+    return ps_rl_ll(ir, instr, a, b, "<<", 16)
+
+
+def pslld(ir, instr, a, b):
+    return ps_rl_ll(ir, instr, a, b, "<<",  32)
+
+
+def psllq(ir, instr, a, b):
+    return ps_rl_ll(ir, instr, a, b, "<<",  64)
+
+
 def iret(ir, instr):
     """IRET implementation
     XXX: only support "no-privilege change"
@@ -3433,6 +3665,299 @@ def iret(ir, instr):
     return exprs, []
 
 
+def pmaxu(ir, instr, a, b, size):
+    e = []
+    for i in xrange(0, a.size, size):
+        op1 = a[i:i + size]
+        op2 = b[i:i + size]
+        res = op1 - op2
+        # Compote CF in @res = @op1 - @op2
+        ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb()
+
+        e.append(m2_expr.ExprAff(a[i:i + size],
+                                 m2_expr.ExprCond(ret,
+                                                  b[i:i + size],
+                                                  a[i:i + size])))
+    return e, []
+
+
+def pmaxub(ir, instr, a, b):
+    return pmaxu(ir, instr, a, b, 8)
+
+
+def pmaxuw(ir, instr, a, b):
+    return pmaxu(ir, instr, a, b, 16)
+
+
+def pmaxud(ir, instr, a, b):
+    return pmaxu(ir, instr, a, b, 32)
+
+
+def pminu(ir, instr, a, b, size):
+    e = []
+    for i in xrange(0, a.size, size):
+        op1 = a[i:i + size]
+        op2 = b[i:i + size]
+        res = op1 - op2
+        # Compote CF in @res = @op1 - @op2
+        ret = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb()
+
+        e.append(m2_expr.ExprAff(a[i:i + size],
+                                 m2_expr.ExprCond(ret,
+                                                  a[i:i + size],
+                                                  b[i:i + size])))
+    return e, []
+
+
+def pminub(ir, instr, a, b):
+    return pminu(ir, instr, a, b, 8)
+
+
+def pminuw(ir, instr, a, b):
+    return pminu(ir, instr, a, b, 16)
+
+
+def pminud(ir, instr, a, b):
+    return pminu(ir, instr, a, b, 32)
+
+
+def pcmpeq(ir, instr, a, b, size):
+    e = []
+    for i in xrange(0, a.size, size):
+        test = a[i:i + size] - b[i:i + size]
+        e.append(m2_expr.ExprAff(a[i:i + size],
+                                 m2_expr.ExprCond(test,
+                                                  m2_expr.ExprInt(0, size),
+                                                  m2_expr.ExprInt(-1, size))))
+    return e, []
+
+
+def pcmpeqb(ir, instr, a, b):
+    return pcmpeq(ir, instr, a, b, 8)
+
+
+def pcmpeqw(ir, instr, a, b):
+    return pcmpeq(ir, instr, a, b, 16)
+
+
+def pcmpeqd(ir, instr, a, b):
+    return pcmpeq(ir, instr, a, b, 32)
+
+
+def punpck(ir, instr, a, b, size, off):
+    e = []
+    for i in xrange(a.size / (2 * size)):
+        src1 = a[size * i + off: size * i + off + size]
+        src2 = b[size * i + off: size * i + off + size]
+        e.append(m2_expr.ExprAff(a[size * 2 * i: size * 2 * i + size], src1))
+        e.append(
+            m2_expr.ExprAff(a[size * (2 * i + 1): size * (2 * i + 1) + size], src2))
+    return e, []
+
+
+def punpckhbw(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 8, a.size / 2)
+
+
+def punpckhwd(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 16, a.size / 2)
+
+
+def punpckhdq(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 32, a.size / 2)
+
+
+def punpckhqdq(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 64, a.size / 2)
+
+
+def punpcklbw(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 8, 0)
+
+
+def punpcklwd(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 16, 0)
+
+
+def punpckldq(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 32, 0)
+
+
+def punpcklqdq(ir, instr, a, b):
+    return punpck(ir, instr, a, b, 64, 0)
+
+
+def pinsr(ir, instr, a, b, c, size):
+    e = []
+
+    mask = {8: 0xF,
+            16: 0x7,
+            32: 0x3,
+            64: 0x1}[size]
+
+    sel = (int(c.arg) & mask) * size
+    e.append(m2_expr.ExprAff(a[sel:sel + size], b[:size]))
+
+    return e, []
+
+
+def pinsrb(ir, instr, a, b, c):
+    return pinsr(ir, instr, a, b, c, 8)
+
+
+def pinsrw(ir, instr, a, b, c):
+    return pinsr(ir, instr, a, b, c, 16)
+
+
+def pinsrd(ir, instr, a, b, c):
+    return pinsr(ir, instr, a, b, c, 32)
+
+
+def pinsrq(ir, instr, a, b, c):
+    return pinsr(ir, instr, a, b, c, 64)
+
+
+def pextr(ir, instr, a, b, c, size):
+    e = []
+
+    mask = {8: 0xF,
+            16: 0x7,
+            32: 0x3,
+            64: 0x1}[size]
+
+    sel = (int(c.arg) & mask) * size
+    e.append(m2_expr.ExprAff(a, b[sel:sel + size].zeroExtend(a.size)))
+
+    return e, []
+
+
+def pextrb(ir, instr, a, b, c):
+    return pextr(ir, instr, a, b, c, 8)
+
+
+def pextrw(ir, instr, a, b, c):
+    return pextr(ir, instr, a, b, c, 16)
+
+
+def pextrd(ir, instr, a, b, c):
+    return pextr(ir, instr, a, b, c, 32)
+
+
+def pextrq(ir, instr, a, b, c):
+    return pextr(ir, instr, a, b, c, 64)
+
+
+def unpckhps(ir, instr, a, b):
+    e = []
+    src = m2_expr.ExprCompose([(a[64:96], 0, 32),
+                               (b[64:96], 32, 64),
+                               (a[96:128], 64, 96),
+                               (b[96:128], 96, 128)])
+    e.append(m2_expr.ExprAff(a, src))
+    return e, []
+
+
+def unpckhpd(ir, instr, a, b):
+    e = []
+    src = m2_expr.ExprCompose([(a[64:128], 0, 64),
+                               (b[64:128], 64, 128)])
+    e.append(m2_expr.ExprAff(a, src))
+    return e, []
+
+
+def unpcklps(ir, instr, a, b):
+    e = []
+    src = m2_expr.ExprCompose([(a[0:32], 0, 32),
+                               (b[0:32], 32, 64),
+                               (a[32:64], 64, 96),
+                               (b[32:64], 96, 128)])
+    e.append(m2_expr.ExprAff(a, src))
+    return e, []
+
+
+def unpcklpd(ir, instr, a, b):
+    e = []
+    src = m2_expr.ExprCompose([(a[0:64], 0, 64),
+                               (b[0:64], 64, 128)])
+    e.append(m2_expr.ExprAff(a, src))
+    return e, []
+
+
+def movlpd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64], b[:64]))
+    return e, []
+
+
+def movlps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64], b[:64]))
+    return e, []
+
+
+def movhpd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[64:128], b[:64]))
+    return e, []
+
+
+def movhps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[64:128], b[:64]))
+    return e, []
+
+
+def movdq2q(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a, b[:64]))
+    return e, []
+
+
+def sqrt_gen(ir, instr, a, b, size):
+    e = []
+    out = []
+    for i in b.size / size:
+        out.append((m2_expr.ExprOp('fsqrt' % size,
+                                   b[i * size: (i + 1) * size]),
+                    i * size, (i + 1) * size))
+    src = m2_expr.ExprCompose(out)
+    e.append(m2_expr.ExprAff(a, src))
+    return e, []
+
+
+def sqrtpd(ir, instr, a, b):
+    return sqrt_gen(ir, instr, a, b, 64)
+
+
+def sqrtps(ir, instr, a, b):
+    return sqrt_gen(ir, instr, a, b, 32)
+
+
+def sqrtsd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64],
+                             m2_expr.ExprOp('fsqrt',
+                                            b[:64])))
+    return e, []
+
+
+def sqrtss(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32],
+                             m2_expr.ExprOp('fsqrt',
+                                            b[:32])))
+    return e, []
+
+
+def pmovmskb(ir, instr, a, b):
+    e = []
+    out = []
+    for i in xrange(b.size / 8):
+        out.append((b[8 * i + 7:8 * (i + 1)], i, i + 1))
+    src = m2_expr.ExprCompose(out)
+    e.append(m2_expr.ExprAff(a, src.zeroExtend(a.size)))
+    return e, []
+
 mnemo_func = {'mov': mov,
               'xchg': xchg,
               'movzx': movzx,
@@ -3676,8 +4201,8 @@ mnemo_func = {'mov': mov,
               'cmovz': cmovz,
               'cmove': cmovz,
               'cmovnz': cmovnz,
-              'cmovpe':cmovpe,
-              'cmovnp':cmovnp,
+              'cmovpe': cmovpe,
+              'cmovnp': cmovnp,
               'cmovge': cmovge,
               'cmovnl': cmovge,
               'cmovg': cmovg,
@@ -3736,14 +4261,16 @@ mnemo_func = {'mov': mov,
               "fnclex": fnclex,
               "str": l_str,
               "movd": movd,
-              "movdqu":movdqu,
-              "movdqa":movdqu,
-              "movapd": movapd, # XXX TODO alignement check
-              "movupd": movapd, # XXX TODO alignement check
-              "movaps": movapd, # XXX TODO alignement check
-              "movups": movapd, # XXX TODO alignement check
+              "movdqu": movdqu,
+              "movdqa": movdqu,
+              "movapd": movapd,  # XXX TODO alignement check
+              "movupd": movapd,  # XXX TODO alignement check
+              "movaps": movapd,  # XXX TODO alignement check
+              "movups": movapd,  # XXX TODO alignement check
               "andps": andps,
               "andpd": andps,
+              "andnps": andnps,
+              "andnpd": andnps,
               "orps": orps,
               "orpd": orps,
               "xorps": xorps,
@@ -3787,13 +4314,13 @@ mnemo_func = {'mov': mov,
 
               "ucomiss": ucomiss,
 
-              ####
-              #### MMX/AVX/SSE operations
+              #
+              # MMX/AVX/SSE operations
 
-              ### Arithmetic (integers)
-              ###
+              # Arithmetic (integers)
+              #
 
-              ## Additions
+              # Additions
               # SSE
               "paddb": paddb,
               "paddw": paddw,
@@ -3807,45 +4334,108 @@ mnemo_func = {'mov': mov,
               "psubd": psubd,
               "psubq": psubq,
 
-              ### Arithmetic (floating-point)
-              ###
+              # Arithmetic (floating-point)
+              #
 
-              ## Additions
+              # Additions
               # SSE
               "addss": addss,
               "addsd": addsd,
               "addps": addps,
               "addpd": addpd,
 
-              ## Substractions
+              # Substractions
               # SSE
               "subss": subss,
               "subsd": subsd,
               "subps": subps,
               "subpd": subpd,
 
-              ## Multiplications
+              # Multiplications
               # SSE
               "mulss": mulss,
               "mulsd": mulsd,
               "mulps": mulps,
               "mulpd": mulpd,
 
-              ## Divisions
+              # Divisions
               # SSE
               "divss": divss,
               "divsd": divsd,
               "divps": divps,
               "divpd": divpd,
 
-              ### Logical (floating-point)
-              ###
+              # Logical (floating-point)
+              #
 
               "pand": pand,
+              "pandn": pandn,
               "por": por,
 
               "rdmsr": rdmsr,
               "wrmsr": wrmsr,
+              "pshufb": pshufb,
+              "pshufd": pshufd,
+
+              "psrlw": psrlw,
+              "psrld": psrld,
+              "psrlq": psrlq,
+              "psllw": psllw,
+              "pslld": pslld,
+              "psllq": psllq,
+
+              "pmaxub": pmaxub,
+              "pmaxuw": pmaxuw,
+              "pmaxud": pmaxud,
+
+              "pminub": pminub,
+              "pminuw": pminuw,
+              "pminud": pminud,
+
+              "pcmpeqb": pcmpeqb,
+              "pcmpeqw": pcmpeqw,
+              "pcmpeqd": pcmpeqd,
+
+              "punpckhbw": punpckhbw,
+              "punpckhwd": punpckhwd,
+              "punpckhdq": punpckhdq,
+              "punpckhqdq": punpckhqdq,
+
+
+              "punpcklbw": punpcklbw,
+              "punpcklwd": punpcklwd,
+              "punpckldq": punpckldq,
+              "punpcklqdq": punpcklqdq,
+
+              "pinsrb": pinsrb,
+              "pinsrw": pinsrw,
+              "pinsrd": pinsrd,
+              "pinsrq": pinsrq,
+
+              "pextrb": pextrb,
+              "pextrw": pextrw,
+              "pextrd": pextrd,
+              "pextrq": pextrq,
+
+              "unpckhps": unpckhps,
+              "unpckhpd": unpckhpd,
+              "unpcklps": unpcklps,
+              "unpcklpd": unpcklpd,
+
+              "movlpd": movlpd,
+              "movlps": movlps,
+              "movhpd": movhpd,
+              "movhps": movhps,
+              "movlhps": movhps,
+              "movhlps": movlps,
+              "movdq2q": movdq2q,
+
+              "sqrtpd": sqrtpd,
+              "sqrtps": sqrtps,
+              "sqrtsd": sqrtsd,
+              "sqrtss": sqrtss,
+
+              "pmovmskb": pmovmskb,
 
               }
 
@@ -3881,7 +4471,8 @@ class ir_x86_16(ir):
                                                              a.arg), a.size)
 
         if not instr.name.lower() in mnemo_func:
-            raise NotImplementedError("Mnemonic %s not implemented" % instr.name)
+            raise NotImplementedError(
+                "Mnemonic %s not implemented" % instr.name)
 
         instr_ir, extra_ir = mnemo_func[
             instr.name.lower()](self, instr, *args)
@@ -3960,8 +4551,8 @@ class ir_x86_16(ir):
                 if destination is a 32 bit reg, zero extend the 64 bit reg
                 """
                 if mode == 64:
-                    if (isinstance(e.dst, m2_expr.ExprId) and \
-                            e.dst.size == 32 and \
+                    if (isinstance(e.dst, m2_expr.ExprId) and
+                            e.dst.size == 32 and
                             e.dst in replace_regs[64]):
                         src = self.expr_fix_regs_for_mode(e.src, mode)
                         dst = replace_regs[64][e.dst].arg
diff --git a/miasm2/expression/smt2_helper.py b/miasm2/expression/smt2_helper.py
new file mode 100644
index 00000000..53d323e8
--- /dev/null
+++ b/miasm2/expression/smt2_helper.py
@@ -0,0 +1,296 @@
+# Helper functions for the generation of SMT2 expressions
+# The SMT2 expressions will be returned as a string.
+# The expressions are divided as follows
+#
+# - generic SMT2 operations
+# - definitions of SMT2 structures
+# - bit vector operations
+# - array operations
+
+# generic SMT2 operations
+
+def smt2_eq(a, b):
+    """
+    Assignment: a = b
+    """
+    return "(= {} {})".format(a, b)
+
+
+def smt2_implies(a, b):
+    """
+    Implication: a => b
+    """
+    return "(=> {} {})".format(a, b)
+
+
+def smt2_and(*args):
+    """
+    Conjunction: a and b and c ...
+    """
+    # transform args into strings
+    args = [str(arg) for arg in args]
+    return "(and {})".format(' '.join(args))
+
+
+def smt2_or(*args):
+    """
+    Disjunction: a or b or c ...
+    """
+    # transform args into strings
+    args = [str(arg) for arg in args]
+    return "(or {})".format(' '.join(args))
+
+
+def smt2_ite(cond, a, b):
+    """
+    If-then-else: cond ? a : b
+    """
+    return "(ite {} {} {})".format(cond, a, b)
+
+
+def smt2_distinct(*args):
+    """
+    Distinction: a != b != c != ...
+    """
+    # transform args into strings
+    args = [str(arg) for arg in args]
+    return "(distinct {})".format(' '.join(args))
+
+
+def smt2_assert(expr):
+    """
+    Assertion that @expr holds
+    """
+    return "(assert {})".format(expr)
+
+
+# definitions
+
+def declare_bv(bv, size):
+    """
+    Declares an bit vector @bv of size @size
+    """
+    return "(declare-fun {} () {})".format(bv, bit_vec(size))
+
+
+def declare_array(a, bv1, bv2):
+    """
+    Declares an SMT2 array represented as a map
+    from a bit vector to another bit vector.
+    :param a: array name
+    :param bv1: SMT2 bit vector
+    :param bv2: SMT2 bit vector
+    """
+    return "(declare-fun {} () (Array {} {}))".format(a, bv1, bv2)
+
+
+def bit_vec_val(v, size):
+    """
+    Declares a bit vector value
+    :param v: int, value of the bit vector
+    :param size: size of the bit vector
+    """
+    return "(_ bv{} {})".format(v, size)
+
+
+def bit_vec(size):
+    """
+    Returns a bit vector of size @size
+    """
+    return "(_ BitVec {})".format(size)
+
+
+# bit vector operations
+
+def bvadd(a, b):
+    """
+    Addition: a + b
+    """
+    return "(bvadd {} {})".format(a, b)
+
+
+def bvsub(a, b):
+    """
+    Subtraction: a - b
+    """
+    return "(bvsub {} {})".format(a, b)
+
+
+def bvmul(a, b):
+    """
+    Multiplication: a * b
+    """
+    return "(bvmul {} {})".format(a, b)
+
+
+def bvand(a, b):
+    """
+    Bitwise AND: a & b
+    """
+    return "(bvand {} {})".format(a, b)
+
+
+def bvor(a, b):
+    """
+    Bitwise OR: a | b
+    """
+    return "(bvor {} {})".format(a, b)
+
+
+def bvxor(a, b):
+    """
+    Bitwise XOR: a ^ b
+    """
+    return "(bvxor {} {})".format(a, b)
+
+
+def bvneg(bv):
+    """
+    Unary minus: - bv
+    """
+    return "(bvneg {})".format(bv)
+
+
+def bvsdiv(a, b):
+    """
+    Signed division: a / b
+    """
+    return "(bvsdiv {} {})".format(a, b)
+
+
+def bvudiv(a, b):
+    """
+    Unsigned division: a / b
+    """
+    return "(bvudiv {} {})".format(a, b)
+
+
+def bvsmod(a, b):
+    """
+    Signed modulo: a mod b
+    """
+    return "(bvsmod {} {})".format(a, b)
+
+
+def bvurem(a, b):
+    """
+    Unsigned modulo: a mod b
+    """
+    return "(bvurem {} {})".format(a, b)
+
+
+def bvshl(a, b):
+    """
+    Shift left: a << b
+    """
+    return "(bvshl {} {})".format(a, b)
+
+
+def bvlshr(a, b):
+    """
+    Logical shift right: a >> b
+    """
+    return "(bvlshr {} {})".format(a, b)
+
+
+def bvashr(a, b):
+    """
+    Arithmetic shift right: a a>> b
+    """
+    return "(bvashr {} {})".format(a, b)
+
+
+def bv_rotate_left(a, b, size):
+    """
+    Rotates bits of a to the left b times: a <<< b
+
+    Since ((_ rotate_left b) a) does not support
+    symbolic values for b, the implementation is
+    based on a C implementation.
+
+    Therefore, the rotation will be computed as
+    a << (b & (size - 1))) | (a >> (size - (b & (size - 1))))
+
+    :param a: bit vector
+    :param b: bit vector
+    :param size: size of a
+    """
+
+    # define constant
+    s = bit_vec_val(size, size)
+
+    # shift = b & (size  - 1)
+    shift = bvand(b, bvsub(s, bit_vec_val(1, size)))
+
+    # (a << shift) | (a >> size - shift)
+    rotate = bvor(bvshl(a, shift),
+                  bvlshr(a, bvsub(s, shift)))
+
+    return rotate
+
+
+def bv_rotate_right(a, b, size):
+    """
+    Rotates bits of a to the right b times: a >>> b
+
+    Since ((_ rotate_right b) a) does not support
+    symbolic values for b, the implementation is
+    based on a C implementation.
+
+    Therefore, the rotation will be computed as
+    a >> (b & (size - 1))) | (a << (size - (b & (size - 1))))
+
+    :param a: bit vector
+    :param b: bit vector
+    :param size: size of a
+    """
+
+    # define constant
+    s = bit_vec_val(size, size)
+
+    # shift = b & (size  - 1)
+    shift = bvand(b, bvsub(s, bit_vec_val(1, size)))
+
+    # (a >> shift) | (a << size - shift)
+    rotate = bvor(bvlshr(a, shift),
+                  bvshl(a, bvsub(s, shift)))
+
+    return rotate
+
+
+def bv_extract(high, low, bv):
+    """
+    Extracts bits from a bit vector
+    :param high: end bit
+    :param low: start bit
+    :param bv: bit vector
+    """
+    return "((_ extract {} {}) {})".format(high, low, bv)
+
+
+def bv_concat(a, b):
+    """
+    Concatenation of two SMT2 expressions
+    """
+    return "(concat {} {})".format(a, b)
+
+
+# array operations
+
+def array_select(array, index):
+    """
+    Reads from an SMT2 array at index @index
+    :param array: SMT2 array
+    :param index: SMT2 expression, index of the array
+    """
+    return "(select {} {})".format(array, index)
+
+
+def array_store(array, index, value):
+    """
+    Writes an value into an SMT2 array at address @index
+    :param array: SMT array
+    :param index: SMT2 expression, index of the array
+    :param value: SMT2 expression, value to write
+    """
+    return "(store {} {} {})".format(array, index, value)
diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py
new file mode 100644
index 00000000..96f8dab3
--- /dev/null
+++ b/miasm2/ir/translators/smt2.py
@@ -0,0 +1,283 @@
+import logging
+import operator
+
+from miasm2.core.asmbloc import asm_label
+from miasm2.ir.translators.translator import Translator
+from miasm2.expression.smt2_helper import *
+
+log = logging.getLogger("translator_smt2")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log.addHandler(console_handler)
+log.setLevel(logging.WARNING)
+
+class SMT2Mem(object):
+    """
+    Memory abstraction for TranslatorSMT2. Memory elements are only accessed,
+    never written. To give a concrete value for a given memory cell in a solver,
+    add "mem32.get(address, size) == <value>" constraints to your equation.
+    The endianness of memory accesses is handled accordingly to the "endianness"
+    attribute.
+    Note: Will have one memory space for each addressing size used.
+    For example, if memory is accessed via 32 bits values and 16 bits values,
+    these access will not occur in the same address space.
+
+    Adapted from Z3Mem
+    """
+
+    def __init__(self, endianness="<", name="mem"):
+        """Initializes an SMT2Mem object with a given @name and @endianness.
+        @endianness: Endianness of memory representation. '<' for little endian,
+            '>' for big endian.
+        @name: name of memory Arrays generated. They will be named
+            name+str(address size) (for example mem32, mem16...).
+        """
+        if endianness not in ['<', '>']:
+            raise ValueError("Endianness should be '>' (big) or '<' (little)")
+        self.endianness = endianness
+        self.mems = {} # Address size -> SMT2 memory array
+        self.name = name
+        # initialise address size
+        self.addr_size = 0
+
+    def get_mem_array(self, size):
+        """Returns an SMT Array used internally to represent memory for addresses
+        of size @size.
+        @size: integer, size in bit of addresses in the memory to get.
+        Return an string with the name of the SMT array..
+        """
+        try:
+            mem = self.mems[size]
+        except KeyError:
+            # Lazy instanciation
+            self.mems[size] = self.name + str(size)
+            mem = self.mems[size]
+        return mem
+
+    def __getitem__(self, addr):
+        """One byte memory access. Different address sizes with the same value
+        will result in different memory accesses.
+        @addr: an SMT2 expression, the address to read.
+        Return an SMT2 expression of size 8 bits representing a memory access.
+        """
+        size = self.addr_size
+        mem = self.get_mem_array(size)
+        return array_select(mem, addr)
+
+    def get(self, addr, size, addr_size):
+        """ Memory access at address @addr of size @size with
+        address size @addr_size.
+        @addr: an SMT2 expression, the address to read.
+        @size: int, size of the read in bits.
+        @addr_size: int, size of the address
+        Return a SMT2 expression representing a memory access.
+        """
+        # set address size per read access
+        self.addr_size = addr_size
+
+        original_size = size
+        if original_size % 8 != 0:
+            # Size not aligned on 8bits -> read more than size and extract after
+            size = ((original_size / 8) + 1) * 8
+        res = self[addr]
+        if self.is_little_endian():
+            for i in xrange(1, size/8):
+                index = bvadd(addr, bit_vec_val(i, addr_size))
+                res = bv_concat(self[index], res)
+        else:
+            for i in xrange(1, size/8):
+                res = bv_concat(res, self[index])
+        if size == original_size:
+            return res
+        else:
+            # Size not aligned, extract right sized result
+            return bv_extract(original_size-1, 0, res)
+
+    def is_little_endian(self):
+        """True if this memory is little endian."""
+        return self.endianness == "<"
+
+    def is_big_endian(self):
+        """True if this memory is big endian."""
+        return not self.is_little_endian()
+
+
+class TranslatorSMT2(Translator):
+    """Translate a Miasm expression into an equivalent SMT2
+    expression. Memory is abstracted via SMT2Mem.
+    The result of from_expr will be an SMT2 expression.
+
+    If you want to interract with the memory abstraction after the translation,
+    you can instantiate your own SMT2Mem that will be equivalent to the one
+    used by TranslatorSMT2.
+
+    TranslatorSMT2 provides the creation of a valid SMT2 file. For this,
+    it keeps track of the translated bit vectors.
+
+    Adapted from TranslatorZ3
+    """
+
+    # Implemented language
+    __LANG__ = "smt2"
+
+    def __init__(self, endianness="<", **kwargs):
+        """Instance a SMT2 translator
+        @endianness: (optional) memory endianness
+        """
+        super(TranslatorSMT2, self).__init__(**kwargs)
+        # memory abstraction
+        self._mem = SMT2Mem(endianness)
+        # map of translated bit vectors
+        self._bitvectors = dict()
+
+    def from_ExprInt(self, expr):
+        return bit_vec_val(expr.arg.arg, expr.size)
+
+    def from_ExprId(self, expr):
+        if isinstance(expr.name, asm_label):
+            if expr.name.offset is not None:
+                return bit_vec_val(str(expr.name.offset), expr.size)
+            else:
+                # SMT2-escape expression name
+                name = "|{}|".format(str(expr.name))
+                if name not in self._bitvectors:
+                    self._bitvectors[name] = expr.size
+                return name
+        else:
+            if str(expr) not in self._bitvectors:
+                self._bitvectors[str(expr)] = expr.size
+            return str(expr)
+
+    def from_ExprMem(self, expr):
+        addr = self.from_expr(expr.arg)
+        # size to read from memory
+        size = expr.size
+        # size of memory address
+        addr_size = expr.arg.size
+        return self._mem.get(addr, size, addr_size)
+
+    def from_ExprSlice(self, expr):
+        res = self.from_expr(expr.arg)
+        res = bv_extract(expr.stop-1, expr.start, res)
+        return res
+
+    def from_ExprCompose(self, expr):
+        res = None
+        args = sorted(expr.args, key=operator.itemgetter(2))  # sort by start off
+        for subexpr, start, stop in args:
+            sube = self.from_expr(subexpr)
+            e = bv_extract(stop-start-1, 0, sube)
+            if res:
+                res = bv_concat(e, res)
+            else:
+                res = e
+        return res
+
+    def from_ExprCond(self, expr):
+        cond = self.from_expr(expr.cond)
+        src1 = self.from_expr(expr.src1)
+        src2 = self.from_expr(expr.src2)
+
+        # (and (distinct cond (_ bv0 <size>)) true)
+        zero = bit_vec_val(0, expr.cond.size)
+        distinct = smt2_distinct(cond, zero)
+        distinct_and = smt2_and(distinct, "true")
+
+        # (ite ((and (distinct cond (_ bv0 <size>)) true) src1 src2))
+        return smt2_ite(distinct_and, src1, src2)
+
+    def from_ExprOp(self, expr):
+        args = map(self.from_expr, expr.args)
+        res = args[0]
+
+        if len(args) > 1:
+            for arg in args[1:]:
+                if expr.op == "+":
+                    res = bvadd(res, arg)
+                elif expr.op == "-":
+                    res = bvsub(res, arg)
+                elif expr.op == "*":
+                    res = bvmul(res, arg)
+                elif expr.op == "/":
+                    res = bvsdiv(res, arg)
+                elif expr.op == "idiv":
+                    res = bvsdiv(res, arg)
+                elif expr.op == "udiv":
+                    res = bvudiv(res, arg)
+                elif expr.op == "%":
+                    res = bvsmod(res, arg)
+                elif expr.op == "imod":
+                    res = bvsmod(res, arg)
+                elif expr.op == "umod":
+                    res = bvurem(res, arg)
+                elif expr.op == "&":
+                    res = bvand(res, arg)
+                elif expr.op == "^":
+                    res = bvxor(res, arg)
+                elif expr.op == "|":
+                    res = bvor(res, arg)
+                elif expr.op == "<<":
+                    res = bvshl(res, arg)
+                elif expr.op == ">>":
+                    res = bvlshr(res, arg)
+                elif expr.op == "a<<":
+                    res = bvshl(res, arg)
+                elif expr.op == "a>>":
+                    res = bvashr(res, arg)
+                elif expr.op == "<<<":
+                    res = bv_rotate_left(res, arg, expr.size)
+                elif expr.op == ">>>":
+                    res = bv_rotate_right(res, arg, expr.size)
+                else:
+                    raise NotImplementedError("Unsupported OP yet: %s" % expr.op)
+        elif expr.op == 'parity':
+            arg = bv_extract(7, 0, res)
+            res = bit_vec_val(1, 1)
+            for i in xrange(8):
+                res = bvxor(res, bv_extract(i, i, arg))
+        elif expr.op == '-':
+            res = bvneg(res)
+        else:
+            raise NotImplementedError("Unsupported OP yet: %s" % expr.op)
+
+        return res
+
+    def from_ExprAff(self, expr):
+        src = self.from_expr(expr.src)
+        dst = self.from_expr(expr.dst)
+        return smt2_assert(smt2_eq(src, dst))
+
+    def to_smt2(self, exprs, logic="QF_ABV"):
+        """
+        Converts a valid SMT2 file for a given list of
+        SMT2 expressions.
+
+        :param exprs: list of SMT2 expressions
+        :param logic: SMT2 logic
+        :return: String of the SMT2 file
+        """
+        ret = ""
+        ret += "(set-logic {})\n".format(logic)
+
+        # define bit vectors
+        for bv in self._bitvectors:
+            size = self._bitvectors[bv]
+            ret += "{}\n".format(declare_bv(bv, size))
+
+        # define memory arrays
+        for size in self._mem.mems:
+            mem = self._mem.mems[size]
+            ret += "{}\n".format(declare_array(mem, bit_vec(size), bit_vec(8)))
+
+        # merge SMT2 expressions
+        for expr in exprs:
+            ret += expr + "\n"
+
+        # define action
+        ret += "(check-sat)\n"
+
+        return ret
+
+
+# Register the class
+Translator.register(TranslatorSMT2)
diff --git a/miasm2/jitter/arch/JitCore_x86.c b/miasm2/jitter/arch/JitCore_x86.c
index 9d15cd20..12fc1b7e 100644
--- a/miasm2/jitter/arch/JitCore_x86.c
+++ b/miasm2/jitter/arch/JitCore_x86.c
@@ -45,6 +45,15 @@ reg_dict gpreg_dict[] = { {.name = "RAX", .offset = offsetof(vm_cpu_t, RAX)},
 			  {.name = "FS", .offset = offsetof(vm_cpu_t, FS)},
 			  {.name = "GS", .offset = offsetof(vm_cpu_t, GS)},
 
+			  {.name = "MM0", .offset = offsetof(vm_cpu_t, MM0)},
+			  {.name = "MM1", .offset = offsetof(vm_cpu_t, MM1)},
+			  {.name = "MM2", .offset = offsetof(vm_cpu_t, MM2)},
+			  {.name = "MM3", .offset = offsetof(vm_cpu_t, MM3)},
+			  {.name = "MM4", .offset = offsetof(vm_cpu_t, MM4)},
+			  {.name = "MM5", .offset = offsetof(vm_cpu_t, MM5)},
+			  {.name = "MM6", .offset = offsetof(vm_cpu_t, MM6)},
+			  {.name = "MM7", .offset = offsetof(vm_cpu_t, MM7)},
+
 };
 
 
@@ -96,6 +105,16 @@ PyObject* cpu_get_gpreg(JitCpu* self)
     get_reg(FS);
     get_reg(GS);
 
+    get_reg(MM0);
+    get_reg(MM1);
+    get_reg(MM2);
+    get_reg(MM3);
+    get_reg(MM4);
+    get_reg(MM5);
+    get_reg(MM6);
+    get_reg(MM7);
+
+
     return dict;
 }
 
@@ -475,6 +494,15 @@ getset_reg_R_u16(BP);
 
 getset_reg_R_u16(IP);
 
+getset_reg_u64(MM0);
+getset_reg_u64(MM1);
+getset_reg_u64(MM2);
+getset_reg_u64(MM3);
+getset_reg_u64(MM4);
+getset_reg_u64(MM5);
+getset_reg_u64(MM6);
+getset_reg_u64(MM7);
+
 
 PyObject* get_gpreg_offset_all(void)
 {
@@ -750,6 +778,15 @@ static PyGetSetDef JitCpu_getseters[] = {
 
     {"IP", (getter)JitCpu_get_IP, (setter)JitCpu_set_IP, "IP", NULL},
 
+    {"MM0", (getter)JitCpu_get_MM0, (setter)JitCpu_set_MM0, "MM0", NULL},
+    {"MM1", (getter)JitCpu_get_MM1, (setter)JitCpu_set_MM1, "MM1", NULL},
+    {"MM2", (getter)JitCpu_get_MM2, (setter)JitCpu_set_MM2, "MM2", NULL},
+    {"MM3", (getter)JitCpu_get_MM3, (setter)JitCpu_set_MM3, "MM3", NULL},
+    {"MM4", (getter)JitCpu_get_MM4, (setter)JitCpu_set_MM4, "MM4", NULL},
+    {"MM5", (getter)JitCpu_get_MM5, (setter)JitCpu_set_MM5, "MM5", NULL},
+    {"MM6", (getter)JitCpu_get_MM6, (setter)JitCpu_set_MM6, "MM6", NULL},
+    {"MM7", (getter)JitCpu_get_MM7, (setter)JitCpu_set_MM7, "MM7", NULL},
+
 
     {NULL}  /* Sentinel */
 };
diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c
index b86bee1a..fd5b870d 100644
--- a/miasm2/jitter/vm_mngr.c
+++ b/miasm2/jitter/vm_mngr.c
@@ -1354,6 +1354,7 @@ void init_memory_breakpoint(vm_mngr_t* vm_mngr)
 void reset_memory_page_pool(vm_mngr_t* vm_mngr)
 {
 	free(vm_mngr->memory_pages_array);
+	vm_mngr->memory_pages_array = NULL;
 	vm_mngr->memory_pages_number = 0;
 }
 
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 7d6260a2..dfe4ef91 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -2238,6 +2238,33 @@ reg_tests = [
     (m32, "00000000    MOVAPD     XMMWORD PTR [EBP+0xFFFFFFB8], XMM0",
      "660f2945b8"),
 
+
+    (m32, "00000000    MOVLPD     XMM0, QWORD PTR [ESP+0x4]",
+     "660F12442404"),
+    (m32, "00000000    MOVLPS     XMM0, QWORD PTR [ESP+0x4]",
+     "0F12442404"),
+    (m32, "00000000    MOVLPD     QWORD PTR [ESP+0x4], XMM0",
+     "660F13442404"),
+    (m32, "00000000    MOVLPS     QWORD PTR [ESP+0x4], XMM0",
+     "0F13442404"),
+
+    (m32, "00000000    MOVHPD     XMM0, QWORD PTR [ESP+0x4]",
+     "660F16442404"),
+    (m32, "00000000    MOVHPS     XMM0, QWORD PTR [ESP+0x4]",
+     "0F16442404"),
+    (m32, "00000000    MOVHPD     QWORD PTR [ESP+0x4], XMM0",
+     "660F17442404"),
+    (m32, "00000000    MOVHPS     QWORD PTR [ESP+0x4], XMM0",
+     "0F17442404"),
+
+    (m32, "00000000    MOVLHPS    XMM2, XMM1",
+     "0F16D1"),
+    (m32, "00000000    MOVHLPS    XMM2, XMM1",
+     "0F12D1"),
+
+    (m32, "00000000    MOVDQ2Q    MM2, XMM1",
+     "F20Fd6D1"),
+
     (m32, "00000000    MOVUPS     XMM2, XMMWORD PTR [ECX]",
      "0f1011"),
     (m32, "00000000    MOVSD      XMM2, QWORD PTR [ECX]",
@@ -2287,6 +2314,11 @@ reg_tests = [
      "0f548327bd2c00"),
     (m32, "00000000    ANDPD      XMM0, XMMWORD PTR [EBX+0x2CBD27]",
      "660f548327bd2c00"),
+    (m32, "00000000    ANDNPS     XMM0, XMMWORD PTR [EBX+0x2CBD27]",
+     "0f558327bd2c00"),
+    (m32, "00000000    ANDNPD     XMM0, XMMWORD PTR [EBX+0x2CBD27]",
+     "660f558327bd2c00"),
+
 
     (m32, "00000000    SUBSD      XMM1, XMM0",
      "f20f5cc8"),
@@ -2434,6 +2466,18 @@ reg_tests = [
     (m32, "00000000    PAND       XMM0, XMM4",
      "660fdbc4"),
 
+    (m32, "00000000    PANDN      MM2, MM6",
+     "0fdfd6"),
+    (m32, "00000000    PANDN      XMM2, XMM6",
+     "660fdfd6"),
+
+
+    (m32, "00000000    PANDN      MM0, MM4",
+     "0fdfc4"),
+    (m32, "00000000    PANDN      XMM0, XMM4",
+     "660fdfc4"),
+
+
     (m32, "00000000    POR        XMM0, XMM1",
      "660febc1"),
     (m32, "00000000    POR        XMM6, XMMWORD PTR [ECX+0x10]",
@@ -2574,6 +2618,245 @@ reg_tests = [
     (m32, "00000000    COMISD     XMM7, XMM6",
     "660F2FFE"),
 
+    (m32, "00000000    PSHUFB     MM6, QWORD PTR [ESI]",
+    "0F380036"),
+    (m32, "00000000    PSHUFB     XMM6, XMMWORD PTR [ESI]",
+    "660F380036"),
+    (m32, "00000000    PSHUFD     XMM6, XMMWORD PTR [ESI], 0xEE",
+    "660F7036EE"),
+
+
+    (m32, "00000000    PSRLQ      MM6, 0x5",
+    "0F73D605"),
+    (m32, "00000000    PSRLQ      XMM6, 0x5",
+    "660F73D605"),
+    (m32, "00000000    PSRLD      MM6, 0x5",
+    "0F72D605"),
+    (m32, "00000000    PSRLD      XMM6, 0x5",
+    "660F72D605"),
+    (m32, "00000000    PSRLW      MM6, 0x5",
+    "0F71D605"),
+    (m32, "00000000    PSRLW      XMM6, 0x5",
+    "660F71D605"),
+
+
+    (m32, "00000000    PSRLQ      MM2, QWORD PTR [EDX]",
+    "0FD312"),
+    (m32, "00000000    PSRLQ      XMM2, XMMWORD PTR [EDX]",
+    "660FD312"),
+
+    (m32, "00000000    PSRLD      MM2, QWORD PTR [EDX]",
+    "0FD212"),
+    (m32, "00000000    PSRLD      XMM2, XMMWORD PTR [EDX]",
+    "660FD212"),
+
+    (m32, "00000000    PSRLW      MM2, QWORD PTR [EDX]",
+    "0FD112"),
+    (m32, "00000000    PSRLW      XMM2, XMMWORD PTR [EDX]",
+    "660FD112"),
+
+
+
+
+    (m32, "00000000    PSLLQ      MM6, 0x5",
+    "0F73F605"),
+    (m32, "00000000    PSLLQ      XMM6, 0x5",
+    "660F73F605"),
+    (m32, "00000000    PSLLD      MM6, 0x5",
+    "0F72F605"),
+    (m32, "00000000    PSLLD      XMM6, 0x5",
+    "660F72F605"),
+    (m32, "00000000    PSLLW      MM6, 0x5",
+    "0F71F605"),
+    (m32, "00000000    PSLLW      XMM6, 0x5",
+    "660F71F605"),
+
+
+    (m32, "00000000    PSLLQ      MM2, QWORD PTR [EDX]",
+    "0FF312"),
+    (m32, "00000000    PSLLQ      XMM2, XMMWORD PTR [EDX]",
+    "660FF312"),
+
+    (m32, "00000000    PSLLD      MM2, QWORD PTR [EDX]",
+    "0FF212"),
+    (m32, "00000000    PSLLD      XMM2, XMMWORD PTR [EDX]",
+    "660FF212"),
+
+    (m32, "00000000    PSLLW      MM2, QWORD PTR [EDX]",
+    "0FF112"),
+    (m32, "00000000    PSLLW      XMM2, XMMWORD PTR [EDX]",
+    "660FF112"),
+
+    (m32, "00000000    PMAXUB     MM2, QWORD PTR [EDX]",
+    "0FDE12"),
+    (m32, "00000000    PMAXUB     XMM2, XMMWORD PTR [EDX]",
+    "660FDE12"),
+
+    (m32, "00000000    PMAXUW     XMM2, XMMWORD PTR [EDX]",
+    "660F383E12"),
+    (m32, "00000000    PMAXUD     XMM2, XMMWORD PTR [EDX]",
+    "660F383F12"),
+
+
+
+    (m32, "00000000    PMINUB     MM2, QWORD PTR [EDX]",
+    "0FDA12"),
+    (m32, "00000000    PMINUB     XMM2, XMMWORD PTR [EDX]",
+    "660FDA12"),
+
+    (m32, "00000000    PMINUW     XMM2, XMMWORD PTR [EDX]",
+    "660F383A12"),
+    (m32, "00000000    PMINUD     XMM2, XMMWORD PTR [EDX]",
+    "660F383B12"),
+
+
+    (m32, "00000000    PCMPEQB    MM2, QWORD PTR [EDX]",
+    "0F7412"),
+    (m32, "00000000    PCMPEQB    XMM2, XMMWORD PTR [EDX]",
+    "660F7412"),
+
+    (m32, "00000000    PCMPEQW    MM2, QWORD PTR [EDX]",
+    "0F7512"),
+    (m32, "00000000    PCMPEQW    XMM2, XMMWORD PTR [EDX]",
+    "660F7512"),
+
+    (m32, "00000000    PCMPEQD    MM2, QWORD PTR [EDX]",
+    "0F7612"),
+    (m32, "00000000    PCMPEQD    XMM2, XMMWORD PTR [EDX]",
+    "660F7612"),
+
+
+    (m32, "00000000    PUNPCKHBW  MM2, QWORD PTR [EDX]",
+    "0F6812"),
+    (m32, "00000000    PUNPCKHBW  XMM2, XMMWORD PTR [EDX]",
+    "660F6812"),
+
+    (m32, "00000000    PUNPCKHWD  MM2, QWORD PTR [EDX]",
+    "0F6912"),
+    (m32, "00000000    PUNPCKHWD  XMM2, XMMWORD PTR [EDX]",
+    "660F6912"),
+
+    (m32, "00000000    PUNPCKHDQ  MM2, QWORD PTR [EDX]",
+    "0F6A12"),
+    (m32, "00000000    PUNPCKHDQ  XMM2, XMMWORD PTR [EDX]",
+    "660F6A12"),
+
+    (m32, "00000000    PUNPCKHQDQ XMM2, XMMWORD PTR [EDX]",
+    "660F6D12"),
+
+
+    (m32, "00000000    PUNPCKLBW  MM2, QWORD PTR [EDX]",
+    "0F6012"),
+    (m32, "00000000    PUNPCKLBW  XMM2, XMMWORD PTR [EDX]",
+    "660F6012"),
+
+    (m32, "00000000    PUNPCKLWD  MM2, QWORD PTR [EDX]",
+    "0F6112"),
+    (m32, "00000000    PUNPCKLWD  XMM2, XMMWORD PTR [EDX]",
+    "660F6112"),
+
+    (m32, "00000000    PUNPCKLDQ  MM2, QWORD PTR [EDX]",
+    "0F6212"),
+    (m32, "00000000    PUNPCKLDQ  XMM2, XMMWORD PTR [EDX]",
+    "660F6212"),
+
+    (m32, "00000000    PUNPCKLQDQ XMM2, XMMWORD PTR [EDX]",
+    "660F6C12"),
+
+
+    (m32, "00000000    PINSRB     XMM2, BYTE PTR [EDX], 0x5",
+    "660F3A201205"),
+
+    (m32, "00000000    PINSRW     MM2, WORD PTR [EDX], 0x5",
+    "0FC41205"),
+    (m32, "00000000    PINSRW     XMM2, WORD PTR [EDX], 0x5",
+    "660FC41205"),
+
+    (m32, "00000000    PINSRD     XMM2, DWORD PTR [EDX], 0x5",
+    "660F3A221205"),
+
+
+    (m64, "00000000    PINSRB     XMM2, BYTE PTR [RDX], 0x5",
+    "660F3A201205"),
+
+    (m64, "00000000    PINSRW     MM2, WORD PTR [RDX], 0x5",
+    "0FC41205"),
+    (m64, "00000000    PINSRW     XMM2, WORD PTR [RDX], 0x5",
+    "660FC41205"),
+
+
+    (m64, "00000000    PINSRB     XMM2, EDX, 0x5",
+    "660F3A20D205"),
+
+    (m64, "00000000    PINSRW     MM2, EDX, 0x5",
+    "0FC4D205"),
+    (m64, "00000000    PINSRW     XMM2, EDX, 0x5",
+    "660FC4D205"),
+
+    (m64, "00000000    PINSRB     XMM2, RDX, 0x5",
+    "66480F3A20D205"),
+
+    (m64, "00000000    PINSRW     MM2, RDX, 0x5",
+    "480FC4D205"),
+    (m64, "00000000    PINSRW     XMM2, RDX, 0x5",
+    "66480FC4D205"),
+
+
+    (m64, "00000000    PINSRD     XMM2, DWORD PTR [RDX], 0x5",
+    "660F3A221205"),
+    (m64, "00000000    PINSRQ     XMM2, QWORD PTR [RDX], 0x5",
+    "66480F3A221205"),
+
+
+
+
+
+    (m32, "00000000    PEXTRB     BYTE PTR [EDX], XMM2, 0x5",
+    "660F3A141205"),
+    (m32, "00000000    PEXTRB     EAX, XMM2, 0x5",
+    "660F3A14D005"),
+
+    (m32, "00000000    PEXTRW     WORD PTR [EDX], XMM2, 0x5",
+    "660F3A151205"),
+
+
+    (m32, "00000000    PEXTRW     WORD PTR [EDX], MM2, 0x5",
+    "0FC51205"),
+    (m32, "00000000    PEXTRW     WORD PTR [EDX], XMM2, 0x5",
+    "660FC51205"),
+
+    (m32, "00000000    PEXTRD     DWORD PTR [EDX], XMM2, 0x5",
+    "660F3A161205"),
+
+    (m64, "00000000    PEXTRD     DWORD PTR [RDX], XMM2, 0x5",
+    "660F3A161205"),
+    (m64, "00000000    PEXTRQ     QWORD PTR [RDX], XMM2, 0x5",
+    "66480F3A161205"),
+
+
+    (m32, "00000000    UNPCKHPS   XMM2, XMMWORD PTR [EDX]",
+     "0f1512"),
+    (m32, "00000000    UNPCKHPD   XMM2, XMMWORD PTR [EDX]",
+     "660f1512"),
+
+    (m32, "00000000    UNPCKLPS   XMM2, XMMWORD PTR [EDX]",
+     "0f1412"),
+    (m32, "00000000    UNPCKLPD   XMM2, XMMWORD PTR [EDX]",
+     "660f1412"),
+
+    (m32, "00000000    SQRTPD     XMM2, XMMWORD PTR [EDX]",
+     "660f5112"),
+    (m32, "00000000    SQRTPS     XMM2, XMMWORD PTR [EDX]",
+     "0f5112"),
+    (m32, "00000000    SQRTSD     XMM2, QWORD PTR [EDX]",
+     "F20f5112"),
+    (m32, "00000000    SQRTSS     XMM2, DWORD PTR [EDX]",
+     "F30f5112"),
+
+    (m32, "00000000    PMOVMSKB   EAX, MM7",
+     "0FD7C7"),
+    (m32, "00000000    PMOVMSKB   EAX, XMM7",
+     "660FD7C7"),
 
 
 ]
diff --git a/test/arch/x86/unit/mn_pcmpeq.py b/test/arch/x86/unit/mn_pcmpeq.py
new file mode 100644
index 00000000..a8774cbc
--- /dev/null
+++ b/test/arch/x86/unit/mn_pcmpeq.py
@@ -0,0 +1,64 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PCMPEQB(Asm_Test):
+    TXT = '''
+    main:
+       CALL    next
+       .byte 0x88, 0x78, 0x66, 0x56, 0x44, 0x3F, 0xFF, 0x11
+       .byte 0x89, 0x77, 0x66, 0x55, 0xF9, 0x33, 0x22, 0x11
+    next:
+       POP     EBP
+       MOVQ    MM0, QWORD PTR [EBP]
+       MOVQ    MM1, MM0
+       PCMPEQB MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x11FF3F4456667888
+        assert self.myjit.cpu.MM1 == 0xFF00000000FF0000
+
+
+class Test_PCMPEQW(Asm_Test):
+    TXT = '''
+    main:
+       CALL    next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x3F, 0x22, 0x11
+       .byte 0x89, 0x77, 0x66, 0x55, 0xF9, 0x33, 0x22, 0x11
+    next:
+       POP     EBP
+       MOVQ    MM0, QWORD PTR [EBP]
+       MOVQ    MM1, MM0
+       PCMPEQW MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x11223F4455667788
+        assert self.myjit.cpu.MM1 == 0xFFFF0000FFFF0000
+
+
+
+class Test_PCMPEQD(Asm_Test):
+    TXT = '''
+    main:
+       CALL    next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x3F, 0x22, 0x11
+       .byte 0x88, 0x77, 0x66, 0x55, 0xF9, 0x33, 0x22, 0x11
+    next:
+       POP     EBP
+       MOVQ    MM0, QWORD PTR [EBP]
+       MOVQ    MM1, MM0
+       PCMPEQD MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x11223F4455667788
+        assert self.myjit.cpu.MM1 == 0x00000000FFFFFFFF
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PCMPEQB, Test_PCMPEQW, Test_PCMPEQD]]
diff --git a/test/arch/x86/unit/mn_pextr.py b/test/arch/x86/unit/mn_pextr.py
new file mode 100644
index 00000000..eb724cf9
--- /dev/null
+++ b/test/arch/x86/unit/mn_pextr.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PEXTRB(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01
+    next:
+       POP       EBP
+       MOV       EAX, 0xFFFFFFFF
+       MOVQ      MM0, QWORD PTR [EBP]
+       PEXTRW    EAX, MM0, 2
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.EAX == 0x3344
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PEXTRB]]
diff --git a/test/arch/x86/unit/mn_pinsr.py b/test/arch/x86/unit/mn_pinsr.py
new file mode 100644
index 00000000..b7a86d2d
--- /dev/null
+++ b/test/arch/x86/unit/mn_pinsr.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PINSRB(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PINSRW    MM1, QWORD PTR [EBP+0x8], 2
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0x1122070855667788
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PINSRB]]
diff --git a/test/arch/x86/unit/mn_pmaxu.py b/test/arch/x86/unit/mn_pmaxu.py
new file mode 100644
index 00000000..08e54c03
--- /dev/null
+++ b/test/arch/x86/unit/mn_pmaxu.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PMAXU(Asm_Test):
+    TXT = '''
+    main:
+       CALL   next
+       .byte 0x88, 0x76, 0x66, 0x54, 0x44, 0x32, 0x00, 0x10
+       .byte 0x87, 0x77, 0x66, 0x55, 0x40, 0x33, 0x22, 0x11
+    next:
+       POP    EBP
+       MOVQ   MM0, QWORD PTR [EBP]
+       MOVQ   MM1, MM0
+       PMAXUB MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1000324454667688
+        assert self.myjit.cpu.MM1 == 0x1122334455667788
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PMAXU]]
diff --git a/test/arch/x86/unit/mn_pminu.py b/test/arch/x86/unit/mn_pminu.py
new file mode 100644
index 00000000..38a29787
--- /dev/null
+++ b/test/arch/x86/unit/mn_pminu.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PMINU(Asm_Test):
+    TXT = '''
+    main:
+       CALL   next
+       .byte 0x88, 0x78, 0x66, 0x56, 0x44, 0x3F, 0xFF, 0x1F
+       .byte 0x89, 0x77, 0x66, 0x55, 0xF9, 0x33, 0x22, 0x11
+    next:
+       POP    EBP
+       MOVQ   MM0, QWORD PTR [EBP]
+       MOVQ   MM1, MM0
+       PMINUB MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1FFF3F4456667888
+        assert self.myjit.cpu.MM1 == 0x1122334455667788
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PMINU]]
diff --git a/test/arch/x86/unit/mn_pmovmskb.py b/test/arch/x86/unit/mn_pmovmskb.py
new file mode 100644
index 00000000..97435794
--- /dev/null
+++ b/test/arch/x86/unit/mn_pmovmskb.py
@@ -0,0 +1,26 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PMOVMSKB(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0xE6, 0x55, 0xC4, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOV       EAX, 0xFFFFFFFF
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PMOVMSKB  EAX, MM1
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x112233C455E67788
+        assert self.myjit.cpu.MM1 == 0x112233C455E67788
+        assert self.myjit.cpu.EAX == 0x00000015
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PMOVMSKB,]]
diff --git a/test/arch/x86/unit/mn_pshufb.py b/test/arch/x86/unit/mn_pshufb.py
new file mode 100644
index 00000000..187b2f72
--- /dev/null
+++ b/test/arch/x86/unit/mn_pshufb.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PSHUFB(Asm_Test):
+    TXT = '''
+    main:
+       CALL   next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0
+    next:
+       POP    EBP
+       MOVQ   MM0, QWORD PTR [EBP]
+       MOVQ   MM1, MM0
+       PSHUFB MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788L
+        assert self.myjit.cpu.MM1 == 0x8877665544332211L
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PSHUFB]]
diff --git a/test/arch/x86/unit/mn_psrl_psll.py b/test/arch/x86/unit/mn_psrl_psll.py
new file mode 100644
index 00000000..93a356f7
--- /dev/null
+++ b/test/arch/x86/unit/mn_psrl_psll.py
@@ -0,0 +1,55 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PSRL(Asm_Test):
+    TXT = '''
+    main:
+       CALL   next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
+    next:
+       POP    EBP
+       MOVQ   MM0, QWORD PTR [EBP]
+       MOVQ   MM1, MM0
+       MOVQ   MM2, MM0
+       MOVQ   MM3, MM0
+       PSRLW  MM1, QWORD PTR [EBP+0x8]
+       PSRLD  MM2, QWORD PTR [EBP+0x8]
+       PSRLQ  MM3, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788L
+        assert self.myjit.cpu.MM1 == 0x0112033405560778L
+        assert self.myjit.cpu.MM2 == 0x0112233405566778L
+        assert self.myjit.cpu.MM3 == 0x0112233445566778L
+
+class Test_PSLL(Asm_Test):
+    TXT = '''
+    main:
+       CALL   next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
+    next:
+       POP    EBP
+       MOVQ   MM0, QWORD PTR [EBP]
+       MOVQ   MM1, MM0
+       MOVQ   MM2, MM0
+       MOVQ   MM3, MM0
+       PSLLW  MM1, QWORD PTR [EBP+0x8]
+       PSLLD  MM2, QWORD PTR [EBP+0x8]
+       PSLLQ  MM3, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788L
+        assert self.myjit.cpu.MM1 == 0x1220344056607880L
+        assert self.myjit.cpu.MM2 == 0x1223344056677880L
+        assert self.myjit.cpu.MM3 == 0x1223344556677880L
+
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PSRL, Test_PSLL]]
diff --git a/test/arch/x86/unit/mn_punpck.py b/test/arch/x86/unit/mn_punpck.py
new file mode 100644
index 00000000..84d86c32
--- /dev/null
+++ b/test/arch/x86/unit/mn_punpck.py
@@ -0,0 +1,124 @@
+#! /usr/bin/env python
+from asm_test import Asm_Test
+import sys
+
+class Test_PUNPCKHBW(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PUNPCKHBW MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0xAA11BB22CC33DD44
+
+
+class Test_PUNPCKHWD(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PUNPCKHWD MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0xAABB1122CCDD3344
+
+
+
+class Test_PUNPCKHDQ(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PUNPCKHDQ MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0xAABBCCDD11223344
+
+
+
+
+class Test_PUNPCKLBW(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PUNPCKLBW MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0xEE55FF6602770188
+
+
+class Test_PUNPCKLWD(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PUNPCKLWD MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0xEEFF556602017788
+
+
+
+class Test_PUNPCKLDQ(Asm_Test):
+    TXT = '''
+    main:
+       CALL      next
+       .byte 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11
+       .byte 0x01, 0x02, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA
+    next:
+       POP       EBP
+       MOVQ      MM0, QWORD PTR [EBP]
+       MOVQ      MM1, MM0
+       PUNPCKLDQ MM1, QWORD PTR [EBP+0x8]
+       RET
+    '''
+
+    def check(self):
+        assert self.myjit.cpu.MM0 == 0x1122334455667788
+        assert self.myjit.cpu.MM1 == 0xEEFF020155667788
+
+if __name__ == "__main__":
+    [test()() for test in [Test_PUNPCKHBW, Test_PUNPCKHWD, Test_PUNPCKHDQ,
+                           Test_PUNPCKLBW, Test_PUNPCKLWD, Test_PUNPCKLDQ,]]
diff --git a/test/ir/translators/smt2.py b/test/ir/translators/smt2.py
new file mode 100644
index 00000000..97877a3b
--- /dev/null
+++ b/test/ir/translators/smt2.py
@@ -0,0 +1,40 @@
+from z3 import Solver, unsat, parse_smt2_string
+from miasm2.expression.expression import *
+from miasm2.ir.translators.smt2 import TranslatorSMT2
+from miasm2.ir.translators.z3_ir import TranslatorZ3
+
+# create nested expression
+a = ExprId("a", 64)
+b = ExprId('b', 32)
+c = ExprId('c', 16)
+d = ExprId('d', 8)
+e = ExprId('e', 1)
+
+left = ExprCond(e + ExprOp('parity', a),
+                ExprMem(a * a, 64),
+                ExprMem(a, 64))
+
+cond = ExprSlice(ExprSlice(ExprSlice(a, 0, 32) + b, 0, 16) * c, 0, 8) << ExprOp('>>>', d, ExprInt(uint8(0x5L)))
+right = ExprCond(cond,
+                 a + ExprInt(uint64(0x64L)),
+                 ExprInt(uint64(0x16L)))
+
+e = ExprAff(left, right)
+
+# initialise translators
+t_z3 = TranslatorZ3()
+t_smt2 = TranslatorSMT2()
+
+# translate to z3
+e_z3 = t_z3.from_expr(e)
+# translate to smt2
+smt2 = t_smt2.to_smt2([t_smt2.from_expr(e)])
+
+# parse smt2 string with z3
+smt2_z3 = parse_smt2_string(smt2)
+# initialise SMT solver
+s = Solver()
+
+# prove equivalence of z3 and smt2 translation
+s.add(e_z3 != smt2_z3)
+assert (s.check() == unsat)
diff --git a/test/test_all.py b/test/test_all.py
index 71ea51a5..bc019104 100644
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -40,6 +40,15 @@ for script in ["x86/sem.py",
                "x86/unit/mn_daa.py",
                "x86/unit/mn_das.py",
                "x86/unit/mn_int.py",
+               "x86/unit/mn_pshufb.py",
+               "x86/unit/mn_psrl_psll.py",
+               "x86/unit/mn_pmaxu.py",
+               "x86/unit/mn_pminu.py",
+               "x86/unit/mn_pcmpeq.py",
+               "x86/unit/mn_punpck.py",
+               "x86/unit/mn_pinsr.py",
+               "x86/unit/mn_pextr.py",
+               "x86/unit/mn_pmovmskb.py",
                "arm/arch.py",
                "arm/sem.py",
                "aarch64/unit/mn_ubfm.py",
@@ -204,6 +213,8 @@ testset += RegressionTest(["analysis.py"], base_dir="ir",
                                     for fname in fnames])
 testset += RegressionTest(["z3_ir.py"], base_dir="ir/translators",
                           tags=[TAGS["z3"]])
+testset += RegressionTest(["smt2.py"], base_dir="ir/translators",
+                          tags=[TAGS["z3"]])
 ## OS_DEP
 for script in ["win_api_x86_32.py",
                ]: