about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCamille Mougey <commial@gmail.com>2015-10-23 11:16:44 +0200
committerCamille Mougey <commial@gmail.com>2015-10-23 11:16:44 +0200
commitbcd4822ab014156c4977667b165072ba396d1f17 (patch)
treed930db4a269b56df95a1c979bee19e2959d7a761
parent7bac485ac9efffb4ab07e5b0e337972eef963f80 (diff)
parent8cdc2d6ad3b3cb0aff77df4503312225feff18b1 (diff)
downloadmiasm-bcd4822ab014156c4977667b165072ba396d1f17.tar.gz
miasm-bcd4822ab014156c4977667b165072ba396d1f17.zip
Merge pull request #235 from serpilliere/x86_fix
X86 fix
-rw-r--r--miasm2/arch/x86/arch.py137
-rw-r--r--miasm2/arch/x86/regs.py11
-rw-r--r--miasm2/arch/x86/sem.py1327
-rw-r--r--miasm2/expression/expression.py16
-rw-r--r--test/arch/x86/arch.py137
5 files changed, 1028 insertions, 600 deletions
diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 027d9a01..bfd6e504 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -8,7 +8,6 @@ from miasm2.core.cpu import *
 from collections import defaultdict
 import miasm2.arch.x86.regs as regs_module
 from miasm2.arch.x86.regs import *
-from miasm2.ir.ir import *
 from miasm2.core.asmbloc import asm_label
 
 log = logging.getLogger("x86_arch")
@@ -510,7 +509,7 @@ class instruction_x86(instruction):
             return True
         if self.name.startswith('SYS'):
             return True
-        return self.name in ['CALL', 'HLT', 'IRET', 'ICEBP']
+        return self.name in ['CALL', 'HLT', 'IRET', 'IRETD', 'IRETQ', 'ICEBP']
 
     def splitflow(self):
         if self.name in conditional_branch:
@@ -2966,8 +2965,6 @@ sx = bs(l=0, fname="sx")
 sxd = bs(l=0, fname="sx")
 
 
-xmm = bs(l=0, fname="xmm")
-mm = bs(l=0, fname="mm")
 xmmreg = bs(l=0, fname="xmmreg")
 mmreg = bs(l=0, fname="mmreg")
 
@@ -3018,6 +3015,14 @@ class field_size:
     def get(self, opm, adm=None):
         return self.d[opm]
 
+class bs_mem(object):
+    def encode(self):
+        return self.value != 0b11
+
+    def decode(self, v):
+        self.value = v
+        return v != 0b11
+
 d_imm64 = bs(l=0, fname="imm64")
 
 d_eax = bs(l=0, cls=(bs_eax, ), fname='eax')
@@ -3044,6 +3049,7 @@ moff = bs(l=0, cls=(bs_moff,), fname="off")
 msegoff = bs(l=16, cls=(bs_msegoff,), fname="mseg")
 movoff = bs(l=0, cls=(bs_movoff,), fname="off")
 mod = bs(l=2, fname="mod")
+mod_mem = bs(l=2, cls=(bs_mem,), fname="mod")
 
 rmreg = bs(l=3, cls=(x86_rm_reg, ), order =1, fname = "reg")
 reg = bs(l=3, cls=(x86_reg, ), order =1, fname = "reg")
@@ -3090,8 +3096,8 @@ cond_list = ["O", "NO", "B", "AE",
 cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list)
 
 
-def rmmod(r, rm_arg_x=rm_arg):
-    return [mod, r, rm, sib_scale, sib_index, sib_base, disp, rm_arg_x]
+def rmmod(r, rm_arg_x=rm_arg, modrm=mod):
+    return [modrm, r, rm, sib_scale, sib_index, sib_base, disp, rm_arg_x]
 
 #
 # mode | reg | rm #
@@ -3155,6 +3161,7 @@ addop("bts", [bs8(0x0f), bs8(0xba)] + rmmod(d5) + [u08])
 
 addop("call", [bs8(0xe8), rel_off])
 addop("call", [bs8(0xff), stk] + rmmod(d2))
+addop("call", [bs8(0xff), stk] + rmmod(d3, modrm=mod_mem))
 addop("call", [bs8(0x9a), moff, msegoff])
 
 
@@ -3251,6 +3258,12 @@ addop("cmpsq", [bs8(0xa7), bs_opmode64])
 addop("cmpxchg", [bs8(0x0f), bs('1011000'), w8]
       + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])
 # XXX TODO CMPXCHG8/16
+
+addop("comiss", [bs8(0x0f), bs8(0x2f), no_xmm_pref] +
+      rmmod(xmm_reg, rm_arg_xmm_m32), [xmm_reg, rm_arg_xmm_m32])
+addop("comisd", [bs8(0x0f), bs8(0x2f), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64])
+
 addop("cpuid", [bs8(0x0f), bs8(0xa2)])
 
 addop("cwd", [bs8(0x99), bs_opmode16])
@@ -3480,6 +3493,10 @@ addop("lss", [bs8(0x0f), bs8(0xb2)] + rmmod(rmreg))
 addop("lfs", [bs8(0x0f), bs8(0xb4)] + rmmod(rmreg))
 addop("lgs", [bs8(0x0f), bs8(0xb5)] + rmmod(rmreg))
 
+addop("lgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d2, modrm=mod_mem))
+addop("lidt", [bs8(0x0f), bs8(0x01)] + rmmod(d3, modrm=mod_mem))
+
+
 addop("leave", [bs8(0xc9), stk])
 
 addop("lodsb", [bs8(0xac)])
@@ -3522,7 +3539,9 @@ addop("movupd", [bs8(0x0f), bs8(0x10), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 
 addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), no_xmm_pref] +
       rmmod(mm_reg, rm_arg), [mm_reg, rm_arg])
-addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66] +
+addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66, bs_opmode32] +
+      rmmod(xmm_reg, rm_arg), [xmm_reg, rm_arg])
+addop("movq", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66, bs_opmode64] +
       rmmod(xmm_reg, rm_arg), [xmm_reg, rm_arg])
 
 addop("movq", [bs8(0x0f), bs('011'), swapargs, bs('1111'), no_xmm_pref] +
@@ -3551,29 +3570,12 @@ addop("divsd", [bs8(0x0f), bs8(0x5e), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64))
 addop("pminsw", [bs8(0x0f), bs8(0xea), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm))
 addop("pminsw", [bs8(0x0f), bs8(0xea), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 
-
-addop("pxor", [bs8(0x0f), bs8(0xef), xmm] + rmmod(xmm_reg, rm_arg_xmm))
-
 addop("ucomiss", [bs8(0x0f), bs8(0x2e), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm_m32))
 addop("ucomisd", [bs8(0x0f), bs8(0x2e), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m64))
 
-addop("andps", [bs8(0x0f), bs8(0x54), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
-addop("andpd", [bs8(0x0f), bs8(0x54), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
-
-
 addop("maxsd", [bs8(0x0f), bs8(0x5f), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64))
 addop("maxss", [bs8(0x0f), bs8(0x5f), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32))
 
-addop("cvtsi2sd",
-      [bs8(0x0f), bs8(0x2a), xmmreg, pref_f2] + rmmod(xmm_reg, rm_arg))
-addop("cvtsi2ss",
-      [bs8(0x0f), bs8(0x2a), xmmreg, pref_f3] + rmmod(xmm_reg, rm_arg))
-
-
-addop("cvttsd2ss",
-      [bs8(0x0f), bs8(0x2c), xmmreg, pref_f2] + rmmod(rmreg, rm_arg))
-addop("cvttss2si",
-      [bs8(0x0f), bs8(0x2c), xmmreg, pref_f3] + rmmod(rmreg, rm_arg))
 
 
 addop("movzx", [bs8(0x0f), bs("1011011"), w8, sx] + rmmod(rmreg, rm_arg_sx))
@@ -3725,7 +3727,7 @@ addop("sbb", [bs("000110"), swapargs, w8] +
       rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])
 
 addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_m08))
-addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0))
+addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0, modrm=mod_mem))
 addop("shld", [bs8(0x0f), bs8(0xa4)] +
       rmmod(rmreg) + [u08], [rm_arg, rmreg, u08])
 addop("shld", [bs8(0x0f), bs8(0xa5)] +
@@ -3734,8 +3736,8 @@ addop("shrd", [bs8(0x0f), bs8(0xac)] +
       rmmod(rmreg) + [u08], [rm_arg, rmreg, u08])
 addop("shrd", [bs8(0x0f), bs8(0xad)] +
       rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl])
-addop("sidt", [bs8(0x0f), bs8(0x01)] + rmmod(d1))
-addop("sldt", [bs8(0x0f), bs8(0x00)] + rmmod(d0))
+addop("sidt", [bs8(0x0f), bs8(0x01)] + rmmod(d1, modrm=mod_mem))
+addop("sldt", [bs8(0x0f), bs8(0x00)] + rmmod(d0, modrm=mod_mem))
 addop("smsw", [bs8(0x0f), bs8(0x01)] + rmmod(d4))
 addop("stc", [bs8(0xf9)])
 addop("std", [bs8(0xfd)])
@@ -3763,7 +3765,7 @@ addop("test", [bs("1000010"), w8] +
 addop("ud2", [bs8(0x0f), bs8(0x0b)])
 addop("verr", [bs8(0x0f), bs8(0x00)] + rmmod(d4))
 addop("verw", [bs8(0x0f), bs8(0x00)] + rmmod(d5))
-addop("wbind", [bs8(0x0f), bs8(0x09)])
+addop("wbinvd", [bs8(0x0f), bs8(0x09)])
 addop("wrmsr", [bs8(0x0f), bs8(0x30)])
 addop("xadd", [bs8(0x0f), bs("1100000"), w8]
       + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])
@@ -3785,7 +3787,6 @@ addop("xor", [bs("001100"), swapargs, w8] +
 addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)])
 
 
-#addop("pand", [bs8(0x0f), bs8(0xdb), pref_66])# + rmmod(rmreg, rm_arg))
 
 #### MMX/SSE/AVX operations
 #### Categories are the same than here: https://software.intel.com/sites/landingpage/IntrinsicsGuide/
@@ -3796,11 +3797,11 @@ addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)])
 
 ## Move
 # SSE
-addop("movapd", [bs8(0x0f), bs("0010100"), swapargs, xmm]
+addop("movapd", [bs8(0x0f), bs("0010100"), swapargs]
       + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode16], [xmm_reg, rm_arg_xmm])
-addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm]
+addop("movaps", [bs8(0x0f), bs("0010100"), swapargs]
       + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode32], [xmm_reg, rm_arg_xmm])
-addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm]
+addop("movaps", [bs8(0x0f), bs("0010100"), swapargs]
       + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode64], [xmm_reg, rm_arg_xmm])
 addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_f3]
       + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
@@ -3860,10 +3861,16 @@ addop("divpd", [bs8(0x0f), bs8(0x5e), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 ###
 
 ## XOR
-# SSE
-addop("xorpd", [bs8(0x0f), bs8(0x57), xmm] + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode16])
-addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode32])
-addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode64])
+addop("xorps", [bs8(0x0f), bs8(0x57), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
+addop("xorpd", [bs8(0x0f), bs8(0x57), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
+
+## AND
+addop("andps", [bs8(0x0f), bs8(0x54), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
+addop("andpd", [bs8(0x0f), bs8(0x54), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
+
+## OR
+addop("orps", [bs8(0x0f), bs8(0x56), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm))
+addop("orpd", [bs8(0x0f), bs8(0x56), pref_66] + rmmod(xmm_reg, rm_arg_xmm))
 
 ## AND
 # MMX
@@ -3881,6 +3888,14 @@ addop("por", [bs8(0x0f), bs8(0xeb), no_xmm_pref] +
 addop("por", [bs8(0x0f), bs8(0xeb), pref_66] +
       rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm])
 
+## XOR
+# MMX
+addop("pxor", [bs8(0x0f), bs8(0xef), no_xmm_pref] +
+      rmmod(mm_reg, rm_arg_mm))
+# MMX
+addop("pxor", [bs8(0x0f), bs8(0xef), pref_66] +
+      rmmod(xmm_reg, rm_arg_xmm))
+
 ### Convert
 ### SS = single precision
 ### SD = double precision
@@ -3889,16 +3904,50 @@ addop("por", [bs8(0x0f), bs8(0xeb), pref_66] +
 ## SS -> SD
 ##
 
-# SSE
-addop("cvtss2sd", [bs8(0x0f), bs8(0x5a), pref_f3]
-      + rmmod(xmm_reg, rm_arg_xmm_m32))
-
-## SD -> SS
-##
-
-# SSE
+addop("cvtdq2pd", [bs8(0x0f), bs8(0xe6), pref_f3]
+      + rmmod(xmm_reg, rm_arg_xmm_m64))
+addop("cvtdq2ps", [bs8(0x0f), bs8(0x5b), no_xmm_pref]
+      + rmmod(xmm_reg, rm_arg_xmm))
+addop("cvtpd2dq", [bs8(0x0f), bs8(0xe6), pref_f2]
+      + rmmod(xmm_reg, rm_arg_xmm))
+addop("cvtpd2pi", [bs8(0x0f), bs8(0x2d), pref_66]
+      + rmmod(mm_reg, rm_arg_xmm))
+addop("cvtpd2ps", [bs8(0x0f), bs8(0x5a), pref_66]
+      + rmmod(xmm_reg, rm_arg_xmm))
+addop("cvtpi2pd", [bs8(0x0f), bs8(0x2a), pref_66]
+      + rmmod(xmm_reg, rm_arg_mm_m64))
+addop("cvtpi2ps", [bs8(0x0f), bs8(0x2a), no_xmm_pref]
+      + rmmod(xmm_reg, rm_arg_mm_m64))
+addop("cvtps2dq", [bs8(0x0f), bs8(0x5b), pref_66]
+      + rmmod(xmm_reg, rm_arg_xmm))
+addop("cvtps2pd", [bs8(0x0f), bs8(0x5a), no_xmm_pref]
+      + rmmod(xmm_reg, rm_arg_xmm_m64))
+addop("cvtps2pi", [bs8(0x0f), bs8(0x2d), no_xmm_pref]
+      + rmmod(mm_reg, rm_arg_xmm_m64))
+addop("cvtsd2si", [bs8(0x0f), bs8(0x2d), pref_f2]
+      + rmmod(reg, rm_arg_xmm_m64))
 addop("cvtsd2ss", [bs8(0x0f), bs8(0x5a), pref_f2]
       + rmmod(xmm_reg, rm_arg_xmm_m64))
+addop("cvtsi2sd", [bs8(0x0f), bs8(0x2a), pref_f2]
+      + rmmod(xmm_reg, rm_arg))
+addop("cvtsi2ss", [bs8(0x0f), bs8(0x2a), xmmreg, pref_f3]
+      + rmmod(xmm_reg, rm_arg))
+addop("cvtss2sd", [bs8(0x0f), bs8(0x5a), pref_f3]
+      + rmmod(xmm_reg, rm_arg_xmm_m32))
+addop("cvtss2si", [bs8(0x0f), bs8(0x2d), pref_f3]
+      + rmmod(rmreg, rm_arg_xmm_m32))
+addop("cvttpd2pi",[bs8(0x0f), bs8(0x2c), pref_66]
+      + rmmod(mm_reg, rm_arg_xmm))
+addop("cvttpd2dq",[bs8(0x0f), bs8(0xe6), pref_66]
+      + rmmod(xmm_reg, rm_arg_xmm))
+addop("cvttps2dq",[bs8(0x0f), bs8(0x5b), pref_f3]
+      + rmmod(xmm_reg, rm_arg_xmm))
+addop("cvttps2pi",[bs8(0x0f), bs8(0x2c), no_xmm_pref]
+      + rmmod(mm_reg, rm_arg_xmm_m64))
+addop("cvttsd2si",[bs8(0x0f), bs8(0x2c), pref_f2]
+      + rmmod(reg, rm_arg_xmm_m64))
+addop("cvttss2si",[bs8(0x0f), bs8(0x2c), pref_f3]
+      + rmmod(reg, rm_arg_xmm_m32))
 
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py
index 31f55483..9c87834f 100644
--- a/miasm2/arch/x86/regs.py
+++ b/miasm2/arch/x86/regs.py
@@ -248,7 +248,6 @@ reg_float_address = 'reg_float_address'
 reg_float_ds = 'reg_float_ds'
 
 
-
 dr0 = ExprId(reg_dr0)
 dr1 = ExprId(reg_dr1)
 dr2 = ExprId(reg_dr2)
@@ -342,6 +341,14 @@ float_st5 = ExprId("float_st5", 64)
 float_st6 = ExprId("float_st6", 64)
 float_st7 = ExprId("float_st7", 64)
 
+
+float_list = [float_st0, float_st1, float_st2, float_st3,
+              float_st4, float_st5, float_st6, float_st7]
+
+float_replace = {fltregs32_expr[i]: float_list[i] for i in xrange(8)}
+float_replace[r_st_all.expr[0]] = float_st0
+
+
 EAX_init = ExprId('EAX_init')
 EBX_init = ExprId('EBX_init')
 ECX_init = ExprId('ECX_init')
@@ -428,7 +435,7 @@ for i, r in enumerate(all_regs_ids):
 
 regs_flt_expr = [float_st0, float_st1, float_st2, float_st3,
                  float_st4, float_st5, float_st6, float_st7,
-             ]
+                 ]
 
 mRAX = {16: AX, 32: EAX, 64: RAX}
 mRBX = {16: BX, 32: EBX, 64: RBX}
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index f99d2d9d..c0dff4fd 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -54,18 +54,6 @@ OF(A-B) = ((A XOR D) AND (A XOR B)) < 0
 """
 
 
-float_list = [
-    m2_expr.ExprId("ST", 64),
-    m2_expr.ExprId("ST(0)", 64),
-    m2_expr.ExprId("ST(1)", 64),
-    m2_expr.ExprId("ST(2)", 64),
-    m2_expr.ExprId("ST(3)", 64),
-    m2_expr.ExprId("ST(4)", 64),
-    m2_expr.ExprId("ST(5)", 64),
-    m2_expr.ExprId("ST(6)", 64),
-    m2_expr.ExprId("ST(7)", 64),
-]
-
 
 # XXX TODO make default check against 0 or not 0 (same eq as in C)
 
@@ -176,6 +164,92 @@ def set_float_cs_eip(instr):
     e.append(m2_expr.ExprAff(float_cs, CS))
     return e
 
+def mem2double(arg):
+    """
+    Add float convertion if argument is an ExprMem
+    @arg: argument to tranform
+    """
+    if isinstance(arg, m2_expr.ExprMem):
+        if arg.size > 64:
+            raise NotImplementedError('float to long')
+        return m2_expr.ExprOp('mem_%.2d_to_double' % arg.size, arg)
+    else:
+        return arg
+
+def float_implicit_st0(arg1, arg2):
+    """
+    Generate full float operators if one argument is implicit (float_st0)
+    """
+    if arg2 is None:
+        arg2 = arg1
+        arg1 = float_st0
+    return arg1, arg2
+
+
+def gen_jcc(ir, instr, cond, dst, jmp_if):
+    """
+    Macro to generate jcc semantic
+    @ir: ir instance
+    @instr: instruction
+    @cond: condtion of the jcc
+    @dst: the dstination if jcc is taken
+    @jmp_if: jump if/notif cond
+    """
+
+    e = []
+    meip = mRIP[instr.mode]
+    next_lbl = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
+    if jmp_if:
+        dstA, dstB = dst, next_lbl
+    else:
+        dstA, dstB = next_lbl, dst
+    mn_dst = m2_expr.ExprCond(cond,
+                              dstA.zeroExtend(instr.mode),
+                              dstB.zeroExtend(instr.mode))
+    e.append(m2_expr.ExprAff(meip, mn_dst))
+    e.append(m2_expr.ExprAff(ir.IRDst, mn_dst))
+    return e, []
+
+
+def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if):
+    """Generate fcmov
+    @ir: ir instance
+    @instr: instruction instance
+    @cond: condition
+    @mov_if: invert condition if False"""
+
+    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
+    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
+    if mov_if:
+        dstA, dstB = lbl_do, lbl_skip
+    else:
+        dstA, dstB = lbl_skip, lbl_do
+    e = []
+    e_do, extra_irs = [m2_expr.ExprAff(arg1, arg2)], []
+    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
+    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB)))
+    return e, [irbloc(lbl_do.name, [e_do])]
+
+
+def gen_cmov(ir, instr, cond, arg1, arg2, mov_if):
+    """Generate cmov
+    @ir: ir instance
+    @instr: instruction instance
+    @cond: condition
+    @mov_if: invert condition if False"""
+
+    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
+    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
+    if mov_if:
+        dstA, dstB = lbl_do, lbl_skip
+    else:
+        dstA, dstB = lbl_skip, lbl_do
+    e = []
+    e_do, extra_irs = mov(ir, instr, arg1, arg2)
+    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
+    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB)))
+    return e, [irbloc(lbl_do.name, [e_do])]
+
 
 def mov(ir, instr, a, b):
     if a in [ES, CS, SS, DS, FS, GS]:
@@ -305,10 +379,6 @@ def xor(ir, instr, a, b):
 
 def pxor(ir, instr, a, b):
     e = []
-    if isinstance(a, m2_expr.ExprMem):
-        a = m2_expr.ExprMem(a.arg, b.size)
-    if isinstance(b, m2_expr.ExprMem):
-        b = m2_expr.ExprMem(b.arg, a.size)
     c = a ^ b
     e.append(m2_expr.ExprAff(a, c))
     return e, []
@@ -945,6 +1015,8 @@ def compose_eflag(s=32):
 def pushfd(ir, instr):
     return push(ir, instr, compose_eflag())
 
+def pushfq(ir, instr):
+    return push(ir, instr, compose_eflag().zeroExtend(64))
 
 def pushfw(ir, instr):
     return pushw(ir, instr, compose_eflag(16))
@@ -971,7 +1043,7 @@ def popfd(ir, instr):
     e.append(m2_expr.ExprAff(vip, m2_expr.ExprSlice(tmp, 20, 21)))
     e.append(m2_expr.ExprAff(i_d, m2_expr.ExprSlice(tmp, 21, 22)))
     e.append(m2_expr.ExprAff(mRSP[instr.mode],
-                             mRSP[instr.mode] + m2_expr.ExprInt32(4)))
+                             mRSP[instr.mode] + m2_expr.ExprInt_from(mRSP[instr.mode], instr.mode/8)))
     e.append(m2_expr.ExprAff(exception_flags,
                      m2_expr.ExprCond(m2_expr.ExprSlice(tmp, 8, 9),
                               m2_expr.ExprInt32(EXCEPT_SOFT_BP),
@@ -1210,232 +1282,79 @@ def jmpf(ir, instr, a):
 
 
 def jz(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(zf,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e = [m2_expr.ExprAff(meip, dst_o),
-         m2_expr.ExprAff(ir.IRDst, dst_o),
-     ]
-    return e, []
+    return gen_jcc(ir, instr, zf, dst, True)
 
 
 def jcxz(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(mRCX[instr.mode][:16],
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, mRCX[instr.mode][:16], dst, False)
 
 
 def jecxz(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(mRCX[instr.mode][:32],
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, mRCX[instr.mode][:32], dst, False)
 
 
 def jrcxz(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(mRCX[instr.mode],
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, mRCX[instr.mode], dst, False)
 
 
 def jnz(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(zf,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, zf, dst, False)
 
 
 def jp(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(pf,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, pf, dst, True)
 
 
 def jnp(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(pf,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, pf, dst, False)
 
 
 def ja(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(cf | zf,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, cf|zf, dst, False)
 
 
 def jae(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(cf,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, cf, dst, False)
 
 
 def jb(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(cf,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, cf, dst, True)
 
 
 def jbe(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(cf | zf,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, cf|zf, dst, True)
 
 
 def jge(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(nf - of,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, nf-of, dst, False)
 
 
 def jg(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(zf | (nf - of),
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, zf|(nf-of), dst, False)
 
 
 def jl(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(nf - of,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, nf-of, dst, True)
 
 
 def jle(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(zf | (nf - of),
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, zf|(nf-of), dst, True)
 
 
 def js(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(nf,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, nf, dst, True)
 
 
 def jns(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(nf,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, nf, dst, False)
 
 
 def jo(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(of,
-                             dst.zeroExtend(instr.mode),
-                             n.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, of, dst, True)
 
 
 def jno(ir, instr, dst):
-    e = []
-    meip = mRIP[instr.mode]
-    n = m2_expr.ExprId(ir.get_next_label(instr), dst.size)
-    dst_o = m2_expr.ExprCond(of,
-                             n.zeroExtend(instr.mode),
-                             dst.zeroExtend(instr.mode))
-    e.append(m2_expr.ExprAff(meip, dst_o))
-    e.append(m2_expr.ExprAff(ir.IRDst, dst_o))
-    return e, []
+    return gen_jcc(ir, instr, of, dst, False)
 
 
 def loop(ir, instr, dst):
@@ -1680,7 +1599,7 @@ def cqo(ir, instr):
     tempRDX = mRDX[instr.mode][:64]
     c = tempRAX.signExtend(128)
     e.append(m2_expr.ExprAff(tempRAX, c[:64]))
-    e.append(m2_expr.ExprAff(tempRDX, c[64:127]))
+    e.append(m2_expr.ExprAff(tempRDX, c[64:128]))
     return e, []
 
 
@@ -1813,56 +1732,64 @@ def movsd_dispatch(ir, instr, a = None, b = None):
         return movsd(ir, instr, a, b)
 
 
-def float_prev(flt):
+def float_prev(flt, popcount=1):
     if not flt in float_list:
         return None
     i = float_list.index(flt)
-    if i == 0:
+    if i < popcount:
         raise ValueError('broken index')
-    flt = float_list[i - 1]
+    flt = float_list[i - popcount]
     return flt
 
 
-def float_pop(avoid_flt=None):
-    avoid_flt = float_prev(avoid_flt)
-    e = []
-    if avoid_flt != float_st0:
-        e.append(m2_expr.ExprAff(float_st0, float_st1))
-    if avoid_flt != float_st1:
-        e.append(m2_expr.ExprAff(float_st1, float_st2))
-    if avoid_flt != float_st2:
-        e.append(m2_expr.ExprAff(float_st2, float_st3))
-    if avoid_flt != float_st3:
-        e.append(m2_expr.ExprAff(float_st3, float_st4))
-    if avoid_flt != float_st4:
-        e.append(m2_expr.ExprAff(float_st4, float_st5))
-    if avoid_flt != float_st5:
-        e.append(m2_expr.ExprAff(float_st5, float_st6))
-    if avoid_flt != float_st6:
-        e.append(m2_expr.ExprAff(float_st6, float_st7))
-    if avoid_flt != float_st7:
-        e.append(m2_expr.ExprAff(float_st7, m2_expr.ExprInt_from(float_st7, 0)))
+def float_pop(avoid_flt=None, popcount=1):
+    """
+    Generate floatpop semantic (@popcount times), avoiding the avoid_flt@ float
+    @avoid_flt: float avoided in the generated semantic
+    @popcount: pop count
+    """
+    avoid_flt = float_prev(avoid_flt, popcount)
+    e = []
+    for i in xrange(8-popcount):
+        if avoid_flt != float_list[i]:
+            e.append(m2_expr.ExprAff(float_list[i],
+                                     float_list[i+popcount]))
+    for i in xrange(8-popcount, 8):
+        e.append(m2_expr.ExprAff(float_list[i],
+                                 m2_expr.ExprInt_from(float_list[i], 0)))
     e.append(
         m2_expr.ExprAff(float_stack_ptr,
-                        float_stack_ptr - m2_expr.ExprInt_fromsize(3, 1)))
+                        float_stack_ptr - m2_expr.ExprInt_fromsize(3, popcount)))
     return e
 
 # XXX TODO
 
 
-def fcom(ir, instr, a, b = None):
+def fcom(ir, instr, a=None, b=None):
 
-    if b is None:
+    if a is None and b is None:
+        a, b = float_st0, float_st1
+    elif b is None:
         b = a
         a = float_st0
 
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        b = m2_expr.ExprOp('mem_%.2d_to_double'%b.size, b)
+    b = mem2double(b)
+
+    e.append(m2_expr.ExprAff(float_c0, m2_expr.ExprOp('fcom_c0', a, b)))
+    e.append(m2_expr.ExprAff(float_c1, m2_expr.ExprOp('fcom_c1', a, b)))
+    e.append(m2_expr.ExprAff(float_c2, m2_expr.ExprOp('fcom_c2', a, b)))
+    e.append(m2_expr.ExprAff(float_c3, m2_expr.ExprOp('fcom_c3', a, b)))
+
+    e += set_float_cs_eip(instr)
+    return e, []
 
 
+def ftst(ir, instr):
+    a = float_st0
+
+    e = []
+    b = m2_expr.ExprOp('int_32_to_double', m2_expr.ExprInt32(0))
     e.append(m2_expr.ExprAff(float_c0, m2_expr.ExprOp('fcom_c0', a, b)))
     e.append(m2_expr.ExprAff(float_c1, m2_expr.ExprOp('fcom_c1', a, b)))
     e.append(m2_expr.ExprAff(float_c2, m2_expr.ExprOp('fcom_c2', a, b)))
@@ -1872,11 +1799,21 @@ def fcom(ir, instr, a, b = None):
     return e, []
 
 
+def fxam(ir, instr):
+    a = float_st0
+
+    e = []
+    e.append(m2_expr.ExprAff(float_c0, m2_expr.ExprOp('fxam_c0', a)))
+    e.append(m2_expr.ExprAff(float_c2, m2_expr.ExprOp('fxam_c2', a)))
+    e.append(m2_expr.ExprAff(float_c3, m2_expr.ExprOp('fxam_c3', a)))
+
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
 def ficom(ir, instr, a, b = None):
 
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
 
     e = []
 
@@ -1898,38 +1835,58 @@ def ficom(ir, instr, a, b = None):
 
 
 
-def fcomi(ir, instr, a):
-    raise NotImplementedError("Invalid emulation")
-
-
-def fcomip(ir, instr, a):
-    raise NotImplementedError("Invalid emulation")
-
-
-def fucomi(ir, instr, a):
-    raise NotImplementedError("Invalid emulation")
-
+def fcomi(ir, instr, a=None, b=None):
+    # TODO unordered float
+    if a is None and b is None:
+        a, b = float_st0, float_st1
+    elif b is None:
+        b = a
+        a = float_st0
 
-def fucomip(ir, instr, a, b):
     e = []
-    # XXX TODO add exception on NaN
+
     e.append(m2_expr.ExprAff(cf, m2_expr.ExprOp('fcom_c0', a, b)))
-    #e.append(m2_expr.ExprAff(float_c1, m2_expr.ExprOp('fcom_c1', a, b)))
     e.append(m2_expr.ExprAff(pf, m2_expr.ExprOp('fcom_c2', a, b)))
     e.append(m2_expr.ExprAff(zf, m2_expr.ExprOp('fcom_c3', a, b)))
 
-    e += float_pop()
+    e.append(m2_expr.ExprAff(of, m2_expr.ExprInt1(0)))
+    e.append(m2_expr.ExprAff(nf, m2_expr.ExprInt1(0)))
+    e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0)))
 
     e += set_float_cs_eip(instr)
     return e, []
 
 
-def fcomp(ir, instr, a, b = None):
+def fcomip(ir, instr, a=None, b=None):
+    e, extra = fcomi(ir, instr, a, b)
+    e += float_pop()
+    e += set_float_cs_eip(instr)
+    return e, extra
+
+
+def fucomi(ir, instr, a=None, b=None):
+    # TODO unordered float
+    return fcomi(ir, instr, a, b)
+
+def fucomip(ir, instr, a=None, b=None):
+    # TODO unordered float
+    return fcomip(ir, instr, a, b)
+
+
+def fcomp(ir, instr, a=None, b=None):
     e, extra = fcom(ir, instr, a, b)
     e += float_pop()
     e += set_float_cs_eip(instr)
     return e, extra
 
+
+def fcompp(ir, instr, a=None, b=None):
+    e, extra = fcom(ir, instr, a, b)
+    e += float_pop(popcount=2)
+    e += set_float_cs_eip(instr)
+    return e, extra
+
+
 def ficomp(ir, instr, a, b = None):
     e, extra = ficom(ir, instr, a, b)
     e += float_pop()
@@ -1937,13 +1894,63 @@ def ficomp(ir, instr, a, b = None):
     return e, extra
 
 
+def fucom(ir, instr, a=None, b=None):
+    # TODO unordered float
+    return fcom(ir, instr, a, b)
+
+
+def fucomp(ir, instr, a=None, b=None):
+    # TODO unordered float
+    return fcomp(ir, instr, a, b)
+
+
+def fucompp(ir, instr, a=None, b=None):
+    # TODO unordered float
+    return fcompp(ir, instr, a, b)
+
+
+def comiss(ir, instr, a, b):
+    # TODO unordered float
+
+    e = []
+
+    a = m2_expr.ExprOp('int_32_to_float', a[:32])
+    b = m2_expr.ExprOp('int_32_to_float', b[:32])
+
+    e.append(m2_expr.ExprAff(cf, m2_expr.ExprOp('fcom_c0', a, b)))
+    e.append(m2_expr.ExprAff(pf, m2_expr.ExprOp('fcom_c2', a, b)))
+    e.append(m2_expr.ExprAff(zf, m2_expr.ExprOp('fcom_c3', a, b)))
+
+    e.append(m2_expr.ExprAff(of, m2_expr.ExprInt1(0)))
+    e.append(m2_expr.ExprAff(nf, m2_expr.ExprInt1(0)))
+    e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0)))
+
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
+def comisd(ir, instr, a, b):
+    # TODO unordered float
+
+    e = []
+
+    a = m2_expr.ExprOp('int_64_to_double', a[:64])
+    b = m2_expr.ExprOp('int_64_to_double', b[:64])
+
+    e.append(m2_expr.ExprAff(cf, m2_expr.ExprOp('fcom_c0', a, b)))
+    e.append(m2_expr.ExprAff(pf, m2_expr.ExprOp('fcom_c2', a, b)))
+    e.append(m2_expr.ExprAff(zf, m2_expr.ExprOp('fcom_c3', a, b)))
+
+    e.append(m2_expr.ExprAff(of, m2_expr.ExprInt1(0)))
+    e.append(m2_expr.ExprAff(nf, m2_expr.ExprInt1(0)))
+    e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0)))
+
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
 def fld(ir, instr, a):
-    if isinstance(a, m2_expr.ExprMem):
-        if a.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % a.size, a)
-    else:
-        src = a
+    src = mem2double(a)
 
     e = []
     e.append(m2_expr.ExprAff(float_st7, float_st6))
@@ -1964,14 +1971,15 @@ def fld(ir, instr, a):
 
 def fst(ir, instr, a):
     e = []
+
     if isinstance(a, m2_expr.ExprMem):
         if a.size > 64:
             raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('double_to_mem_%2d' % a.size, float_st0)
+        src = m2_expr.ExprOp('double_to_mem_%.2d' % a.size, a)
     else:
-        src = float_st0
-    e.append(m2_expr.ExprAff(a, src))
+        src = a
 
+    e.append(m2_expr.ExprAff(a, src))
     e += set_float_cs_eip(instr)
     return e, []
 
@@ -2034,6 +2042,27 @@ def fld1(ir, instr):
                                          m2_expr.ExprInt32(1)))
 
 
+def fldl2t(ir, instr):
+    value_f = math.log(10)/math.log(2)
+    value = struct.unpack('I', struct.pack('f', value_f))[0]
+    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+                                         m2_expr.ExprInt32(value)))
+
+
+def fldpi(ir, instr):
+    value_f = math.pi
+    value = struct.unpack('I', struct.pack('f', value_f))[0]
+    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+                                         m2_expr.ExprInt32(value)))
+
+
+def fldln2(ir, instr):
+    value_f = math.log(2)
+    value = struct.unpack('I', struct.pack('f', value_f))[0]
+    return fld(ir, instr, m2_expr.ExprOp('int_32_to_double',
+                                         m2_expr.ExprInt32(value)))
+
+
 def fldl2e(ir, instr):
     x = struct.pack('d', 1 / math.log(2))
     x = struct.unpack('Q', x)[0]
@@ -2049,48 +2078,68 @@ def fldlg2(ir, instr):
 
 
 def fadd(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fadd', a, src)))
 
     e += set_float_cs_eip(instr)
     return e, []
 
 def fiadd(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fiadd', a, src)))
     e += set_float_cs_eip(instr)
     return e, []
 
 
+def fisub(ir, instr, a, b=None):
+    a, b = float_implicit_st0(a, b)
+    e = []
+    src = mem2double(b)
+    e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fisub', a, src)))
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
+def fisubr(ir, instr, a, b=None):
+    a, b = float_implicit_st0(a, b)
+    e = []
+    src = mem2double(b)
+    e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fisub', src, a)))
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
+def fpatan(ir, instr):
+    e = []
+    a = float_st1
+    e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fpatan', float_st0, float_st1)))
+    e += set_float_cs_eip(instr)
+    e += float_pop(a)
+    return e, []
+
+
+def fprem(ir, instr):
+    e = []
+    e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fprem', float_st0, float_st1)))
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
+def fprem1(ir, instr):
+    e = []
+    e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fprem1', float_st0, float_st1)))
+    e += set_float_cs_eip(instr)
+    return e, []
+
+
 def faddp(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fadd', a, src)))
     e += set_float_cs_eip(instr)
     e += float_pop(a)
@@ -2103,6 +2152,15 @@ def fninit(ir, instr):
     return e, []
 
 
+def fyl2x(ir, instr):
+    e = []
+    a = float_st1
+    e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fyl2x', float_st0, float_st1)))
+    e += set_float_cs_eip(instr)
+    e += float_pop(a)
+    return e, []
+
+
 def fnstenv(ir, instr, a):
     e = []
     # XXX TODO tag word, ...
@@ -2140,31 +2198,17 @@ def fnstenv(ir, instr, a):
 
 
 def fsub(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fsub', a, src)))
     e += set_float_cs_eip(instr)
     return e, []
 
 def fsubp(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fsub', a, src)))
     e += set_float_cs_eip(instr)
     e += float_pop(a)
@@ -2172,110 +2216,81 @@ def fsubp(ir, instr, a, b=None):
 
 
 def fsubr(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fsub', src, a)))
     e += set_float_cs_eip(instr)
     return e, []
 
 
+def fsubrp(ir, instr, a, b=None):
+    a, b = float_implicit_st0(a, b)
+    e = []
+    src = mem2double(b)
+    e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fsub', src, a)))
+    e += set_float_cs_eip(instr)
+    e += float_pop(a)
+    return e, []
+
+
 def fmul(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fmul', a, src)))
     e += set_float_cs_eip(instr)
     return e, []
 
 def fimul(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fimul', a, src)))
     e += set_float_cs_eip(instr)
     return e, []
 
 
 def fdiv(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fdiv', a, src)))
     e += set_float_cs_eip(instr)
     return e, []
 
 def fdivr(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fdiv', src, a)))
     e += set_float_cs_eip(instr)
     return e, []
 
 
+def fdivrp(ir, instr, a, b=None):
+    a, b = float_implicit_st0(a, b)
+    e = []
+    src = mem2double(b)
+    e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fdiv', src, a)))
+    e += set_float_cs_eip(instr)
+    e += float_pop(a)
+    return e, []
+
+
 def fidiv(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fidiv', a, src)))
     e += set_float_cs_eip(instr)
     return e, []
 
 
 def fidivr(ir, instr, a, b=None):
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('fidiv', src, a)))
     e += set_float_cs_eip(instr)
     return e, []
@@ -2283,16 +2298,9 @@ def fidivr(ir, instr, a, b=None):
 
 def fdivp(ir, instr, a, b=None):
     # Invalid emulation
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fdiv', a, src)))
     e += set_float_cs_eip(instr)
     e += float_pop(a)
@@ -2301,16 +2309,9 @@ def fdivp(ir, instr, a, b=None):
 
 def fmulp(ir, instr, a, b=None):
     # Invalid emulation
-    if b is None:
-        b = a
-        a = float_st0
+    a, b = float_implicit_st0(a, b)
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        if b.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % b.size, b)
-    else:
-        src = b
+    src = mem2double(b)
     e.append(m2_expr.ExprAff(float_prev(a), m2_expr.ExprOp('fmul', a, src)))
     e += set_float_cs_eip(instr)
     e += float_pop(a)
@@ -2319,12 +2320,7 @@ def fmulp(ir, instr, a, b=None):
 
 def ftan(ir, instr, a):
     e = []
-    if isinstance(a, m2_expr.ExprMem):
-        if a.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % a.size, a)
-    else:
-        src = a
+    src = mem2double(a)
     e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('ftan', src)))
     e += set_float_cs_eip(instr)
     return e, []
@@ -2332,12 +2328,7 @@ def ftan(ir, instr, a):
 
 def fxch(ir, instr, a):
     e = []
-    if isinstance(a, m2_expr.ExprMem):
-        if a.size > 64:
-            raise NotImplementedError('float to long')
-        src = m2_expr.ExprOp('mem_%.2d_to_double' % a.size, a)
-    else:
-        src = a
+    src = mem2double(a)
     e.append(m2_expr.ExprAff(float_st0, src))
     e.append(m2_expr.ExprAff(src, float_st0))
     e += set_float_cs_eip(instr)
@@ -2383,6 +2374,22 @@ def fcos(ir, instr):
     return e, []
 
 
+def fsincos(ir, instr):
+    e = []
+    e.append(m2_expr.ExprAff(float_st7, float_st6))
+    e.append(m2_expr.ExprAff(float_st6, float_st5))
+    e.append(m2_expr.ExprAff(float_st5, float_st4))
+    e.append(m2_expr.ExprAff(float_st4, float_st3))
+    e.append(m2_expr.ExprAff(float_st3, float_st2))
+    e.append(m2_expr.ExprAff(float_st2, float_st1))
+    e.append(m2_expr.ExprAff(float_st1, m2_expr.ExprOp('fsin', float_st0)))
+    e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fcos', float_st0)))
+    e.append(
+        m2_expr.ExprAff(float_stack_ptr,
+                        float_stack_ptr + m2_expr.ExprInt_fromsize(3, 1)))
+    return e, []
+
+
 def fscale(ir, instr):
     e = []
     e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fscale', float_st0,
@@ -2397,6 +2404,12 @@ def f2xm1(ir, instr):
     e += set_float_cs_eip(instr)
     return e, []
 
+def fchs(ir, instr):
+    e = []
+    e.append(m2_expr.ExprAff(float_st0, m2_expr.ExprOp('fchs', float_st0)))
+    e += set_float_cs_eip(instr)
+    return e, []
+
 
 def fsqrt(ir, instr):
     e = []
@@ -2440,6 +2453,38 @@ def fwait(ir, instr):
     return [], None
 
 
+def fcmovb(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, cf, arg1, arg2, True)
+
+
+def fcmove(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, zf, arg1, arg2, True)
+
+
+def fcmovbe(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, cf|zf, arg1, arg2, True)
+
+
+def fcmovu(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, pf, arg1, arg2, True)
+
+
+def fcmovnb(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, cf, arg1, arg2, False)
+
+
+def fcmovne(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, zf, arg1, arg2, False)
+
+
+def fcmovnbe(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, cf|zf, arg1, arg2, False)
+
+
+def fcmovnu(ir, instr, arg1, arg2):
+    return gen_fcmov(ir, instr, pf, arg1, arg2, False)
+
+
 def nop(ir, instr, a=None):
     return [], []
 
@@ -2675,150 +2720,67 @@ def sldt(ir, instr, a):
     return e, []
 
 
-def cmovz(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(zf, lbl_do, lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
+def cmovz(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, zf, arg1, arg2, True)
 
+def cmovnz(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, zf, arg1, arg2, False)
 
-def cmovnz(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(zf, lbl_skip, lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovpe(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, pf, arg1, arg2, True)
 
-def cmovge(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(nf ^ of, lbl_skip,
-                                                        lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovnp(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, pf, arg1, arg2, False)
 
-def cmovg(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(zf | (nf ^ of),
-                                                        lbl_skip, lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovge(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, nf^of, arg1, arg2, False)
 
-def cmovl(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(nf ^ of, lbl_do,
-                                                        lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovg(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, zf|(nf^of), arg1, arg2, False)
 
-def cmovle(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(zf | (nf ^ of), lbl_do,
-                                                        lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovl(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, nf^of, arg1, arg2, True)
 
-def cmova(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cf | zf, lbl_skip,
-                                                        lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovle(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, zf|(nf^of), arg1, arg2, True)
 
-def cmovae(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cf, lbl_skip, lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmova(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, cf|zf, arg1, arg2, False)
 
-def cmovbe(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cf | zf, lbl_do,
-                                                        lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovae(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, cf, arg1, arg2, False)
 
-def cmovb(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cf, lbl_do, lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovbe(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, cf|zf, arg1, arg2, True)
 
-def cmovo(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(of, lbl_do, lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovb(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, cf, arg1, arg2, True)
 
-def cmovno(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(of, lbl_skip, lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovo(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, of, arg1, arg2, True)
 
-def cmovs(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(nf, lbl_do, lbl_skip)))
-    return e, [irbloc(lbl_do.name, [e_do])]
 
+def cmovno(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, of, arg1, arg2, False)
 
-def cmovns(ir, instr, a, b):
-    e = []
-    lbl_do = m2_expr.ExprId(ir.gen_label(), instr.mode)
-    lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), instr.mode)
-    e_do, extra_irs = mov(ir, instr, a, b)
-    e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip))
-    e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(nf, lbl_skip, lbl_do)))
-    return e, [irbloc(lbl_do.name, [e_do])]
+
+def cmovs(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, nf, arg1, arg2, True)
+
+
+def cmovns(ir, instr, arg1, arg2):
+    return gen_cmov(ir, instr, nf, arg1, arg2, False)
 
 
 def icebp(ir, instr):
@@ -2881,16 +2843,16 @@ def cpuid(ir, instr):
     e = []
     e.append(
         m2_expr.ExprAff(mRAX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt32(0))))
+        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt_fromsize(instr.mode, 0))))
     e.append(
         m2_expr.ExprAff(mRBX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt32(1))))
+        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt_fromsize(instr.mode, 1))))
     e.append(
         m2_expr.ExprAff(mRCX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt32(2))))
+        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt_fromsize(instr.mode, 2))))
     e.append(
         m2_expr.ExprAff(mRDX[instr.mode],
-        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt32(3))))
+        m2_expr.ExprOp('cpuid', mRAX[instr.mode], m2_expr.ExprInt_fromsize(instr.mode, 3))))
     return e, []
 
 
@@ -3083,54 +3045,108 @@ def l_str(ir, instr, a):
 
 def movd(ir, instr, a, b):
     e = []
-    if a.size == 64:
-        value = m2_expr.ExprCompose([(m2_expr.ExprInt32(0), 32, 64),
-                                     (b, 0, 32)])
-        e.append(m2_expr.ExprAff(a, value))
+    if a in regs_mm_expr:
+        e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32),
+                                                         (m2_expr.ExprInt32(0), 32, 64)])))
+    elif a in regs_xmm_expr:
+        e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32),
+                                                         (m2_expr.ExprInt_fromsize(96, 0), 32, 128)])))
     else:
-        e.append(m2_expr.ExprAff(a, b[0:32]))
+        e.append(m2_expr.ExprAff(a, b[:32]))
     return e, []
 
 def movdqu(ir, instr, a, b):
+    # XXX TODO alignement check
+    return [m2_expr.ExprAff(a, b)], []
+
+
+def movapd(ir, instr, a, b):
+    # XXX TODO alignement check
+    return [m2_expr.ExprAff(a, b)], []
+
+
+def andps(ir, instr, a, b):
     e = []
-    if isinstance(a, m2_expr.ExprMem):
-        a = m2_expr.ExprMem(a.arg, b.size)
-    elif isinstance(b, m2_expr.ExprMem):
-        b = m2_expr.ExprMem(b.arg, a.size)
-    e.append(m2_expr.ExprAff(a, b))
+    e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('&', a, b)))
+    return e, []
+
+
+def orps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('|', a, b)))
     return e, []
 
 
 def xorps(ir, instr, a, b):
     e = []
-    if isinstance(b, m2_expr.ExprMem):
-        b = m2_expr.ExprMem(b.arg, a.size)
     e.append(m2_expr.ExprAff(a, m2_expr.ExprOp('^', a, b)))
     return e, []
 
+
+def rdmsr(ir, instr):
+    msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32(8) * mRCX[instr.mode][:32]
+    e = []
+    e.append(m2_expr.ExprAff(mRAX[instr.mode][:32], m2_expr.ExprMem(msr_addr, 32)))
+    e.append(m2_expr.ExprAff(mRDX[instr.mode][:32], m2_expr.ExprMem(msr_addr + m2_expr.ExprInt_from(msr_addr, 4), 32)))
+    return e, []
+
+def wrmsr(ir, instr):
+    msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32(8) * mRCX[instr.mode][:32]
+    e = []
+    src = m2_expr.ExprCompose([(mRAX[instr.mode][:32], 0, 32),
+                               (mRDX[instr.mode][:32], 32, 64)])
+    e.append(m2_expr.ExprAff(m2_expr.ExprMem(msr_addr, 64), src))
+    return e, []
+
 ### MMX/SSE/AVX operations
 ###
 
+def vec_op_clip(op, size):
+    """
+    Generate simd operations
+    @op: the operator
+    @size: size of an element
+    """
+    def vec_op_clip_instr(ir, instr, a, b):
+        if op == '-':
+            return [m2_expr.ExprAff(a[:size], a[:size] - b[:size])], []
+        else:
+            return [m2_expr.ExprAff(a[:size], m2_expr.ExprOp(op, a[:size], b[:size]))], []
+    return vec_op_clip_instr
+
 # Generic vertical operation
 def vec_vertical_sem(op, elt_size, reg_size, a, b):
     assert(reg_size % elt_size == 0)
     n = reg_size/elt_size
-    ops = [(m2_expr.ExprOp(op, a[i*elt_size:(i+1)*elt_size],
-                   b[i*elt_size:(i+1)*elt_size]),
-            i*elt_size,
-            (i+1)*elt_size) for i in xrange(0, n)]
+    if op == '-':
+        ops = [((a[i*elt_size:(i+1)*elt_size] - b[i*elt_size:(i+1)*elt_size]),
+               i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+    else:
+        ops = [(m2_expr.ExprOp(op, a[i*elt_size:(i+1)*elt_size],
+                               b[i*elt_size:(i+1)*elt_size]),
+                i*elt_size,
+                (i+1)*elt_size) for i in xrange(0, n)]
+
     return m2_expr.ExprCompose(ops)
 
 def float_vec_vertical_sem(op, elt_size, reg_size, a, b):
     assert(reg_size % elt_size == 0)
     n = reg_size/elt_size
-    ops = [(m2_expr.ExprOp('double_to_int_%d' % elt_size, m2_expr.ExprOp(op,
-                   m2_expr.ExprOp('int_%d_to_double' % elt_size,
-                          a[i*elt_size:(i+1)*elt_size]),
-                   m2_expr.ExprOp('int_%d_to_double' % elt_size,
-                          b[i*elt_size:(i+1)*elt_size]))
-                  ),
-            i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+
+    x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'),
+                          64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size]
+    if op == '-':
+        ops = [(m2_expr.ExprOp(x_to_int % elt_size,
+                               m2_expr.ExprOp(int_to_x % elt_size, a[i*elt_size:(i+1)*elt_size]) -
+                               m2_expr.ExprOp(int_to_x % elt_size, b[i*elt_size:(i+1)*elt_size])),
+                i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+    else:
+        ops = [(m2_expr.ExprOp(x_to_int % elt_size,
+                               m2_expr.ExprOp(op,
+                                              m2_expr.ExprOp(int_to_x % elt_size, a[i*elt_size:(i+1)*elt_size]),
+                                              m2_expr.ExprOp(int_to_x % elt_size, b[i*elt_size:(i+1)*elt_size]))),
+                i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+
     return m2_expr.ExprCompose(ops)
 
 def __vec_vertical_instr_gen(op, elt_size, sem):
@@ -3174,12 +3190,20 @@ psubq = vec_vertical_instr('-', 64)
 ###
 
 # SSE
+addss = vec_op_clip('+', 32)
+addsd = vec_op_clip('+', 64)
 addps = float_vec_vertical_instr('+', 32)
 addpd = float_vec_vertical_instr('+', 64)
+subss = vec_op_clip('-', 32)
+subsd = vec_op_clip('-', 64)
 subps = float_vec_vertical_instr('-', 32)
 subpd = float_vec_vertical_instr('-', 64)
+mulss = vec_op_clip('*', 32)
+mulsd = vec_op_clip('*', 64)
 mulps = float_vec_vertical_instr('*', 32)
 mulpd = float_vec_vertical_instr('*', 64)
+divss = vec_op_clip('/', 32)
+divsd = vec_op_clip('/', 64)
 divps = float_vec_vertical_instr('/', 32)
 divpd = float_vec_vertical_instr('/', 64)
 
@@ -3194,13 +3218,11 @@ def pand(ir, instr, a, b):
     e.append(m2_expr.ExprAff(a, c))
     return e, []
 
-def movaps(ir, instr, a, b):
+
+def por(ir, instr, a, b):
     e = []
-    if isinstance(a, m2_expr.ExprMem):
-        a = m2_expr.ExprMem(a.arg, b.size)
-    if isinstance(b, m2_expr.ExprMem):
-        b = m2_expr.ExprMem(b.arg, a.size)
-    e.append(m2_expr.ExprAff(a, b))
+    c = a | b
+    e.append(m2_expr.ExprAff(a, c))
     return e, []
 
 
@@ -3209,16 +3231,151 @@ def pminsw(ir, instr, a, b):
     e.append(m2_expr.ExprAff(a, m2_expr.ExprCond((a - b).msb(), a, b)))
     return e, []
 
+def cvtdq2pd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
+    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('int_32_to_double', b[32:64])))
+    return e, []
+
+def cvtdq2ps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('int_32_to_float', b[32:64])))
+    e.append(m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('int_32_to_float', b[64:96])))
+    e.append(m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('int_32_to_float', b[96:128])))
+    return e, []
+
+def cvtpd2dq(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_int_32', b[64:128])))
+    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprInt64(0)))
+    return e, []
+
+def cvtpd2pi(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_int_32', b[64:128])))
+    return e, []
+
+def cvtpd2ps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_float', b[:64])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_to_float', b[64:128])))
+    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprInt64(0)))
+    return e, []
+
+def cvtpi2pd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
+    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('int_32_to_double', b[32:64])))
+    return e, []
+
+def cvtpi2ps(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('int_32_to_float', b[32:64])))
+    return e, []
+
+def cvtps2dq(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_to_int_32', b[32:64])))
+    e.append(m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('float_to_int_32', b[64:96])))
+    e.append(m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('float_to_int_32', b[96:128])))
+    return e, []
+
+def cvtps2pd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('float_to_double', b[:32])))
+    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprOp('float_to_double', b[32:64])))
+    return e, []
+
+def cvtps2pi(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_to_int_32', b[32:64])))
+    return e, []
+
+def cvtsd2si(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_int_32', b[:64])))
+    return e, []
+
+def cvtsd2ss(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_to_float', b[:64])))
+    return e, []
 
 def cvtsi2sd(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:b.size], m2_expr.ExprOp('cvtsi2sd', b)))
+    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('int_32_to_double', b[:32])))
     return e, []
 
+def cvtsi2ss(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('int_32_to_float', b[:32])))
+    return e, []
+
+def cvtss2sd(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:64], m2_expr.ExprOp('float_to_double', b[:32])))
+    return e, []
+
+def cvtss2si(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_to_int_32', b[:32])))
+    return e, []
+
+def cvttpd2pi(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_trunc_to_int_32', b[64:128])))
+    return e, []
+
+def cvttpd2dq(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('double_trunc_to_int_32', b[64:128])))
+    e.append(m2_expr.ExprAff(a[64:128], m2_expr.ExprInt64(0)))
+    return e, []
+
+def cvttps2dq(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_trunc_to_int_32', b[32:64])))
+    e.append(m2_expr.ExprAff(a[64:96], m2_expr.ExprOp('float_trunc_to_int_32', b[64:96])))
+    e.append(m2_expr.ExprAff(a[96:128], m2_expr.ExprOp('float_trunc_to_int_32', b[96:128])))
+    return e, []
+
+def cvttps2pi(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
+    e.append(m2_expr.ExprAff(a[32:64], m2_expr.ExprOp('float_trunc_to_int_32', b[32:64])))
+    return e, []
+
+def cvttsd2si(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('double_trunc_to_int_32', b[:64])))
+    return e, []
+
+def cvttss2si(ir, instr, a, b):
+    e = []
+    e.append(m2_expr.ExprAff(a[:32], m2_expr.ExprOp('float_trunc_to_int_32', b[:32])))
+    return e, []
 
 def movss(ir, instr, a, b):
     e = []
-    e.append(m2_expr.ExprAff(a[:b.size], m2_expr.ExprOp('movss', b)))
+    if not isinstance(a, m2_expr.ExprMem) and not isinstance(b, m2_expr.ExprMem):
+        # Source and Destination xmm
+        e.append(m2_expr.ExprAff(a[:32], b[:32]))
+    elif not isinstance(b, m2_expr.ExprMem) and isinstance(a, m2_expr.ExprMem):
+        # Source XMM Destination Mem
+        e.append(m2_expr.ExprAff(a, b[:32]))
+    else:
+        # Source Mem Destination XMM
+        e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32),
+                                                         (m2_expr.ExprInt_fromsize(96, 0), 32, 128)])))
     return e, []
 
 
@@ -3318,8 +3475,10 @@ mnemo_func = {'mov': mov,
               'scasw': lambda ir, instr: scas(ir, instr, 16),
               'scasd': lambda ir, instr: scas(ir, instr, 32),
               'pushfd': pushfd,
+              'pushfq': pushfq,
               'pushfw': pushfw,
               'popfd': popfd,
+              'popfq': popfd,
               'popfw': popfw,
               'pushad': pushad,
               'pusha': pushad,
@@ -3392,7 +3551,15 @@ mnemo_func = {'mov': mov,
               'movsd': movsd_dispatch,
               'movsq': lambda ir, instr: movs(ir, instr, 64),
               'fcomp': fcomp,
+              'fcompp': fcompp,
               'ficomp': ficomp,
+              'fucom': fucom,
+              'fucomp': fucomp,
+              'fucompp': fucompp,
+              'comiss': comiss,
+              'comisd': comisd,
+              'fcomi': fcomi,
+              'fcomip': fcomip,
               'nop': nop,
               'fnop': nop,  # XXX
               'hlt': hlt,
@@ -3405,21 +3572,32 @@ mnemo_func = {'mov': mov,
               'fld': fld,
               'fldz': fldz,
               'fld1': fld1,
+              'fldl2t': fldl2t,
+              'fldpi': fldpi,
+              'fldln2': fldln2,
               'fldl2e': fldl2e,
               'fldlg2': fldlg2,
               'fild': fild,
               'fadd': fadd,
               'fiadd': fiadd,
+              'fisub': fisub,
+              'fisubr': fisubr,
+              'fpatan': fpatan,
+              'fprem': fprem,
+              'fprem1': fprem1,
               'fninit': fninit,
+              'fyl2x': fyl2x,
               'faddp': faddp,
               'fsub': fsub,
               'fsubp': fsubp,
               'fsubr': fsubr,
+              'fsubrp': fsubrp,
               'fmul': fmul,
               'fimul': fimul,
               'fmulp': fmulp,
               'fdiv': fdiv,
               'fdivr': fdivr,
+              'fdivrp': fdivrp,
               'fidiv': fidiv,
               'fidivr': fidivr,
               'fdivp': fdivp,
@@ -3428,14 +3606,24 @@ mnemo_func = {'mov': mov,
               'frndint': frndint,
               'fsin': fsin,
               'fcos': fcos,
+              'fsincos': fsincos,
               'fscale': fscale,
               'f2xm1': f2xm1,
+              'fchs': fchs,
               'fsqrt': fsqrt,
               'fabs': fabs,
               'fnstsw': fnstsw,
               'fnstcw': fnstcw,
               'fldcw': fldcw,
               'fwait': fwait,
+              'fcmovb':   fcmovb,
+              'fcmove':   fcmove,
+              'fcmovbe':  fcmovbe,
+              'fcmovu':   fcmovu,
+              'fcmovnb':  fcmovnb,
+              'fcmovne':  fcmovne,
+              'fcmovnbe': fcmovnbe,
+              'fcmovnu':  fcmovnu,
               'fnstenv': fnstenv,
               'sidt': sidt,
               'sldt': sldt,
@@ -3443,6 +3631,8 @@ mnemo_func = {'mov': mov,
               'cmovz': cmovz,
               'cmove': cmovz,
               'cmovnz': cmovnz,
+              'cmovpe':cmovpe,
+              'cmovnp':cmovnp,
               'cmovge': cmovge,
               'cmovnl': cmovge,
               'cmovg': cmovg,
@@ -3465,6 +3655,8 @@ mnemo_func = {'mov': mov,
               'cpuid': cpuid,
               'jo': jo,
               'fcom': fcom,
+              'ftst': ftst,
+              'fxam': fxam,
               'ficom': ficom,
               'fcomi': fcomi,
               'fcomip': fcomip,
@@ -3499,11 +3691,50 @@ mnemo_func = {'mov': mov,
               "str": l_str,
               "movd": movd,
               "movdqu":movdqu,
-              "movaps": movaps,
+              "movdqa":movdqu,
+              "movapd": movapd, # XXX TODO alignement check
+              "movupd": movapd, # XXX TODO alignement check
+              "movaps": movapd, # XXX TODO alignement check
+              "movups": movapd, # XXX TODO alignement check
+              "andps": andps,
+              "andpd": andps,
+              "orps": orps,
+              "orpd": orps,
               "xorps": xorps,
+              "xorpd": xorps,
 
               "pminsw": pminsw,
+              "cvtdq2pd": cvtdq2pd,
+              "cvtdq2ps": cvtdq2ps,
+              "cvtpd2dq": cvtpd2dq,
+              "cvtpd2pi": cvtpd2pi,
+              "cvtpd2ps": cvtpd2ps,
+              "cvtpi2pd": cvtpi2pd,
+              "cvtpi2ps": cvtpi2ps,
+              "cvtps2dq": cvtps2dq,
+              "cvtps2pd": cvtps2pd,
+              "cvtps2pi": cvtps2pi,
+              "cvtsd2si": cvtsd2si,
+              "cvtsd2ss": cvtsd2ss,
               "cvtsi2sd": cvtsi2sd,
+              "cvtsi2ss": cvtsi2ss,
+              "cvtss2sd": cvtss2sd,
+              "cvtss2si": cvtss2si,
+              "cvttpd2pi": cvttpd2pi,
+              "cvttpd2dq": cvttpd2dq,
+              "cvttps2dq": cvttps2dq,
+              "cvttps2pi": cvttps2pi,
+              "cvttsd2si": cvttsd2si,
+              "cvttss2si": cvttss2si,
+
+
+
+
+
+
+
+
+
               "movss": movss,
 
               "ucomiss": ucomiss,
@@ -3533,28 +3764,40 @@ mnemo_func = {'mov': mov,
 
               ## Additions
               # SSE
+              "addss": addss,
+              "addsd": addsd,
               "addps": addps,
               "addpd": addpd,
 
               ## Substractions
               # SSE
+              "subss": subss,
+              "subsd": subsd,
               "subps": subps,
               "subpd": subpd,
 
               ## Multiplications
               # SSE
+              "mulss": mulss,
+              "mulsd": mulsd,
               "mulps": mulps,
               "mulpd": mulpd,
 
               ## Divisions
               # SSE
+              "divss": divss,
+              "divsd": divsd,
               "divps": divps,
               "divpd": divpd,
 
               ### Logical (floating-point)
               ###
 
-              "pand": pand
+              "pand": pand,
+              "por": por,
+
+              "rdmsr": rdmsr,
+              "wrmsr": wrmsr,
 
               }
 
@@ -3576,6 +3819,7 @@ class ir_x86_16(ir):
 
     def get_ir(self, instr):
         args = instr.args[:]
+        args = [arg.replace_expr(float_replace) for arg in args]
         my_ss = None
         if self.do_ds_segm:
             my_ss = DS
@@ -3588,6 +3832,9 @@ class ir_x86_16(ir):
                     args[i] = m2_expr.ExprMem(m2_expr.ExprOp('segm', my_ss,
                                                              a.arg), a.size)
 
+        if not instr.name.lower() in mnemo_func:
+            raise NotImplementedError("Mnemonic %s not implemented" % instr.name)
+
         instr_ir, extra_ir = mnemo_func[
             instr.name.lower()](self, instr, *args)
         self.mod_pc(instr, instr_ir, extra_ir)
diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py
index c82aec2b..5ceb17d6 100644
--- a/miasm2/expression/expression.py
+++ b/miasm2/expression/expression.py
@@ -745,6 +745,7 @@ class ExprOp(Expr):
         # Set size for special cases
         if self._op in [
                 '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3',
+                'fxam_c0', 'fxam_c1', 'fxam_c2', 'fxam_c3',
                 "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf",
                 "ucomiss_zf", "ucomiss_pf", "ucomiss_cf"]:
             sz = 1
@@ -760,13 +761,20 @@ class ExprOp(Expr):
                           'int_16_to_double', 'int_32_to_double',
                           'int_64_to_double', 'int_80_to_double']:
             sz = 64
-        elif self._op in ['double_to_mem_16', 'double_to_int_16', 'double_trunc_to_int_16']:
+        elif self._op in ['double_to_mem_16', 'double_to_int_16',
+                          'float_trunc_to_int_16', 'double_trunc_to_int_16']:
             sz = 16
-        elif self._op in ['double_to_mem_32', 'double_to_int_32', 'double_trunc_to_int_32']:
+        elif self._op in ['double_to_mem_32', 'double_to_int_32',
+                          'float_trunc_to_int_32', 'double_trunc_to_int_32',
+                          'double_to_float']:
             sz = 32
-        elif self._op in ['double_to_mem_64', 'double_to_int_64', 'double_trunc_to_int_64']:
+        elif self._op in ['double_to_mem_64', 'double_to_int_64',
+                          'float_trunc_to_int_64', 'double_trunc_to_int_64',
+                          'float_to_double']:
             sz = 64
-        elif self._op in ['double_to_mem_80', 'double_to_int_80', 'double_trunc_to_int_80']:
+        elif self._op in ['double_to_mem_80', 'double_to_int_80',
+                          'float_trunc_to_int_80',
+                          'double_trunc_to_int_80']:
             sz = 80
         elif self._op in ['segm']:
             sz = self._args[1].size
diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py
index 2109aa53..5721d72a 100644
--- a/test/arch/x86/arch.py
+++ b/test/arch/x86/arch.py
@@ -1,6 +1,10 @@
 import os
 import time
-from miasm2.arch.x86.arch import *
+import miasm2.expression.expression as m2_expr
+from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, parse_ast, ast_int2expr, \
+    base_expr, rmarg, print_size
+from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64
+from miasm2.core.bin_stream import bin_stream_str
 
 filename = os.environ.get('PYTHONSTARTUP')
 if filename and os.path.isfile(filename):
@@ -15,9 +19,9 @@ for s in ["[EAX]",
 
 print '---'
 
-mylabel16 = ExprId('mylabel16', 16)
-mylabel32 = ExprId('mylabel32', 32)
-mylabel64 = ExprId('mylabel64', 64)
+mylabel16 = m2_expr.ExprId('mylabel16', 16)
+mylabel32 = m2_expr.ExprId('mylabel32', 32)
+mylabel64 = m2_expr.ExprId('mylabel64', 64)
 
 reg_and_id = dict(mn_x86.regs.all_regs_ids_byname)
 reg_and_id.update({'mylabel16': mylabel16,
@@ -27,7 +31,7 @@ reg_and_id.update({'mylabel16': mylabel16,
 
 
 def my_ast_id2expr(t):
-    r = reg_and_id.get(t, ExprId(t, size=32))
+    r = reg_and_id.get(t, m2_expr.ExprId(t, size=32))
     return r
 
 my_var_parser = parse_ast(my_ast_id2expr, ast_int2expr)
@@ -61,6 +65,12 @@ m16 = 16  # (16, 16)
 m32 = 32  # (32, 32)
 m64 = 64  # (64, 64)
 reg_tests = [
+    (m16, "XXXXXXXX    CPUID",
+    "0fa2"),
+    (m32, "XXXXXXXX    CPUID",
+    "0fa2"),
+    (m64, "XXXXXXXX    CPUID",
+    "0fa2"),
 
 
     (m32, "XXXXXXXX    PMINSW     MM0, QWORD PTR [EAX]",
@@ -765,6 +775,11 @@ reg_tests = [
     (m32, "00000000    CALL       0x6655:0xFF332211",
      "9a112233FF5566"),
 
+    (m32, "00000000    CALL       DWORD PTR [0xFFFFFFA3]",
+     "FF1DA3FFFFFF"),
+    (m64, "00000000    CALL       QWORD PTR [RIP+0xFFFFFFFFFFFFFFA3]",
+     "FF1DA3FFFFFF"),
+
 
     (m16, "00000000    CBW",
      "98"),
@@ -893,6 +908,20 @@ reg_tests = [
 
     (m32, "00000000    FCMOVB     ST, ST(2)",
      "dac2"),
+    (m32, "00000000    FCMOVE     ST, ST(2)",
+     "daca"),
+    (m32, "00000000    FCMOVBE    ST, ST(2)",
+     "dad2"),
+    (m32, "00000000    FCMOVU     ST, ST(2)",
+     "dada"),
+    (m32, "00000000    FCMOVNB    ST, ST(2)",
+     "dbc2"),
+    (m32, "00000000    FCMOVNE    ST, ST(2)",
+     "dbca"),
+    (m32, "00000000    FCMOVNBE   ST, ST(2)",
+     "dbd2"),
+    (m32, "00000000    FCMOVNU    ST, ST(2)",
+     "dbda"),
 
     (m32, "00000000    FCOM       DWORD PTR [EAX]",
      "d810"),
@@ -902,9 +931,13 @@ reg_tests = [
      "d818"),
     (m32, "00000000    FCOMP      QWORD PTR [EAX]",
      "dC18"),
+    (m32, "00000000    FCOM       ST, ST(1)",
+     "d8d1"),
     (m32, "00000000    FCOM       ST, ST(2)",
      "d8d2"),
 
+    (m32, "00000000    FCOMP      ST, ST(1)",
+     "d8d9"),
     (m32, "00000000    FCOMPP",
      "ded9"),
 
@@ -2056,6 +2089,14 @@ reg_tests = [
 
     (m32, "00000000    SIDT       DWORD PTR [EAX]",
      "0f0108"),
+    (m32, "00000000    SLDT       DWORD PTR [EAX]",
+     "0f0000"),
+
+
+    (m32, "00000000    LGDT       DWORD PTR [EAX]",
+     "0f0110"),
+    (m32, "00000000    LIDT       DWORD PTR [EAX]",
+     "0f0118"),
 
 
 
@@ -2119,7 +2160,7 @@ reg_tests = [
     (m32, "00000000    VERW       DWORD PTR [EAX]",
      "0f0028"),
 
-    (m32, "00000000    WBIND",
+    (m32, "00000000    WBINVD",
      "0f09"),
 
     (m32, "00000000    WRMSR",
@@ -2175,6 +2216,13 @@ reg_tests = [
     (m32, "00000000    XORPD      XMM1, XMM2",
      "660f57ca"),
 
+    (m32, "00000000    ORPS       XMM1, XMM2",
+     "0f56ca"),
+    (m32, "00000000    ORPS       XMM1, XMMWORD PTR [EDI+0x42]",
+     "0f564f42"),
+    (m32, "00000000    ORPD       XMM1, XMM2",
+     "660f56ca"),
+
     (m32, "00000000    MOVAPS     XMMWORD PTR [EBP+0xFFFFFFB8], XMM0",
      "0f2945b8"),
     (m32, "00000000    MOVAPS     XMM0, XMMWORD PTR [EBP+0xFFFFFFB8]",
@@ -2218,8 +2266,13 @@ reg_tests = [
      "f20f5911"),
 
 
-    (m32, "00000000    PXOR       XMM0, XMM0",
-     "0fefc0"),
+    (m32, "00000000    PXOR       MM0, MM1",
+     "0fefc1"),
+    (m32, "00000000    PXOR       XMM0, XMM1",
+     "660fefc1"),
+    (m32, "00000000    PXOR       XMM6, XMMWORD PTR [ECX+0x10]",
+     "660fef7110"),
+
     (m32, "00000000    UCOMISD    XMM0, QWORD PTR [EBP+0xFFFFFFD8]",
      "660f2e45d8"),
     (m32, "00000000    ANDPS      XMM0, XMMWORD PTR [EBX+0x2CBD27]",
@@ -2235,10 +2288,56 @@ reg_tests = [
     (m32, "00000000    MAXSS      XMM0, DWORD PTR [EBX+0x2CBD37]",
      "f30f5f8337bd2c00"),
 
+    (m32, "00000000    CVTDQ2PD   XMM0, XMM3",
+     "f30fe6c3"),
+    (m32, "00000000    CVTDQ2PS   XMM0, XMM3",
+     "0f5bc3"),
+    (m32, "00000000    CVTPD2DQ   XMM0, XMM3",
+     "f20fe6c3"),
+    (m32, "00000000    CVTPD2PI   MM0, XMM3",
+     "660f2dc3"),
+    (m32, "00000000    CVTPD2PS   XMM0, XMM3",
+     "660f5ac3"),
+    (m32, "00000000    CVTPI2PD   XMM0, MM3",
+     "660f2ac3"),
+    (m32, "00000000    CVTPI2PS   XMM0, MM3",
+     "0f2ac3"),
+    (m32, "00000000    CVTPS2DQ   XMM0, XMM3",
+     "660f5bc3"),
+    (m32, "00000000    CVTPS2PD   XMM0, XMM3",
+     "0f5ac3"),
+    (m32, "00000000    CVTPS2PI   MM0, XMM3",
+     "0f2dc3"),
+    (m32, "00000000    CVTSD2SI   EAX, XMM3",
+     "f20f2dc3"),
+    (m32, "00000000    CVTSD2SS   XMM0, XMM3",
+     "f20f5ac3"),
     (m32, "00000000    CVTSI2SD   XMM0, EBX",
      "f20f2ac3"),
     (m32, "00000000    CVTSI2SS   XMM0, EBX",
      "f30f2ac3"),
+    (m32, "00000000    CVTSS2SD   XMM0, XMM0",
+     "f30f5ac0"),
+    (m32, "00000000    CVTSS2SI   EAX, XMM3",
+     "f30f2dc3"),
+    (m32, "00000000    CVTTPD2PI  MM0, XMM3",
+     "660f2cc3"),
+    (m32, "00000000    CVTTPD2DQ  XMM0, XMM3",
+     "660fe6c3"),
+    (m32, "00000000    CVTTPS2DQ  XMM0, XMM3",
+     "f30f5bc3"),
+    (m32, "00000000    CVTTPS2PI  MM0, XMM3",
+     "0f2cc3"),
+    (m32, "00000000    CVTTSD2SI  EAX, XMM3",
+     "f20f2cc3"),
+    (m32, "00000000    CVTTSS2SI  EAX, XMM3",
+     "f30f2cc3"),
+
+
+
+
+    (m32, "00000000    CVTSI2SD   XMM0, EBX",
+     "f20f2ac3"),
 
     (m32, "00000000    PMINSW     MM0, MM1",
      "0feac1"),
@@ -2313,6 +2412,8 @@ reg_tests = [
     (m64, "00000000    MOVQ       XMM1, QWORD PTR [R12+0xFFFFFFFFFFFFFFE0]",
      "f3410f7e4c24e0"),
 
+    (m64, "00000000    MOVQ       RCX, XMM0",
+     "66480F7EC1"),
 
     (m32, "00000000    PAND       MM2, MM6",
      "0fdbd6"),
@@ -2327,6 +2428,10 @@ reg_tests = [
 
     (m32, "00000000    POR        XMM0, XMM1",
      "660febc1"),
+    (m32, "00000000    POR        XMM6, XMMWORD PTR [ECX+0x10]",
+     "660febb110000000"),
+    (m32, "00000000    POR        MM6, QWORD PTR [ECX+0x10]",
+     "0febb110000000"),
 
     (m32, "00000000    MOVDQU     XMM1, XMMWORD PTR [ESI]",
      "f30f6f0e"),
@@ -2441,8 +2546,6 @@ reg_tests = [
     ##
 
     # SSE
-    (m32, "00000000    CVTSS2SD   XMM0, XMM0",
-     "f30f5ac0"),
     (m32, "00000000    CVTSS2SD   XMM0, DWORD PTR [EBP+0xFFFFFFD0]",
      "f30f5a45d0"),
 
@@ -2451,6 +2554,20 @@ reg_tests = [
     (m32, "00000006    CVTSS2SD   XMM4, DWORD PTR [EAX+EDX*0x8]",
      "F30F5A24D0"),
 
+
+    (m32, "00000000    COMISS     XMM0, XMM0",
+    "0f2fc0"),
+    (m64, "00000000    COMISS     XMM0, XMM8",
+    "410f2fc0"),
+    (m64, "00000000    COMISS     XMM0, DWORD PTR [RAX]",
+    "0f2f00"),
+    (m64, "00000000    COMISS     XMM0, DWORD PTR [RSP+0x34]",
+    "0F2F442434"),
+    (m32, "00000000    COMISD     XMM7, XMM6",
+    "660F2FFE"),
+
+
+
 ]