-rw-r--r--  miasm2/arch/x86/arch.py  103
-rw-r--r--  miasm2/arch/x86/regs.py   16
-rw-r--r--  miasm2/arch/x86/sem.py   140
3 files changed, 242 insertions(+), 17 deletions(-)
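The arch.py hunks below register the encodings of the new packed-integer and packed-float instructions. As a quick sanity check, a mnemonic can be round-tripped through the x86 machine class; this is a minimal sketch assuming the mn_x86.fromstring/asm/dis API exercised by miasm2's own regression tests, and PADDB with XMM1/XMM2 is just an illustrative pick among the added instructions:

# Sketch only; API usage assumed from miasm2's test suite (Python 2).
from miasm2.arch.x86.arch import mn_x86

# Parse the textual form in 32-bit mode, assemble it, then decode it back.
instr = mn_x86.fromstring("PADDB XMM1, XMM2", 32)
candidates = mn_x86.asm(instr)                # candidate byte encodings
print([c.encode('hex') for c in candidates])  # '660ffcca' expected among them
print(mn_x86.dis(candidates[0], 32))          # should print PADDB XMM1, XMM2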
diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py
index 8d45f438..0a1d83e2 100644
--- a/miasm2/arch/x86/arch.py
+++ b/miasm2/arch/x86/arch.py
@@ -3602,8 +3602,6 @@ addop("movq", [bs8(0x0f), bs8(0xd6), xmm, pref_66] +
 addop("addss", [bs8(0x0f), bs8(0x58), xmm, pref_f3] + rmmod(rmreg, rm_arg))
 addop("addsd", [bs8(0x0f), bs8(0x58), xmm, pref_f2] + rmmod(rmreg, rm_arg))
-addop("addps", [bs8(0x0f), bs8(0x58), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg))
-addop("addpd", [bs8(0x0f), bs8(0x58), xmm, pref_66] + rmmod(rmreg, rm_arg))
 
 addop("subss", [bs8(0x0f), bs8(0x5c), xmm, pref_f3] + rmmod(rmreg, rm_arg))
 addop("subsd", [bs8(0x0f), bs8(0x5c), xmm, pref_f2] + rmmod(rmreg, rm_arg))
@@ -3834,14 +3832,20 @@ addop("xor", [bs("001100"), swapargs, w8] + rmmod(rmreg, rm_arg_w8),
       [rm_arg_w8, rmreg])
 
-# xorps_name = {16:'XORPD', 32:'XORPS', 64:'XORPS'}
-# bs_xorps_name = bs_modname_size(l=0, name=xorps_name)
-# addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [
-#     bs_xorps_name] )
-addop("xorpd", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode16])
-addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode32])
-addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode64])
+addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)])
+
+
+#addop("pand", [bs8(0x0f), bs8(0xdb), xmm, pref_66])# + rmmod(rmreg, rm_arg))
+
+#### MMX/SSE/AVX operations
+#### Categories are the same as here: https://software.intel.com/sites/landingpage/IntrinsicsGuide/
+####
+
+### Arithmetic (integers)
+###
+## Move
+# SSE
 # movaps_name = {16:'MOVAPD', 32:'MOVAPS', 64:'MOVAPS'}
 # bs_movaps_name = bs_modname_size(l=0, name=movaps_name)
 # addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + rmmod(rmreg,
@@ -3852,32 +3856,97 @@ addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + rmmod(rmreg,
       rm_arg) + [bs_opmode32], [rmreg, rm_arg])
 addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + rmmod(rmreg,
       rm_arg) + [bs_opmode64], [rmreg, rm_arg])
+addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), xmm, pref_f3]
+      + rmmod(rmreg, rm_arg), [rmreg, rm_arg])
+addop("movdqa", [bs8(0x0f), bs("011"), swapargs, bs("1111"), xmm, pref_66]
+      + rmmod(rmreg, rm_arg), [rmreg, rm_arg])
 
-addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)])
+
+## Additions
+# SSE
+addop("paddb", [bs8(0x0f), bs8(0xfc), xmm, pref_66] + rmmod(rmreg, rm_arg))
+addop("paddw", [bs8(0x0f), bs8(0xfd), xmm, pref_66] + rmmod(rmreg, rm_arg))
+addop("paddd", [bs8(0x0f), bs8(0xfe), xmm, pref_66] + rmmod(rmreg, rm_arg))
+addop("paddq", [bs8(0x0f), bs8(0xd4), xmm, pref_66] + rmmod(rmreg, rm_arg))
+
+## Subtractions
+# SSE
+addop("psubb", [bs8(0x0f), bs8(0xf8), xmm, pref_66] + rmmod(rmreg, rm_arg))
+addop("psubw", [bs8(0x0f), bs8(0xf9), xmm, pref_66] + rmmod(rmreg, rm_arg))
+addop("psubd", [bs8(0x0f), bs8(0xfa), xmm, pref_66] + rmmod(rmreg, rm_arg))
+addop("psubq", [bs8(0x0f), bs8(0xfb), xmm, pref_66] + rmmod(rmreg, rm_arg))
+
+### Arithmetic (floating-point)
+###
+
+## Additions
+# SSE
+addop("addps", [bs8(0x0f), bs8(0x58), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg))
+addop("addpd", [bs8(0x0f), bs8(0x58), xmm, pref_66] + rmmod(rmreg, rm_arg))
+
+## Subtractions
+# SSE
+addop("subps", [bs8(0x0f), bs8(0x5c), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg))
+addop("subpd", [bs8(0x0f), bs8(0x5c), xmm, pref_66] + rmmod(rmreg, rm_arg))
+
+## Multiplications
+# SSE
+addop("mulps", [bs8(0x0f), bs8(0x59), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg))
+addop("mulpd", [bs8(0x0f), bs8(0x59), xmm, pref_66] + rmmod(rmreg, rm_arg))
+
+## Divisions
+# SSE
+addop("divps", [bs8(0x0f), bs8(0x5e), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg))
+addop("divpd", [bs8(0x0f), bs8(0x5e), xmm, pref_66] + rmmod(rmreg, rm_arg))
+
+### Logical (floating-point)
+###
+
+## XOR
+# SSE
+# xorps_name = {16:'XORPD', 32:'XORPS', 64:'XORPS'}
+# bs_xorps_name = bs_modname_size(l=0, name=xorps_name)
+# addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [
+#     bs_xorps_name] )
+addop("xorpd", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode16])
+addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode32])
+addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode64])
+
+## AND
+# MMX
 addop("pand", [bs8(0x0f), bs8(0xdb), mm, no_xmm_pref] + rmmod(rmreg, rm_arg),
       [rmreg, rm_arg])
+# SSE
 addop("pand", [bs8(0x0f), bs8(0xdb), xmm, pref_66] + rmmod(rmreg, rm_arg),
       [rmreg, rm_arg])
 
+## OR
+# MMX
 addop("por", [bs8(0x0f), bs8(0xeb), mm, no_xmm_pref] + rmmod(rmreg, rm_arg),
       [rmreg, rm_arg])
+# SSE
 addop("por", [bs8(0x0f), bs8(0xeb), xmm, pref_66] + rmmod(rmreg, rm_arg),
       [rmreg, rm_arg])
 
+### Convert
+### SS = single precision
+### SD = double precision
+###
-addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), xmm, pref_f3]
-      + rmmod(rmreg, rm_arg), [rmreg, rm_arg])
-addop("movdqa", [bs8(0x0f), bs("011"), swapargs, bs("1111"), xmm, pref_66]
-      + rmmod(rmreg, rm_arg), [rmreg, rm_arg])
+## SS -> SD
+##
+# SSE
 addop("cvtss2sd", [bs8(0x0f), bs8(0x5a), xmm, pref_f3]
       + rmmod(rmreg, rm_arg))
-addop("cvtsd2ss", [bs8(0x0f), bs8(0x5a), xmm, pref_f2]
-      + rmmod(rmreg, rm_arg))
+## SD -> SS
+##
 
-#addop("pand", [bs8(0x0f), bs8(0xdb), xmm, pref_66])# + rmmod(rmreg, rm_arg))
+# SSE
+addop("cvtsd2ss", [bs8(0x0f), bs8(0x5a), xmm, pref_f2]
+      + rmmod(rmreg, rm_arg))
 
 mn_x86.bintree = factor_one_bit(mn_x86.bintree)
diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py
index 293f81e7..37935edc 100644
--- a/miasm2/arch/x86/regs.py
+++ b/miasm2/arch/x86/regs.py
@@ -285,6 +285,22 @@ mm5 = ExprId(reg_mm5, 64)
 mm6 = ExprId(reg_mm6, 64)
 mm7 = ExprId(reg_mm7, 64)
 
+XMM0 = regs_xmm_expr[0]
+XMM1 = regs_xmm_expr[1]
+XMM2 = regs_xmm_expr[2]
+XMM3 = regs_xmm_expr[3]
+XMM4 = regs_xmm_expr[4]
+XMM5 = regs_xmm_expr[5]
+XMM6 = regs_xmm_expr[6]
+XMM7 = regs_xmm_expr[7]
+XMM8 = regs_xmm_expr[8]
+XMM9 = regs_xmm_expr[9]
+XMM10 = regs_xmm_expr[10]
+XMM11 = regs_xmm_expr[11]
+XMM12 = regs_xmm_expr[12]
+XMM13 = regs_xmm_expr[13]
+XMM14 = regs_xmm_expr[14]
+XMM15 = regs_xmm_expr[15]
 
 # tmp1= ExprId(reg_tmp1)
 zf = ExprId(reg_zf, size=1)
diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py
index 10a5b291..318195b7 100644
--- a/miasm2/arch/x86/sem.py
+++ b/miasm2/arch/x86/sem.py
@@ -2580,6 +2580,97 @@ def xorps(ir, instr, a, b):
     e.append(ExprAff(a, ExprOp('xorps', a, b)))
     return None, e, []
 
+### MMX/SSE/AVX operations
+###
+
+# Generic vertical operation
+def vec_vertical_sem(op, elt_size, reg_size, a, b):
+    assert(reg_size % elt_size == 0)
+    n = reg_size/elt_size
+    if op == '-':
+        ops = [(ExprOp('+', ExprSlice(a, i*elt_size, (i+1)*elt_size),
+                       ExprOp('-', ExprSlice(b, i*elt_size, (i+1)*elt_size))),
+                i*elt_size,
+                (i+1)*elt_size) for i in xrange(0, n)]
+    else:
+        ops = [(ExprOp(op, ExprSlice(a, i*elt_size, (i+1)*elt_size),
+                       ExprSlice(b, i*elt_size, (i+1)*elt_size)),
+                i*elt_size,
+                (i+1)*elt_size) for i in xrange(0, n)]
+    return ExprCompose(ops)
+
+def float_vec_vertical_sem(op, elt_size, reg_size, a, b):
+    assert(reg_size % elt_size == 0)
+    n = reg_size/elt_size
+    ops = [(ExprOp('double_to_int_%d' % elt_size, ExprOp(op,
+                ExprOp('int_%d_to_double' % elt_size,
+                       ExprSlice(a, i*elt_size, (i+1)*elt_size)),
+                ExprOp('int_%d_to_double' % elt_size,
+                       ExprSlice(b, i*elt_size, (i+1)*elt_size)))
+            ),
+            i*elt_size, (i+1)*elt_size) for i in xrange(0, n)]
+    return ExprCompose(ops)
+
+def __vec_vertical_instr_gen(op, elt_size, sem):
+    def vec_instr(ir, instr, a, b):
+        e = []
+        if isinstance(b, ExprMem):
+            b = ExprMem(b.arg, a.size)
+        reg_size = a.size
+        e.append(ExprAff(a, sem(op, elt_size, reg_size, a, b)))
+        return None, e, []
+    return vec_instr
+
+def vec_vertical_instr(op, elt_size):
+    return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem)
+
+def float_vec_vertical_instr(op, elt_size):
+    return __vec_vertical_instr_gen(op, elt_size, float_vec_vertical_sem)
+
+### Integer arithmetic
+###
+
+## Additions
+##
+
+# SSE
+paddb = vec_vertical_instr('+', 8)
+paddw = vec_vertical_instr('+', 16)
+paddd = vec_vertical_instr('+', 32)
+paddq = vec_vertical_instr('+', 64)
+
+## Subtractions
+##
+
+# SSE
+psubb = vec_vertical_instr('-', 8)
+psubw = vec_vertical_instr('-', 16)
+psubd = vec_vertical_instr('-', 32)
+psubq = vec_vertical_instr('-', 64)
+
+### Floating-point arithmetic
+###
+
+# SSE
+addps = float_vec_vertical_instr('+', 32)
+addpd = float_vec_vertical_instr('+', 64)
+subps = float_vec_vertical_instr('-', 32)
+subpd = float_vec_vertical_instr('-', 64)
+mulps = float_vec_vertical_instr('*', 32)
+mulpd = float_vec_vertical_instr('*', 64)
+divps = float_vec_vertical_instr('/', 32)
+divpd = float_vec_vertical_instr('/', 64)
+
+### Logical (floating-point)
+###
+
+# MMX/SSE/AVX
+def pand(ir, instr, a, b):
+    e = []
+    c = a & b
+    # No flag affected
+    e.append(ExprAff(a, c))
+    return None, e, []
 
 def movaps(ir, instr, a, b):
     e = []
@@ -2878,6 +2969,55 @@ mnemo_func = {'mov': mov,
               "movss": movss,
               "ucomiss": ucomiss,
+
+              ####
+              #### MMX/SSE/AVX operations
+
+              ### Arithmetic (integers)
+              ###
+
+              ## Additions
+              # SSE
+              "paddb": paddb,
+              "paddw": paddw,
+              "paddd": paddd,
+              "paddq": paddq,
+
+              ## Subtractions
+              # SSE
+              "psubb": psubb,
+              "psubw": psubw,
+              "psubd": psubd,
+              "psubq": psubq,
+
+              ### Arithmetic (floating-point)
+              ###
+
+              ## Additions
+              # SSE
+              "addps": addps,
+              "addpd": addpd,
+
+              ## Subtractions
+              # SSE
+              "subps": subps,
+              "subpd": subpd,
+
+              ## Multiplications
+              # SSE
+              "mulps": mulps,
+              "mulpd": mulpd,
+
+              ## Divisions
+              # SSE
+              "divps": divps,
+              "divpd": divpd,
+
+              ### Logical (floating-point)
+              ###
+
+              "pand": pand
+
               }
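The core of the sem.py change is the "vertical" (lane-wise) expansion: each operand is sliced into elt_size-bit lanes, the operator is applied lane by lane, and the lanes are recomposed, so a carry or overflow never crosses a lane boundary. The following standalone sketch models that contract in plain Python, with no miasm2 dependency; the vec_vertical name and the sample values are illustrative only:

# Illustrative model, not part of miasm2.
def vec_vertical(op, elt_size, reg_size, a, b):
    # Apply op over reg_size/elt_size independent lanes, truncating each
    # result to elt_size bits (no carry into the neighbouring lane).
    assert reg_size % elt_size == 0
    mask = (1 << elt_size) - 1
    res = 0
    for i in range(reg_size // elt_size):
        lane = op((a >> (i * elt_size)) & mask, (b >> (i * elt_size)) & mask)
        res |= (lane & mask) << (i * elt_size)
    return res

# PADDB-style: eight byte lanes in a 64-bit value; the 0xff lane wraps to
# 0x00 instead of carrying into its neighbour.
print("%x" % vec_vertical(lambda x, y: x + y, 8, 64,
                          0x01ff010101010101, 0x0101010101010101))
# -> 200020202020202

Two details of the patch worth noting: vec_vertical_sem rewrites a - b per lane as a + (-b), presumably to stay within the IR's unary '-' operator, and float_vec_vertical_sem brackets each lane operation with int_N_to_double / double_to_int_N conversions, so the packed-float instructions reuse exactly the same slicing machinery as the packed-integer ones.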