# # Copyright (C) 2011 EADS France, Fabrice Desclaux # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # from numpy import uint8, uint16, uint32, uint64, int8, int16, int32, int64 import struct import logging from miasm.core.parse_ad import parse_ad, ad_to_generic from miasm.arch.ia32_reg import x86_afs import shlex log = logging.getLogger("x86escape") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARN) tab_int_size = {int8:8, uint8:8, int16:16, uint16:16, int32:32, uint32:32, int64:64, uint64:64 } tab_size2int = {x86_afs.u08:uint8, x86_afs.u16:uint16, x86_afs.u32:uint32, x86_afs.s08:int8, x86_afs.s16:int16, x86_afs.s32:int32} tab_max_uint = {x86_afs.u08:0xFF, x86_afs.u16:0xFFFF, x86_afs.u32:0xFFFFFFFF, x86_afs.u64:0xFFFFFFFFFFFFFFFFL} prefix_dic = {"lock":0xF0, "repnz":0xF2, "repne":0xF2, "repz":0xF3, "repe":0xF3, "rep":0xF3, } prefix_dic_inv = dict(map(lambda x:(x[1],x[0]), prefix_dic.items())) #'es'|'cs'|'ss'|'ds'|'fs'|'gs') ':' ''' prefix_seg = {0:0x26, 1:0x2E, 2:0x36, 3:0x3E, 4:0x64, 5:0x65} prefix_seg_inv = dict(map(lambda x:(x[1],x[0]), prefix_seg.items())) class mnemonic: def __init__(self, name, opc, afs, rm, modifs, modifs_orig, sem): self.name = name self.opc = opc self.afs = afs self.rm = rm self.modifs = modifs self.modifs_orig = modifs_orig def __str__(self): return self.name+' '+str(self.opc)+' '+str(self.afs)+' '+str(self.rm)+' '+str(self.modifs)+' '+str(self.modifs_orig)#+' '+str(self.sem)+' ' def mask_opc_to_i(mask, opc): log.debug("mask %x opc %x"%(mask, opc)) return [i for i in range(0x100) if (i & mask) == opc] mask_d = 0x38 mask_reg = 0xF8 mask_cond = 0xF0 d0 = 0<<3 d1 = 1<<3 d2 = 2<<3 d3 = 3<<3 d4 = 4<<3 d5 = 5<<3 d6 = 6<<3 d7 = 7<<3 reg = "reg" noafs = "noafs" cond = "cond" cond_list = ["o", "no", "b", "ae", "e", "nz", "be", "a", "s", "ns", "p", "np", "l",# "nge", "ge", "le", "g" ] no_rm = [] rmr = "rmr" imm = x86_afs.imm ims = x86_afs.ims mim = x86_afs.mim u08 = x86_afs.u08 s08 = x86_afs.s08 u16 = x86_afs.u16 s16 = x86_afs.s16 u32 = x86_afs.u32 s32 = x86_afs.s32 im1 = x86_afs.im1 im3 = x86_afs.im3 r_eax = {x86_afs.r_eax:1, x86_afs.ad:False} r_cl = {x86_afs.reg_list8.index(x86_afs.r_cl):1, x86_afs.ad:False, x86_afs.size:x86_afs.u08} r_dx = {x86_afs.reg_list16.index(x86_afs.r_dx):1, x86_afs.ad:False, x86_afs.size:x86_afs.u16} r_es = 'es' r_ss = 'ss' r_cs = 'cs' r_ds = 'ds' segm_regs = [r_es, r_ss, r_cs, r_ds] w8 = "w8" se = "se" sw = "sw" ww = "ww" sg = "sg" # segment reg dr = "dr" # debug reg cr = "cr" # control reg ft = "ft" # float w64= "w64" sd = "sd" # single/double wd = "wd" # word/dword bkf = "breakflow" spf = "splitflow" dtf = "dstflow" seip = "seip" #seteip stpeip = "stpeip" #stop eip unsanity_mnemo = ['nop', 'monitor', 'mwait', 'fadd', 'faddp', 'fiadd', 'fcmovb', 'fcom', 'fcomp', 'fcomip', 'fdiv', 'fdivr', 'fidivr', 'fdivrp', 'ficom', 'ficomp', 'fild', 'fist', 'fistp', 'fisttp', 'fld', 'fldcw', 'fld1', 'fldl2t', "fldl2e", "fldpi", "fldlg2", "fldln2", "fldz", 'fldenv', 'fmul', 'fimul', 'fmulp', 'fst', 'fstp', 'fnstcw', 'fnstenv', 'f2xm1', 'fnstsw', 'fsub', 'fsubr', 'fisubr', 'fsubrp', 'ftst', 'fucom', 'fucompp', 'fxam', 'fxtract', 'fyl2x', 'fyl2xp1', 'fsqrt', 'fsincos', 'fsin', 'fscale', 'fcos', 'fdecstp', 'fnop', 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint', "shl", 'sal', 'sar'] mask_drcrsg = {cr:0x100, dr:0x200, sg:0x400} def hexdump(a): return reduce(lambda x,y:x+"%.2X"%ord(y), a, "") def is_address(a): if x86_afs.ad in a and a[x86_afs.ad]: return True return False def is_imm(a): if x86_afs.ad in a and a[x86_afs.ad]: return False if not (x86_afs.imm in a or x86_afs.symb in a) : return False for k in a: if not k in [x86_afs.imm, x86_afs.size, x86_afs.ad, x86_afs.symb]: return False return True def is_ad_lookup(a): if not x86_afs.ad in a or not a[x86_afs.ad]: return False if not (x86_afs.imm in a or x86_afs.symb in a) : return False for k in a: if not k in [x86_afs.imm, x86_afs.size, x86_afs.ad, x86_afs.symb]: return False return True def is_reg(a): if x86_afs.ad in a and a[x86_afs.ad]: return False if x86_afs.imm in a: return False if x86_afs.symb in a: return False return True def get_label(a): if x86_afs.ad in a: return None if x86_afs.imm in a: return None if not x86_afs.symb in a: return None n = a[x86_afs.symb] if len(n)!=1: return None k = n.keys()[0] if n[k] != 1: return None return k def check_imm_size(imm, size): i = int32(uint32(imm)) if not size in [u08, s08, u16, s16, u32, s32]: raise ValueError("unknown size %s"%size) if size == u08 and imm >= 0 and imm < 0x100: return uint8(imm) elif size == s08 and i >=-0x80 and i < 0x80: return int8(i) elif size == u16 and imm >= 0 and imm < 0x10000: return uint16(imm) elif size == s16 and i >=-0x8000 and i < 0x8000: return int16(i) elif size == u32 and imm >=-0x100000000L and imm < 0x100000000L: return uint32(imm) elif size == s32 and i >=-0x80000000 and i < 0x80000000: return int32(i) return None def dict_to_ad(d, modifs = {}, mnemo_mode = u32): size = [x86_afs.u32, x86_afs.u08][modifs[w8]==True] #overwrite w8 if modifs[sd]!=None: size = [x86_afs.f32, x86_afs.f64][modifs[sd]==True] elif modifs[wd]: size = x86_afs.u16 tab32 = {x86_afs.u08:x86_afs.reg_list8, x86_afs.u16:x86_afs.reg_list16, x86_afs.u32:x86_afs.reg_list32,x86_afs.f32:x86_afs.reg_flt} tab16 = {x86_afs.u08:x86_afs.reg_list8, x86_afs.u16:x86_afs.reg_list32, x86_afs.u32:x86_afs.reg_list16} ad_size = {x86_afs.u08:"byte ptr", x86_afs.u16:"word ptr", x86_afs.u32:"dword ptr", x86_afs.f32:"single ptr", x86_afs.f64:"double ptr"} if is_reg(d): n = [x for x in d if type(x) in [int, long]] if len(n)!=1: raise ValueError("bad reg! %s"%str(d)) n = n[0] if x86_afs.size in d and d[x86_afs.size] == x86_afs.size_seg : t = x86_afs.reg_sg elif x86_afs.size in d: my_s = d[x86_afs.size] if my_s == x86_afs.f64: my_s = x86_afs.u32 t = tab32[my_s] else: if mnemo_mode == u32: t = tab32[size] else: t = tab16[size] if modifs[dr] and n>0x7: t = x86_afs.reg_dr n&=7 if modifs[cr] and n>0x7: t = x86_afs.reg_cr n&=7 if modifs[sg] and n>0x7: t = x86_afs.reg_sg n&=7 if modifs[sd] is not None: t = tab32[size] n&=7 try: out = t[n] except: print 'WARNING!dict2ad', t, str(d) out = "" elif is_imm(d): out = "" if x86_afs.imm in d: imm_tmp = int(d[x86_afs.imm]) &0xffffffffL if imm_tmp<0: out+='-0x%.8X'%-imm_tmp else: out+='0x%.8X'%imm_tmp if x86_afs.symb in d: #XXX todo multiple ref if out!="": out+='+' for c in d[x86_afs.symb]: if d[x86_afs.symb][c]==1: out += '%s'%str(c.name) else: out += '%d,%s'%(int(d[x86_afs.symb][c]), str(c)) elif is_address(d): if x86_afs.size in d: size = d[x86_afs.size] out = [ad_size[size]] segment = " " if x86_afs.segm in d: segment += x86_afs.reg_sg[d[x86_afs.segm]]+':' for k in d: if k in [x86_afs.ad, x86_afs.size, x86_afs.segm]: continue elif k == x86_afs.imm: if int(d[k])<0: out.append('-0x%.8X'%-int(d[k])) else: out.append('0x%.8X'%int(d[k])) elif type(k) in [int, long]: if d[k] ==1: out.append(x86_afs.reg_list32[k]) else: out.append(str(int(d[k]))+'*'+x86_afs.reg_list32[k]) elif k == x86_afs.symb: out.append(str(d[k])) else: raise ValueError('strange ad componoant: %s'%str(d)) out = out[0]+segment+'['+ reduce(lambda x,y: x+"+"+y, out[1:], "")[1:] + ']' else: raise ValueError('unknown arg %s'%str(d)) return out class x86allmncs: def print_op(self, optab, decal): cpt = -1 for i in optab: cpt+=1 if type(i) == list: self.print_op(i, decal+1) elif i == None: pass else: print "%.3d "%cpt+"\t"*decal + str(i) def print_tab(self): for i in range(0x100): if type(self.db_afs[i]) == list: for j in range(0x100): print "%.2X %.2X\t"%(i,j), print self.db_afs[i][j] else: print "%.2X\t"%i+str(self.db_afs[i]) def get_afs(self, bin, m, size_m): my_uint = uint32 if size_m == u32: db_afs = self.db_afs else: db_afs = self.db_afs_16 mod, re, rm = self.modrm(m) if type(db_afs[m])==list: a = dict(db_afs[m][ord(bin.readbs())]) else: a = dict(db_afs[m]) if x86_afs.imm in a: if a[x86_afs.imm] == x86_afs.u08: a[x86_afs.imm] = my_uint(struct.unpack('B', bin.readbs())[0]) elif a[x86_afs.imm] == x86_afs.s08: a[x86_afs.imm] = my_uint(struct.unpack('b', bin.readbs())[0]) elif a[x86_afs.imm] == x86_afs.u32: a[x86_afs.imm] = my_uint(struct.unpack('I', bin.readbs(4))[0]) elif a[x86_afs.imm] == x86_afs.u16: a[x86_afs.imm] = my_uint(struct.unpack('H', bin.readbs(2))[0]) else: raise ValueError('imple other afs ... ', str(a[x86_afs.imm])) return re, a def get_afs_re(self, re): return {x86_afs.ad:False, re:1} def get_im_fmt(self, modifs, mnemo_mode, im): if modifs[se]: fmt,t = ('b',s08) elif modifs[w8]: if im == x86_afs.imm: fmt,t = ('B',u08) elif im == x86_afs.ims: fmt,t = ('b',s08) else: raise ValueError("error encode %s"%str(im)) else: if im == x86_afs.imm: if mnemo_mode == u32: fmt,t = ('I',u32) else: fmt,t = ('H',u16) elif im == x86_afs.ims: if mnemo_mode == u32: fmt,t = ('i',s32) else: fmt,t = ('h',s16) return struct.calcsize(fmt), fmt,t def modrm(self, c): return (c>>6)&3, (c>>3)&7, c&7 def sib(self, c): return self.modrm(c) def init_pre_modrm(self): self.sib_rez_u08_ebp = [{x86_afs.ad:True} for i in range(0x100)] self.sib_rez_u32_ebp = [{x86_afs.ad:True} for i in range(0x100)] self.sib_rez_u32 = [{x86_afs.ad:True} for i in range(0x100)] for sib_rez in [self.sib_rez_u08_ebp, self.sib_rez_u32_ebp, self.sib_rez_u32 ]: for index in range(0x100): ss, i, r = self.modrm(index) if r == 5: if sib_rez == self.sib_rez_u08_ebp: sib_rez[index][x86_afs.imm] = x86_afs.s08 sib_rez[index][x86_afs.reg_dict[x86_afs.r_ebp]] = 1 elif sib_rez == self.sib_rez_u32_ebp: sib_rez[index][x86_afs.imm] = x86_afs.u32 sib_rez[index][x86_afs.reg_dict[x86_afs.r_ebp]] = 1 elif sib_rez == self.sib_rez_u32: sib_rez[index][x86_afs.imm] = x86_afs.u32 else: if sib_rez == self.sib_rez_u08_ebp: sib_rez[index][r]=1 sib_rez[index][x86_afs.imm] = x86_afs.s08 elif sib_rez == self.sib_rez_u32_ebp: sib_rez[index][r]=1 sib_rez[index][x86_afs.imm] = x86_afs.u32 elif sib_rez == self.sib_rez_u32: sib_rez[index][r]=1 if i == 4: continue tmp = i if tmp in sib_rez[index]: sib_rez[index][tmp]+=[1, 2, 4, 8][ss] else: sib_rez[index][tmp] =[1, 2, 4, 8][ss] #32bit self.db_afs = [None for i in range(0x100)] for i in range(0x100): index = i mod, re, rm = self.modrm(i) if mod == 0: if rm == 4: self.db_afs[index] = self.sib_rez_u32 elif rm == 5: self.db_afs[index] = {x86_afs.ad:True, x86_afs.imm:x86_afs.u32} else: self.db_afs[index] = {x86_afs.ad:True, rm:1} elif mod == 1: if rm == 4: self.db_afs[index] = self.sib_rez_u08_ebp continue tmp = {x86_afs.ad:True, rm:1} if rm == 0: tmp[x86_afs.imm] = x86_afs.s08 else: tmp[x86_afs.imm] = x86_afs.s08 self.db_afs[index] = tmp elif mod == 2: if rm == 4: self.db_afs[index] = self.sib_rez_u32_ebp else: self.db_afs[index] = {x86_afs.ad:True, rm:1,x86_afs.imm:x86_afs.u32} elif mod == 3: self.db_afs[index] = {x86_afs.ad:False, rm:1} #16bit self.db_afs_16 = [None for i in range(0x100)] _si = x86_afs.reg_dict[x86_afs.r_si] _di = x86_afs.reg_dict[x86_afs.r_di] _bx = x86_afs.reg_dict[x86_afs.r_bx] _bp = x86_afs.reg_dict[x86_afs.r_bp] for i in range(0x100): index = i mod, re, rm = self.modrm(i) if mod == 0: if rm == 4: self.db_afs_16[index] = {x86_afs.ad:True,_si:1} elif rm == 5: self.db_afs_16[index] = {x86_afs.ad:True,_di:1} elif rm == 6: self.db_afs_16[index] = {x86_afs.ad:True,x86_afs.imm:x86_afs.u16}#{x86_afs.ad:True,_bp:1} elif rm == 7: self.db_afs_16[index] = {x86_afs.ad:True,_bx:1} else: self.db_afs_16[index] = {x86_afs.ad:True, [_si, _di][rm%2]:1, [_bx, _bp][(rm>>1)%2]:1} elif mod in [1,2]: if mod==1: if rm==0: my_imm=x86_afs.s08 else: my_imm=x86_afs.u08 else: my_imm=x86_afs.u16 if rm==4: self.db_afs_16[index] = {x86_afs.ad:True,_si:1, x86_afs.imm:my_imm} elif rm==5: self.db_afs_16[index] = {x86_afs.ad:True,_di:1, x86_afs.imm:my_imm} elif rm==6: self.db_afs_16[index] = {x86_afs.ad:True,_bp:1, x86_afs.imm:my_imm} elif rm==7: self.db_afs_16[index] = {x86_afs.ad:True,_bx:1, x86_afs.imm:my_imm} else: self.db_afs_16[index] = {x86_afs.ad:True, [_si, _di][rm%2]:1, [_bx, _bp][(rm>>1)%2]:1, x86_afs.imm:my_imm} elif mod == 3: self.db_afs_16[index] = {x86_afs.ad:False, rm:1} def addop(self, name, opc, afs, rm, modif_desc, prop_dict, sem): prop_dict.update(sem) modifs = dict([[x, True] for x in modif_desc]) base_modif = dict([[x, None] for x in [w8, se, sw, ww, sg, dr, cr, ft, w64, sd, wd, bkf, spf, dtf]]) base_modif.update(modifs) #update with forced properties base_modif.update(prop_dict) base_mnemo = [(opc, base_modif)] #XXX zarb: default se inverted? if se in modif_desc: tmp = base_mnemo[0][1] tmp[se] = False base_mnemo = [(base_mnemo[0][0], tmp)] log.debug(modifs) for modif in modifs: base_mnemo_add = [] for opc, n_m in base_mnemo: n_m = dict(n_m) n_m[modif]= not n_m[modif] opc = opc[:] opc[modif_desc[modif][0]] |=(1< modrm encoding , {} addop("swapgs",[0x0F, 0x01], d7 , no_rm , {} ,{} , {}, ) """ , {} addop("syscall",[0x0F, 0x05], noafs, no_rm , {} ,{} , {bkf:True}, ) addop("sysenter",[0x0F, 0x34], noafs, no_rm , {} ,{} , {bkf:True}, ) addop("sysexit",[0x0F, 0x35], noafs, no_rm , {} ,{} , {bkf:True}, ) addop("sysret",[0x0F, 0x07], noafs, no_rm , {} ,{} , {bkf:True}, ) addop("test", [0xA8], noafs, [r_eax,imm] , {w8:(0,0)} ,{} , {}, ) addop("test", [0xF6], d0, [imm] , {w8:(0,0)} ,{} , {}, ) addop("test", [0x84], noafs, [rmr] , {w8:(0,0)} ,{sw:True} , {}, ) addop("ud2", [0x0F, 0x0B], noafs, no_rm , {} ,{} , {}, ) addop("verr", [0x0F, 0x00], d4 , no_rm , {} ,{} , {}, ) addop("verw", [0x0F, 0x00], d5 , no_rm , {} ,{} , {}, ) #ddop("wait", [0x9B], noafs, no_rm , {} ,{} , {}, ) addop("wbinvd",[0x0F, 0x09], noafs, no_rm , {} ,{} , {}, ) addop("wrmsr", [0x0F, 0x30], noafs, no_rm , {} ,{} , {}, ) addop("xadd", [0x0F, 0xC0], noafs, [rmr] , {w8:(1,0)} ,{} , {}, ) addop("xchg", [0x90], reg , [r_eax] , {} ,{} , {}, ) addop("nop", [0x90], noafs, no_rm , {} ,{} , {}, ) addop("xchg", [0x86], noafs, [rmr] , {w8:(0,0)} ,{} , {}, ) addop("xlat", [0xD7], noafs, no_rm , {} ,{} , {}, ) addop("xor", [0x34], noafs, [r_eax,imm] , {w8:(0,0)} ,{} , {}, ) addop("xor", [0x80], d6, [imm] , {w8:(0,0),se:(0,1)},{} , {}, ) addop("xor", [0x30], noafs, [rmr] , {w8:(0,0),sw:(0,1)},{} , {}, ) addop("monitor",[0x0F, 0x01, 0xC8],noafs, no_rm , {} ,{} , {}, ) addop("mwait", [0x0F, 0x01, 0xC9], noafs, no_rm , {} ,{} , {}, ) #x87 fpu , {} addop("fabs", [0xD9, 0xE1], noafs, no_rm , {} ,{} , {}, ) addop("fadd", [0xD8], d0, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fadd", [0xD8, 0xC0], reg, [r_eax] , {sw:(0,2)} ,{sd:False,sw:False},{}, ) addop("fiadd", [0xDA], d0, no_rm , {wd:(0,2)} ,{} , {}, ) addop("faddp", [0xDE, 0xC0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fbld", [0xDF], d4, no_rm , {} ,{} , {}, ) addop("fbstp", [0xDF], d6, no_rm , {} ,{} , {}, ) addop("fchs", [0xD9, 0xE0], noafs, no_rm , {} ,{} , {}, ) #ddop("fclex", [0x9B, 0xDB, 0xE2], noafs, no_rm , {} ,{} , {}, ) #XXX no mnemo addop("fnclex",[0xDB, 0xE2], noafs, no_rm , {} ,{} , {}, ) addop("fcmovb",[0xDA, 0xC0], reg, [r_eax] , {} ,{} , {}, ) addop("fcmove",[0xDA, 0xC8], reg, [r_eax] , {} ,{} , {}, ) addop("fcmovbe",[0xDA, 0xD0], reg, [r_eax] , {} ,{} , {}, ) addop("fcmovu",[0xDA, 0xD8], reg, [r_eax] , {} ,{} , {}, ) addop("fcmovnb",[0xDB, 0xC0], reg, [r_eax] , {} ,{} , {}, ) addop("fcmovne",[0xDB, 0xC8], reg, [r_eax] , {} ,{} , {}, ) addop("fcmovnbe",[0xDB, 0xD0], reg, [r_eax] , {} ,{} , {}, ) addop("fcmovnu",[0xDB, 0xD8], reg, [r_eax] , {} ,{} , {}, ) addop("fcom", [0xD8], d2, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fcom", [0xD8, 0xD0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fcomp", [0xD8], d3, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fcomp", [0xD8, 0xD8], reg, no_rm , {} ,{sd:False} , {}, ) addop("fcompp",[0xDE, 0xD9], noafs, no_rm , {} ,{} , {}, ) addop("fcomi", [0xDB, 0xF0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fcomip",[0xDF, 0xF0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fucomi",[0xDB, 0xE8], reg, no_rm , {} ,{sd:False} , {}, ) addop("fucomi",[0xDF, 0xE8], reg, no_rm , {} ,{sd:False} , {}, ) addop("ficom", [0xDA], d2, no_rm , {wd:(0,2)} ,{} , {}, ) addop("ficomp",[0xDA], d3, no_rm , {wd:(0,2)} ,{} , {}, ) addop("fdiv", [0xD8], d6, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fdiv", [0xD8, 0xF0], reg, [r_eax] , {sw:(0,2)} ,{sd:False,sw:False},{}, ) addop("fidiv", [0xDA], d6, no_rm , {wd:(0,2)} ,{} , {}, ) addop("fdivp", [0xDE, 0xF8], reg, no_rm , {} ,{sd:False} , {}, ) addop("fdivr", [0xD8], d7, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fdivr", [0xD8, 0xF8], reg, [r_eax] , {sw:(0,2)} ,{sd:False,sw:False},{}, ) addop("fidivr",[0xDA], d7, no_rm , {wd:(0,2)} ,{} , {}, ) addop("fdivrp",[0xDE, 0xF0], reg, no_rm , {} ,{sd:False} , {}, ) addop("ffree", [0xDD, 0xC0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fwait", [0x9B], noafs, no_rm , {} ,{} , {}, ) addop("fild", [0xDB], d0, no_rm , {wd:(0,2)} ,{wd:False} , {}, ) addop("fild", [0xDF], d5, no_rm , {} ,{sd:True,wd:False}, {}, ) #XXX 64 addop("fincstp",[0xD9, 0xF7], noafs, no_rm , {} ,{} , {}, ) #ddop("finit", [0x9B, 0xDB, 0xE3], noafs, no_rm , {} ,{} , {}, ) #XXX no mnemo addop("fninit",[0xDB, 0xE3], noafs, no_rm , {} ,{} , {}, ) addop("fist", [0xDB], d2, no_rm , {wd:(0,2)} ,{wd:False} , {}, ) addop("fistp", [0xDB], d3, no_rm , {wd:(0,2)} ,{wd:False} , {}, ) addop("fistp", [0xDF], d7, no_rm , {} ,{sd:False} , {}, ) #XXX 64 addop("fisttp",[0xDB], d1, no_rm , {wd:(0,2)} ,{wd:False} , {}, ) addop("fisttp",[0xDD], d1, no_rm , {} ,{sd:False} , {}, ) #XXX 64 addop("fmul", [0xD8], d1, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fmul", [0xD8, 0xC8], reg, [r_eax] , {sw:(0,2)} ,{sd:False,sw:False},{}, ) addop("fimul", [0xDA], d1, no_rm , {wd:(0,2)} ,{} , {}, ) addop("fmulp", [0xDE, 0xC8], reg, no_rm , {} ,{sd:False} , {}, ) addop("frstor",[0xDD], d4, no_rm , {} ,{wd:False} , {}, ) #XXX 94/108 #ddop("fsave", [0x9B, 0xDD], d6, no_rm , {} ,{wd:False} , {}, ) #XXX no mnemo addop("fnsave",[0xDD], d6, no_rm , {} ,{wd:False} , {}, ) #XXX 94/108 addop("fst", [0xD9], d2, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fst", [0xDD, 0xD0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fstp", [0xD9], d3, no_rm , {sd:(0,2)} ,{sd:True} , {}, ) addop("fstp", [0xDB], d7, no_rm , {} ,{sd:False} , {}, ) #XXX 80 addop("fstp", [0xDD, 0xD8], reg, no_rm , {} ,{sd:False} , {}, ) #ddop("fstcw", [0x9B, 0xD9], d7, no_rm , {} ,{wd:False} , {}, ) #XXX no mnemo addop("fnstcw",[0xD9], d7, no_rm , {} ,{wd:True} , {}, ) #ddop("fstenv",[0x9B, 0xD9], d6, no_rm , {} ,{wd:False} , {}, ) #XXX no mnemo addop("fnstenv",[0xD9], d6, no_rm , {} ,{wd:False} , {}, ) addop("f2xm1", [0xD9, 0xF0], noafs, no_rm , {} ,{} , {}, ) addop("fnop", [0xD9, 0xD0], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fpatan",[0xD9, 0xF3], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fprem", [0xD9, 0xF8], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fprem1",[0xD9, 0xF5], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fptan", [0xD9, 0xF2], noafs, no_rm , {} ,{sd:False} , {}, ) addop("frndint",[0xD9, 0xFC], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fscale",[0xD9, 0xFD], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fsin", [0xD9, 0xFE], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fsincos",[0xD9, 0xFB], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fsqrt", [0xD9, 0xFA], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fcos", [0xD9, 0xFF], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fdecstp",[0xD9, 0xF6], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fld", [0xD9], d0, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fld", [0xDB], d5, no_rm , {} ,{sd:False} , {}, ) #XXX 80 addop("fld", [0xD9, 0xC0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fldcw", [0xD9], d5, no_rm , {} ,{wd:True} , {}, ) addop("fldenv",[0xD9], d4, no_rm , {} ,{wd:False} , {}, ) addop("fld1", [0xD9, 0xE8], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fldl2t",[0xD9, 0xE9], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fldl2e",[0xD9, 0xEA], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fldpi", [0xD9, 0xEB], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fldlg2",[0xD9, 0xEC], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fldln2",[0xD9, 0xED], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fldz", [0xD9, 0xEE], noafs, no_rm , {} ,{sd:False} , {}, ) #ddop("fstsw", [0x9B, 0xDD], d7, no_rm , {} ,{wd:False} , {}, ) #XXX no mnemo addop("fnstsw",[0xDD], d7, no_rm , {} ,{wd:True} , {}, ) #ddop("fstsw",[0x9B, 0xDF, 0xE0], noafs, no_rm , {} ,{wd:False} , {}, ) #XXX no mnemo addop("fnstsw",[0xDF, 0xE0], noafs, no_rm , {} ,{wd:False} , {}, ) addop("fsub", [0xD8], d4, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fsub", [0xD8, 0xE0], reg, [r_eax] , {sw:(0,2)} ,{sd:False,sw:False},{}, ) addop("fisub", [0xDA], d4, no_rm , {wd:(0,2)} ,{} , {}, ) addop("fsubp", [0xDE, 0xE8], reg, no_rm , {} ,{sd:False} , {}, ) addop("fsubr", [0xD8], d5, no_rm , {sd:(0,2)} ,{} , {}, ) addop("fsubr", [0xD8, 0xE8], reg, [r_eax] , {sw:(0,2)} ,{sd:False,sw:False},{}, ) addop("fisubr",[0xDA], d5, no_rm , {wd:(0,2)} ,{} , {}, ) addop("fsubrp",[0xDE, 0xE0], reg, no_rm , {} ,{sd:False} , {}, ) addop("ftst", [0xD9, 0xE4], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fucom", [0xDD, 0xE0], reg, no_rm , {} ,{sd:False} , {}, ) addop("fucomp",[0xDD, 0xE8], reg, no_rm , {} ,{sd:False} , {}, ) addop("fucompp",[0xDA, 0xE9], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fxam", [0xD9, 0xE5], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fxch", [0xD9, 0xC8], reg, no_rm , {} ,{sd:False} , {}, ) addop("fxrstor",[0x0f, 0xAE], d1, no_rm , {} ,{sd:False} , {}, ) #XXX 512 addop("fxrsave",[0x0f, 0xAE], d0, no_rm , {} ,{sd:False} , {}, ) #XXX 512 addop("fxtract",[0xD9, 0xF4], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fyl2x", [0xD9, 0xF1], noafs, no_rm , {} ,{sd:False} , {}, ) addop("fyl2xp1",[0xD9, 0xF9], noafs, no_rm , {} ,{sd:False} , {}, ) pm = self.db_mnemo[0x9c] self.pushfw_m = mnemonic(pm.name, pm.opc, pm.afs, pm.rm, pm.modifs, pm.modifs_orig, None)#, pm.sem) self.pushfw_m.name = "pushfw" pm = self.find_mnemo("lodsd")[0] self.lodsw_m = mnemonic(pm.name, pm.opc, pm.afs, pm.rm, pm.modifs, pm.modifs_orig, None)#, pm.sem) self.lodsw_m.name = "lodsw" pm = self.find_mnemo("stosd")[0] self.stosw_m = mnemonic(pm.name, pm.opc, pm.afs, pm.rm, pm.modifs, pm.modifs_orig, None)#, pm.sem) self.stosw_m.name = "stosw" pm = self.find_mnemo("movsd")[0] self.movsw_m = mnemonic(pm.name, pm.opc, pm.afs, pm.rm, pm.modifs, pm.modifs_orig, None)#, pm.sem) self.movsw_m.name = "movsw" x86mndb = x86allmncs() class x86_mnemo_metaclass(type): rebuilt_inst = True def dis(cls, op, admode = u32, opmode = u32, sex = 0): i = cls.__new__(cls) i.__init__(admode, opmode, sex) i.size_op = opmode i.size_ad = admode u = i._dis(op) if not u: return None #XXX error in scasd mnemo 16 bit if i.m.name == "scasd": if i.size_op == u16: i.m.name = "scasw" return i def asm(cls, l, symbol_off = []): i = cls.__new__(cls) i.__init__(admode = u32, opmode = u32, sex = 0) return i._asm(l, symbol_off) def has_symb(cls, a): if type(a) in [int, long]+tab_int_size.keys(): return False if x86_afs.symb in a: return True return False def fix_symbol(cls, a, symbol_pool = None): if type(a) in [int, long]+tab_int_size.keys(): return a cp = dict(a) if not x86_afs.symb in cp: return cp if not symbol_pool: del cp[x86_afs.symb] if not x86_afs.imm in cp: cp[x86_afs.imm] = 0 return cp imm_total = 0 if x86_afs.imm in cp: imm_total+=cp[x86_afs.imm] for s in cp[x86_afs.symb]: base_ad = symbol_pool.s['base_address'].offset_g imm_total+=cp[x86_afs.symb][s]*(symbol_pool.s[s.name].offset_g+base_ad) cp[x86_afs.imm] = imm_total del cp[x86_afs.symb] return cp def is_mem(cls, a): return x86_afs.ad in a and a[x86_afs.ad] def get_label(cls, a): if not x86_afs.symb in a: return None n = a[x86_afs.symb] if len(n)!=1: return None k = n.keys()[0] if n[k] != 1: return None return k def get_symbols(cls, a): if not x86_afs.symb in a: return None return a[x86_afs.symb].items() def set_symbols(cls, a, s): print a, s def names2symbols(cls, a, s_dict): all_s = a[x86_afs.symb] for name, s in s_dict.items(): count = all_s[name] del(all_s[name]) all_s[s] = count class x86_mn: __metaclass__ = x86_mnemo_metaclass def __init__(self, admode = u32, opmode = u32, sex = 0): self.admode = admode self.opmode = opmode self.mnemo_mode = self.admode self.size_op = u32 self.size_ad = u32 @classmethod def prefix2hex(self, prefix): return reduce(lambda x,y:x+chr(y), prefix, "") def breakflow(self): return self.m.modifs[bkf] def splitflow(self): return self.m.modifs[spf] def dstflow(self): return self.m.modifs[dtf] def getnextflow(self): return self.offset+self.l def getdstflow(self): if len(self.arg) !=1: print ValueError('should be 1 arg %s'%str(self)) return [] a = self.arg[0] if is_imm(a) and not x86_afs.symb in a: dst = (self.offset+self.l+a[x86_afs.imm])&tab_max_uint[self.size_ad] out = [dst] else: out = [a] return out def setdstflow(self, dst): if len(self.arg) !=1: print ValueError('should be 1 arg %s'%str(self)) return if len(dst)==0: return if len(dst)!=1: raise ValueError('should be 1 dst') l = dst[0] #patch only known symbols if l.offset !=None: self.arg = [{x86_afs.symb:{l:1}}] def fixdst(self, lbls, my_offset, is_mem): if len(self.arg) !=1: raise ValueError('should be 1 arg %s'%str(self)) a = self.arg[0] l = a[x86_afs.symb].keys()[0] offset = lbls[l.name] if is_mem: arg = {x86_afs.ad:is_mem, x86_afs.imm:offset} else: arg = {x86_afs.imm:offset-(my_offset)} self.arg = [arg] def is_subcall(self): return self.m.name == 'call' def __str__(self): if type(self.instr_string) is str: return self.instr_string args_str = "" for p in self.prefix: if p in prefix_dic_inv: args_str += prefix_dic_inv[p]+" " args_str+="%-10s"%(self.m.name) for a in self.arg: if type(a) in [int, long]: raise ValueError("should be intsized %s"%str(a)) if type(a) in tab_int_size: raise ValueError("should be dict.. %s"%str(a)) elif type(a) == dict: args_str+="%s, "%dict_to_ad(a, self.m.modifs, self.mnemo_mode) else: raise ValueError("arg zarbi %s"%str(a)) return args_str[:-2] def intsize(self, im, ext = False): if ext: return [uint16, uint32][self.mnemo_mode == u32](im) if self.m.modifs[w8]: return uint8(im) if self.mnemo_mode == u32: return uint32(im) elif self.mnemo_mode == u16: return uint16(im) else: raise ValueError('unknown mnemo mode %s'%str(im)) def _dis(self, bin): size_op = self.size_op size_ad = self.size_ad if type(bin) == str: from miasm.core.bin_stream import bin_stream bin = bin_stream(bin) init_offset = bin.offset try: #find mnemonic l = x86mndb.db_mnemo index = 0 m = None read_prefix = [] prefix_done =False while True: c = ord(bin.readbs()) if not prefix_done and c in x86_afs.x86_prefix: read_prefix.append(c) continue else: prefix_done = True if l[c] == None: log.debug( "unknown mnemo") break if isinstance(l[c] ,mnemonic): m = l[c] break if type(l[c]) == list: l = l[c] if m == None: return None self.m = m log.debug(m) log.debug("prefix: %s"%str(read_prefix)) self.mnemo_mode = self.admode if 0x66 in read_prefix: self.mnemo_mode = [u16,u32][self.mnemo_mode==u16] self.size_op = [u16,u32][size_op == u16] if 0x67 in read_prefix: self.size_ad = [u16,u32][size_ad == u16] #parse mnemonic args mnemo_args = [] afs, dibs = m.afs, m.rm modrm = None #digit if afs in [d0, d1, d2, d3, d4, d5, d6, d7]: re, modr = x86mndb.get_afs(bin, c, self.size_ad) mnemo_args.append(modr) mnemo_args[-1][x86_afs.size] = self.mnemo_mode if m.modifs[sd] is not None: if m.modifs[sd]: mnemo_args[-1][x86_afs.size] = x86_afs.f32 else: mnemo_args[-1][x86_afs.size] = x86_afs.f64 if m.modifs[w8]: mnemo_args[-1][x86_afs.size] = x86_afs.u08 if m.modifs[wd]: #XXX check (for fnst??)= mnemo_args[-1][x86_afs.size] = x86_afs.u16 #+reg elif afs == reg: mafs = dict(x86mndb.get_afs_re(c&(0xFF^mask_reg))) if m.modifs[w8]: mafs[x86_afs.size] = x86_afs.u08 else: mafs[x86_afs.size] = self.mnemo_mode mnemo_args.append(mafs) #rm mod elif afs in [noafs, cond]: if rmr in m.rm: c = ord(bin.readbs()) re, modr = x86mndb.get_afs(bin, c, self.size_ad) reg_cat = 0 if m.modifs[dr]: reg_cat+=0x8 if m.modifs[cr]: reg_cat+=0x10 if m.modifs[sg]: reg_cat+=0x20 mafs = dict(x86mndb.get_afs_re(re+reg_cat)) if m.modifs[w8]: mafs[x86_afs.size] = x86_afs.u08 else: mafs[x86_afs.size] = self.mnemo_mode mnemo_args.append(mafs) mnemo_args.append(modr) mnemo_args[-1][x86_afs.size] = self.mnemo_mode if m.modifs[w8] : mnemo_args[-1][x86_afs.size] = x86_afs.u08 if m.modifs[se] !=None and not (imm in dibs or ims in dibs): mnemo_args[-1][x86_afs.size] = [x86_afs.u08, x86_afs.u16][m.modifs[se]] if m.modifs[wd]: mnemo_args[-1][x86_afs.size] = x86_afs.u16 mnemo_args[-2][x86_afs.size] = x86_afs.u16 if m.modifs[sg]: mnemo_args[-2][x86_afs.size] = x86_afs.size_seg if afs == cond and m.name.startswith('set'): mnemo_args.pop(0) elif afs == cond: pass else: raise ValueError('bug in %s %d'%(name, afs)) #swap args? if m.modifs[sw]: mnemo_args.reverse() dib_out = [] for dib in dibs: #unsigned log.debug(m.modifs) if dib in [u08, s08, u16, s16, u32, s32]: if self.mnemo_mode !=u32: if dib == u32: dib = u16 if dib == s32: dib = s16 l = struct.calcsize(x86_afs.dict_size[dib]) d = struct.unpack(x86_afs.dict_size[dib], bin.readbs(l))[0] d = self.intsize(d) dib_out.append({x86_afs.imm:d}) elif dib in [imm, ims]: taille, fmt, t = x86mndb.get_im_fmt(m.modifs, self.mnemo_mode, dib) dib_out.append({x86_afs.imm:self.intsize(struct.unpack(fmt, bin.readbs(taille))[0], dib==ims)}) elif dib in [im1, im3]: dib_out.append({im1:{x86_afs.imm:self.intsize(1)},im3:{x86_afs.imm:self.intsize(3)}}[dib]) elif dib == rmr: continue elif dib == r_eax: mafs = dict(x86mndb.get_afs_re(x86_afs.reg_dict[x86_afs.r_eax])) if m.modifs[w8]: mafs[x86_afs.size] = x86_afs.u08 else: mafs[x86_afs.size] = self.mnemo_mode r = mafs if len(mnemo_args): if m.modifs[sw]: mnemo_args = mnemo_args+[r] else: mnemo_args = [r]+mnemo_args else: dib_out.append(r) elif dib == mim: l = struct.calcsize(x86_afs.dict_size[self.size_ad]) d = struct.unpack(x86_afs.dict_size[self.size_ad], bin.readbs(l))[0] d = uint32(d) size = [self.mnemo_mode, x86_afs.u08][m.modifs[w8]] dib_out.append({x86_afs.ad:True, x86_afs.size:size, x86_afs.imm:d}) elif dib in [r_cl, r_dx]: dib_out.append(dib) pass elif dib in segm_regs: size = self.mnemo_mode seg_regs = segm_regs if not dib in segm_regs: raise ValueError('segment reg not found', dib) r = dib dib_out.append({x86_afs.ad:False, x86_afs.size : size, x86_afs.reg_dict[r]:1}) else: raise ValueError('bad dib!!%X'%dib) mnemo_args+=dib_out for a in mnemo_args: for p in read_prefix: if is_address(a) and p in prefix_seg.values(): a[x86_afs.segm]=prefix_seg_inv[p] continue t_len = bin.offset-init_offset bin.offset = init_offset bytes_ret = bin.readbs(t_len) self.offset = init_offset self.instr_string = None self.l = t_len self.b = bytes_ret self.m = m self.arg = mnemo_args self.prefix = read_prefix #XXX really need to include this in disasm if 0x66 in read_prefix and self.m.name == "pushfd": self.m = x86mndb.pushfw_m if 0x66 in read_prefix and self.m.name == "lodsd": self.m = x86mndb.lodsw_m if 0x66 in read_prefix and self.m.name == "stosd": self.m = x86mndb.stosw_m if 0x66 in read_prefix and self.m.name == "movsd": self.m = x86mndb.movsw_m return True except IOError: log.warning( "cannot dis: not enougth bytes") return None @classmethod def parse_mnemo(self, l): tokens = [t for t in shlex.shlex(l)] prefix = [] if not tokens: raise ValueError('cannot parse mnemo?', l) while True: name = tokens[0] tokens = tokens[1:] if name in prefix_dic: prefix.append(name) continue break args = [] arg = [] s = ',' while s in tokens: i = tokens.index(s) args.append(tokens[:i]) tokens = tokens[i+1:] args.append(tokens) args = map(lambda x: reduce(lambda x,y: x+' '+y, x, ""), args) if args == ['']: return prefix, name, [] for a in args: if x86_afs.segm in a: prefix.append(x86_afs.reg_sg.index(a[x86_afs.segm])) return prefix, name, args @classmethod def parse_address(self, a): return parse_ad(a) def asm_parse(self, l): log.debug("asm: %s"%l) prefix, name, args = x86_mn.parse_mnemo(l) prefix = [prefix_dic[x] for x in prefix] log.debug("name: %s"%name) log.debug("args: %s"%str(args)) args_eval = [] for a in args: args_eval.append(x86_mn.parse_address(a)) if x86_afs.segm in args_eval[-1]: # XXX todo hack: if only one arg, no prefix if len(args) == 1: continue print args_eval[-1] fds prefix.append(prefix_seg[args_eval[-1][x86_afs.segm]]) del args_eval[-1][x86_afs.segm] #XXX test if symbol in arg and replace with imm... for pre asm if x86_afs.symb in args_eval[-1]: log.debug('pre-assembling with symbol! %s'%str(args_eval[-1][x86_afs.symb])) if not x86_afs.imm in args_eval[-1]: args_eval[-1][x86_afs.imm] = 0 del args_eval[-1][x86_afs.symb] log.info("prefix:%s"%str(prefix)) log.info('eval: %s'%str(args_eval)) #search all candidates log.debug('Find mnemo') candidate = x86mndb.find_mnemo(name) if not candidate: log.warning("no mnemonic found") can_be_16_32 = True log.debug("candi:") for c in candidate: if c.modifs[sd] or c.modifs[wd]: can_be_16_32 = False log.debug( c) #test for 16/32 bit mode if can_be_16_32: self.mnemo_mode = None for a in args_eval: #32 priority if (is_reg(a)) and a[x86_afs.size] == u32: self.mnemo_mode = u32 break #XXX if eax, cx .... 32 bit bug if (is_reg(a) or is_address(a)) and a[x86_afs.size] == u16 and self.mnemo_mode == None: self.mnemo_mode = u16 break if self.mnemo_mode == None: self.mnemo_mode = u32 if self.mnemo_mode == u16: log.debug("16 bit mode detected for %s"%str(l)) prefix.append(0x66) if name in ["movzx", "movsx"]: if args_eval[0][x86_afs.size] == u16: args_eval[0][x86_afs.size] = u32 if args_eval[0][x86_afs.ad]: args_eval[0][x86_afs.ad] = u32 else: for a in args_eval: if a[x86_afs.size] == u16: a[x86_afs.size] = u32 if a[x86_afs.ad]: a[x86_afs.ad] = u32 else: self.mnemo_mode = u32 log.info('eval2: %s'%str(args_eval)) modifs = dict([[x, None] for x in [w8, se, sw, ww, sg, dr, cr, ft, w64, sd, wd]]) modifs[sw] = False #spot dr/cr regs for a in args_eval: for x in a: if type(x) in [int, long] and x>=0x100: tmp = a[x] for y in mask_drcrsg: if x & mask_drcrsg[y]: modifs[y] = True candidate_out = [] for c in candidate: if (modifs[cr] or c.modifs[cr]) and modifs[cr] != c.modifs[cr]: continue if (modifs[dr] or c.modifs[dr]) and modifs[dr] != c.modifs[dr]: continue if (modifs[sg] or c.modifs[sg]) and modifs[sg] != c.modifs[sg]: continue args_sample = [dict(x) for x in args_eval] afs, dibs = c.afs, c.rm log.debug(c) parsed_args = [] parsed_val = [{}] out_opc = [c.opc[:]] opc_add = [] good_c = True dib_out = [] for dib in dibs: if dib in [u08, s08, u16, s16, u32, s32]: index_im = [-1, 0][afs == noafs] if len(args_sample)<=0: good_c = False break if not x86_afs.imm in args_sample[index_im] or args_sample[index_im][x86_afs.ad]: log.debug("not imm 1") good_c = False break if self.mnemo_mode !=u32: if dib == u32: dib = u16 if dib == s32: dib = s16 size = dib v = check_imm_size(args_sample[index_im][x86_afs.imm], size) if v == None: log.debug("cannot encode this val in size %s %x!"%(size, args_sample[index_im][x86_afs.imm])) good_c= False break args_sample[index_im][x86_afs.size] = size args_sample[index_im][x86_afs.imm] = tab_size2int[size](v) opc_add.append({x86_afs.size:size, x86_afs.imm:args_sample[index_im][x86_afs.imm]}) r = args_sample[index_im] del args_sample[index_im] dib_out.append(r) elif dib in [im1, im3]: if x86_afs.imm in args_sample[-1] and args_sample[-1][x86_afs.imm] =={im1:1,im3:3}[dib]: dib_out.append(args_sample.pop()) else: log.debug("not im val fixed") good_c = False break elif dib in [imm, ims]: if len(args_sample)<=0: good_c = False break if not x86_afs.imm in args_sample[-1] or args_sample[-1][x86_afs.ad]: log.debug("not imm 2") good_c = False break taille, fmt, t = x86mndb.get_im_fmt(c.modifs, self.mnemo_mode, dib) r = args_sample.pop() v = check_imm_size(r[x86_afs.imm], t) if v == None: log.debug("cannot encode this val in size %s %x!"%(t, int(r[x86_afs.imm]))) good_c= False break r[x86_afs.imm] = tab_size2int[t](v) opc_add.append({x86_afs.size:t, x86_afs.imm:r[x86_afs.imm]}) if c.modifs[se]: r[x86_afs.size] = r[x86_afs.size] r[x86_afs.imm] = tab_size2int[r[x86_afs.size]](r[x86_afs.imm]) dib_out.append(r) elif dib == rmr: continue elif dib == r_eax: if not args_sample or args_sample[0][x86_afs.ad]: log.debug("not r_eax1") good_c = False break size = args_sample[0][x86_afs.size] if not x86mndb.check_size_modif(size, c.modifs): log.debug(' bad reg size') good_c = False break if c.modifs[sw]: index = 1 if len(args_sample) !=2: raise ValueError("sw in r_eax zarb") else: index = 0 if not x86_afs.reg_dict[x86_afs.r_eax] in args_sample[index]: log.debug("not r_eax2") good_c = False break #add front if size == x86_afs.u32: args_sample[index][x86_afs.size] = self.mnemo_mode r = args_sample[index] del(args_sample[index]) if len(args_sample) and not c.modifs[sw]: parsed_args.append(r) else: dib_out.append(r) elif dib in [r_cl, r_dx]: index_im = [-1, 0][afs == noafs] dib_tmp = dict(dib) del(dib_tmp[x86_afs.size]) del(args_sample[index_im][x86_afs.size]) #XXX in al, dx => spot 16 bit manip; concat 66 bug if dib_tmp != args_sample[index_im]: log.debug("not r_cl d_dx") good_c = False break r = args_sample[index_im] del args_sample[index_im] dib_out.append(r) elif dib == mim: if len(args_sample)<=0: good_c = False break if not x86_afs.imm in args_sample[0] or not x86_afs.ad in args_sample[0] or not args_sample[0][x86_afs.ad]: log.debug("not mim") good_c = False break for k in args_sample[0]: if not k in [x86_afs.imm, x86_afs.ad, x86_afs.size]: log.debug("mim: cannot encode reg ") good_c = False break a_mem = {x86_afs.size:u32, x86_afs.imm:uint32(args_sample[0][x86_afs.imm])} opc_add.append(a_mem) del args_sample[0] a_pmem = dict(a_mem) a_pmem[x86_afs.ad] = u32 parsed_args.append(a_pmem) elif dib in segm_regs: fds else: raise ValueError('bad dib!!%X'%dib) if not good_c: continue log.debug("***pass dib***") log.debug(modifs) if afs in [d0, d1, d2, d3, d4, d5, d6, d7]: if len(args_sample)!=1: log.debug(str(c)+' bad arg num1') continue if args_sample[0][x86_afs.ad]: size = args_sample[0][x86_afs.ad] if not c.modifs[sd] == None: size = {x86_afs.u16:x86_afs.u16, x86_afs.u32:x86_afs.u32, x86_afs.f32:x86_afs.f32, x86_afs.f64:x86_afs.f64}[size] else: size = args_sample[0][x86_afs.size] if not x86mndb.check_size_modif(size, c.modifs): log.debug(' bad size digit') continue a = dict(args_sample[-1]) out_opc, parsed_val = x86mndb.forge_opc(out_opc, a) if out_opc == None or parsed_val == None: log.debug('cannot encode opc') continue parsed_args.append(args_sample.pop()) elif afs == reg: if len(args_sample)!=1: log.debug(str(c)+' bad arg num') continue if args_sample[0][x86_afs.ad]: log.debug(' address in reg') continue size = args_sample[0][x86_afs.size] if not x86mndb.check_size_modif(size, c.modifs): log.debug(' bad size reg') continue a = args_sample[-1] k = [x for x in a.keys() if type(x) in [long, int]] if a[x86_afs.ad] or x86_afs.imm in a or len(k)!=1: log.debug('bad a2 %s'%str(a)) continue out_opc[0][-1]+=k[0] parsed_args.append(args_sample.pop()) elif afs == noafs or (afs == cond and rmr in c.rm and len(args_sample)==2): if rmr in c.rm: if len(args_sample)!=2: log.debug(str(c)+' bad arg num') continue if c.modifs[sw] and args_sample[1][x86_afs.ad]: log.debug(' bad sw rmr 1') continue if not c.modifs[sw] and args_sample[0][x86_afs.ad]: log.debug(' bad sw rmr 2') continue for i in range(2): if not args_sample[i][x86_afs.ad] and x86_afs.imm in args_sample[i]: good_c = False log.debug('Imm in rmr') break if not good_c: continue size = [] for x in xrange(2): size.append(args_sample[x][x86_afs.size]) if not (imm in dibs or ims in dibs): if c.modifs[sw]: size.reverse() if c.modifs[se]!=None: if size[1] != [x86_afs.u08, x86_afs.u16][c.modifs[se]]: log.debug(' bad size se rmr') continue elif not x86mndb.check_size_modif(size[0], c.modifs): log.debug(' bad size rmr') continue #reg, modr a1 = dict(args_sample[-1]) a2 = dict(args_sample[-2]) args_sample = args_sample[:-2] if c.modifs[sw]: tmp_order = [a2,a1] else: tmp_order = [a1,a2] for y in mask_drcrsg: if not modifs[y]: continue for x in tmp_order[1]: if not type(x) in [int, long]: continue if not x&mask_drcrsg[y]: log.debug('cr dr sg not found in reg') good_c = False break tmp = tmp_order[1][x] del(tmp_order[1][x]) tmp_order[1][x&0xFF] = tmp if not good_c: continue out_opc, parsed_val = x86mndb.forge_opc(out_opc, *tmp_order) if out_opc == None or parsed_val == None: log.debug('cannot encode opc') continue tmp_o = [a2,a1] if c.modifs[se] and size[0] !=size[1]: size[1] = size[0] if size[0] !=size[1] and name !='movzx': if tmp_order[0][x86_afs.ad]: size[1] = size[0] else: log.debug('uncompatible size in rmr') continue for i in range(2): tmp_o[-1][x86_afs.size] = size[i] parsed_args+=tmp_o elif afs == cond: if rmr in c.rm: if len(args_sample)!=1: log.debug(str(c)+' bad arg num cond rmr') continue if args_sample[0][x86_afs.ad]: size = args_sample[0][x86_afs.ad] else: size = args_sample[0][x86_afs.size] a = dict(args_sample[-1]) add_out_opc, parsed_val = x86mndb.forge_opc([[0]], a) if add_out_opc == None or parsed_val == None: log.debug('cannot encode opc') continue parsed_args.append(args_sample.pop()) out_opc[0]+=add_out_opc[0] else: raise ValueError('erf ', afs) for do in dib_out: parsed_args.append(do) if len(args_sample): log.debug('too many args!') continue if self.mnemo_mode == u16: for a in parsed_args: if not x86_afs.size in a: a[x86_afs.size] = u16 continue if a[x86_afs.size] == u32: a[x86_afs.size] = u16 if a[x86_afs.ad]: a[x86_afs.ad] = u16 log.debug( "ok") log.debug(out_opc) log.debug(parsed_val) log.debug(parsed_args) for i in range(len(out_opc)): candidate_out.append((c, parsed_args, (out_opc[i], parsed_val[i], opc_add), self.mnemo_mode)) return prefix, candidate_out def _asm(self, l, symbol_off_out): log.debug("asm: %s"%l) prefix, candidate_out = self.asm_parse(l) symbol_off = [] log.info("selected candidate for:") log.info(l) hex_candidate = [] for c,eat_arg,opc_o, mnemo_mode in candidate_out: log.info(str(c)+' '+str(eat_arg)+' '+str(opc_o)) out_opc = prefix[:] out_opc += opc_o[0] val_add = [opc_o[1]]+opc_o[2] out_byte = reduce(lambda x,y: x+chr(y), out_opc, "") for c in val_add: if c == {}: continue if c[x86_afs.size] in [u08, s08, u16, s16, u32, s32]: out_byte+=struct.pack(x86_afs.dict_size[c[x86_afs.size]], int(c[x86_afs.imm])) else: raise ValueError('bad size in asm! %s'%str(c)) #XXX hack for reloc gen has_symb = None for ea in eat_arg: if x86_afs.ad in ea and ea[x86_afs.ad]: has_symb = len(out_byte)-4 symbol_off.append(has_symb) hex_candidate.append(out_byte) log.info( hexdump(out_byte)) if not len(hex_candidate): log.warning('cannot asm %s'%str(l)) all_candidate = zip(hex_candidate, symbol_off) all_candidate.sort(cmp = lambda x,y:len(x[0])-len(y[0])) hex_candidate = [x[0] for x in all_candidate] for x in all_candidate: symbol_off_out.append(x[1]) return hex_candidate x86mnemo = x86_mn if __name__ == '__main__': test_out = [] log.setLevel(logging.DEBUG) instr = x86mnemo.dis('07'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('66A5'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('DB 28'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('DB 6D 08'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('C7 44 24 08 00 00 00 00'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('F0 65 0F B1 0D 84 00 00 00'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('F0 65 83 0D 84 00 00 00 10'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg print instr.l fds instr = x86mnemo.dis('65 C7 05 28 02 00 00 FF FF FF FF'.replace(' ', '').decode('hex'), admode=x86_afs.u32) print instr print instr.arg fds instr = x86mnemo.dis('66ab'.decode('hex'), admode=x86_afs.u32) print instr print instr.arg fds instr = x86mnemo.dis('6681384D5A0000'.decode('hex'), admode=x86_afs.u32) print instr print instr.arg