#
# Copyright (C) 2011 EADS France, Fabrice Desclaux
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
"""PLY-based parser turning a textual x86 operand into a dictionary.

The parse result maps ``x86_afs`` keys (``imm``, ``symb``, ``size``, ``ad``,
``segm``) and register identifiers taken from ``x86_afs.reg_dict`` to
values.  ``parse_ad`` is the entry point; ``ad_to_generic`` derives the
generic variants of a parsed operand.
"""
from numpy import int32, uint32
import ply.lex as lex
import ply.yacc as yacc

from miasm.arch.ia32_reg import x86_afs


def dict_add(a, b):
    """Return a new dict holding the key-wise sum of *a* and *b*.

    The value stored under ``x86_afs.symb`` is itself a dict and is merged
    recursively.  Any other entry whose sum is 0 is removed from the result.
    Neither argument is modified.
    """
    tmp = dict(a)
    for k in b:
        # special case: nested symbol dictionary
        if k == x86_afs.symb:
            if k in tmp:
                tmp[k] = dict_add(tmp[k], b[k])
            else:
                tmp[k] = dict(b[k])
            continue
        # normal case
        if k in tmp:
            tmp[k] += b[k]
        else:
            tmp[k] = b[k]
        if tmp[k] == 0:
            del tmp[k]
    return tmp


def dict_sub(a, b):
    """Return a new dict holding the key-wise difference ``a - b``.

    The value stored under ``x86_afs.symb`` is itself a dict and is
    subtracted recursively.  Any other entry whose difference is 0 is
    removed from the result.  Neither argument is modified.
    """
    tmp = dict(a)
    for k in b:
        # special case: nested symbol dictionary
        if k == x86_afs.symb:
            if k in tmp:
                tmp[k] = dict_sub(tmp[k], b[k])
            else:
                # No symbols on the left-hand side: the result is the
                # negated right-hand side (the former ``dict({}, b[k])``
                # raised TypeError).
                tmp[k] = dict_sub({}, b[k])
            continue
        # normal case
        if k in tmp:
            tmp[k] -= b[k]
        else:
            tmp[k] = -b[k]
        if tmp[k] == 0:
            del tmp[k]
    return tmp


def dict_mul(a, b):
    """Return the product of two operand dicts.

    One operand must contain ``x86_afs.imm`` as its only key; its value
    scales every entry of the other operand (recursing into the nested
    ``x86_afs.symb`` dict).  *a* is checked first.

    Raises:
        ValueError: if neither operand is a pure immediate.
    """
    # list(...) keeps the comparison meaningful on Python 3, where
    # dict.keys() is a view that never compares equal to a list.
    if list(a.keys()) == [x86_afs.imm]:
        ret = {}
        for k in b:
            if k == x86_afs.symb:
                ret[k] = dict_mul({x86_afs.imm: a[x86_afs.imm]}, b[k])
            else:
                ret[k] = a[x86_afs.imm] * b[k]
        return ret
    if list(b.keys()) == [x86_afs.imm]:
        ret = {}
        for k in a:
            if k == x86_afs.symb:
                ret[k] = dict_mul({x86_afs.imm: b[x86_afs.imm]}, a[k])
            else:
                ret[k] = b[x86_afs.imm] * a[k]
        return ret
    raise ValueError('bad dict mul %s' % (str(a) + str(b)))


def _dict_neg(a):
    """Return a copy of *a* with every value negated.

    The nested ``x86_afs.symb`` dict is negated recursively; zero entries
    are kept.
    """
    ret = {}
    for k in a:
        if k == x86_afs.symb:
            ret[k] = _dict_neg(a[k])
        else:
            ret[k] = -a[k]
    return ret


keywords = ("BYTE", "WORD", "DWORD", "SINGLE", "DOUBLE",
            "ES", "CS", "SS", "DS", "FS", "GS",
            "PTR")

tokens = keywords + (
    'NUMBER',
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE',
    'LPAREN', 'RPAREN', 'LBRA', 'RBRA',
    'COLON',
    'OFFSET', 'NAME',
)

# Tokens

t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRA = r'\['
t_RBRA = r'\]'
t_COLON = r':'
# NOTE(review): PLY tries function rules (t_NAME) before string rules, and
# "OFFSET" is not listed in `keywords`, so this token looks unreachable --
# the text would be lexed as NAME.  Confirm before relying on the
# `OFFSET expression` grammar rule.
t_OFFSET = r'OFFSET'


# Identifier rule.  The docstring is the token regex read by PLY.
# A name matching a keyword (case-insensitively) gets that keyword as token
# type and a lower-cased value.
# NOTE(review): the lower-casing is assumed to apply to keywords only, so
# that symbol names keep their case -- confirm against the upstream file.
def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    if t.value.upper() in keywords:
        t.type = t.value.upper()
        t.value = t.value.lower()
    return t


# Integer literal rule (decimal, or hexadecimal with a 0x/0X prefix).
# The docstring is the token regex read by PLY.
def t_NUMBER(t):
    r'((((0x)|(0X))[0-9a-fA-F]+)|(\d+))'
    try:
        if t.value.startswith("0x") or t.value.startswith("0X"):
            t.value = int(t.value, 16)
        else:
            t.value = int(t.value)
    except ValueError:
        # t.value is still the matched text here, hence %s.
        print("Integer value too large %s" % t.value)
        t.value = 0
    return t


# Ignored characters
t_ignore = " \t"


# Newlines only advance the line counter; no token is produced.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")


# Report an unknown character and skip it.
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer
lex.lex()

precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
    ('right', 'UMINUS'),
)


# Grammar rules.  Each docstring below is the production text read by PLY,
# so explanations are kept in comments.

# Empty expression: an empty operand dict.  PLY ignores return values, so
# the result has to be stored in t[0].
def p_expression_1(t):
    '''expression : '''
    t[0] = {}


# Binary arithmetic on operand dicts.  Division is not supported.
def p_expression_2(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if t[2] == '+':
        t[0] = dict_add(t[1], t[3])
    elif t[2] == '-':
        t[0] = dict_sub(t[1], t[3])
    elif t[2] == '*':
        t[0] = dict_mul(t[1], t[3])
    elif t[2] == '/':
        raise ValueError('bad op')
    else:
        raise ValueError('bad op')


# Parenthesised expression.
def p_expression_3(t):
    '''expression : LPAREN expression RPAREN'''
    t[0] = t[2]


# OFFSET prefix: the inner expression is passed through unchanged.
def p_expression_4(t):
    '''expression : OFFSET expression '''
    t[0] = t[2]


# Unary minus: negate every value, including the nested symbol dict
# (a plain `-` on that dict raised TypeError).
def p_expression_5(t):
    '''expression : MINUS expression %prec UMINUS'''
    t[0] = _dict_neg(t[2])


# Integer literal, stored as an immediate wrapped to a signed 32-bit value.
def p_expression_6(t):
    '''expression : NUMBER'''
    t[0] = {x86_afs.imm: int(int32(uint32(int(t[1]))))}


#"[@?_a-zA-Z\.$][?\.a-zA-Z0-9_@$]*"

# Bare name: a register found in one of the x86_afs register lists (with
# the matching size), otherwise a symbol with coefficient 1.
def p_expression_8(t):
    '''expression : NAME'''
    if t[1] in x86_afs.reg_list32:
        size = x86_afs.u32
    elif t[1] in x86_afs.reg_list16:
        size = x86_afs.u16
    elif t[1] in x86_afs.reg_list8:
        size = x86_afs.u08
    elif t[1] in x86_afs.reg_flt:
        size = x86_afs.f32
    elif t[1] in x86_afs.reg_dr:
        size = x86_afs.u32
    elif t[1] in x86_afs.reg_cr:
        size = x86_afs.u32
    elif t[1] in x86_afs.reg_sg:
        size = x86_afs.u32
    else:
        #raise 'bad reg size'
        t[0] = {x86_afs.symb: {t[1]: 1}}
        return
    t[0] = {x86_afs.reg_dict[t[1]]: 1, x86_afs.size: size}


def p_PTRSIZE(t):
    '''PTRSIZE : BYTE
               | WORD
               | DWORD
               | SINGLE
               | DOUBLE
    '''
    t[0] = t[1]


def p_PTRMEM(t):
    '''PTRMEM : PTR'''
    t[0] = t[1]


def p_OPTSEG(t):
    '''OPTSEG : ES
              | CS
              | SS
              | DS
              | FS
              | GS
    '''
    t[0] = t[1]


# Segment prefix followed by a colon: index of the segment register in
# x86_afs.reg_sg, stored under x86_afs.segm.
def p_opt_seg_colon_1(t):
    '''opt_seg_colon : OPTSEG COLON '''
    t[0] = {x86_afs.segm: x86_afs.reg_sg.index(t[1])}


# Segment register on its own.
def p_opt_seg_1(t):
    '''opt_seg : OPTSEG '''
    t[0] = {x86_afs.segm: x86_afs.reg_sg.index(t[1])}


# Sized memory reference, optionally with a segment prefix.  The access
# size is stored under x86_afs.ad in the inner expression's dict, which is
# updated in place and becomes the result.
def p_expression_9(t):
    '''expression : PTRSIZE PTRMEM LBRA expression RBRA
                  | PTRSIZE PTRMEM opt_seg_colon LBRA expression RBRA
    '''
    size = t[1]
    # len(t) == 6 is the form without a segment prefix.
    if len(t) == 6:
        index = 4
    else:
        index = 5
    if size == 'byte':
        t[index][x86_afs.ad] = x86_afs.u08
    elif size == 'word':
        t[index][x86_afs.ad] = x86_afs.u16
    elif size == 'dword':
        t[index][x86_afs.ad] = x86_afs.u32
    elif size == 'single':
        t[index][x86_afs.ad] = x86_afs.f32
    elif size == 'double':
        t[index][x86_afs.ad] = x86_afs.f64
    else:
        raise ValueError('bad address size')
    if len(t) != 6:
        # merge the segment information
        t[index].update(t[3])
    t[0] = t[index]


# Unsized memory reference, optionally with a segment prefix; the access
# size defaults to x86_afs.u32.
def p_expression_10(t):
    '''expression : LBRA expression RBRA
                  | opt_seg_colon LBRA expression RBRA
    '''
    if len(t) == 4:
        t[2][x86_afs.ad] = x86_afs.u32
        t[0] = t[2]
    else:
        t[3][x86_afs.ad] = x86_afs.u32
        t[3].update(t[1])
        t[0] = t[3]


def p_expression_11(t):
    '''expression : opt_seg'''
    t[0] = t[1]


def parse_ad(a):
    """Parse the operand text *a* and return its operand dict.

    After parsing, ``x86_afs.ad`` is set to False when the operand is not a
    memory reference; otherwise ``x86_afs.size`` is overwritten with the
    value of ``x86_afs.ad``.  ``x86_afs.size`` defaults to ``x86_afs.u32``
    when still missing.

    Raises:
        ValueError: if the parser produced no result for *a*.
    """
    parsed = yacc.parse(a)
    if parsed is None:
        # Without this check the membership test below would fail with an
        # unhelpful TypeError.
        raise ValueError('cannot parse operand %r' % (a,))
    if x86_afs.ad not in parsed:
        parsed[x86_afs.ad] = False
    else:
        parsed[x86_afs.size] = parsed[x86_afs.ad]
    if x86_afs.size not in parsed:
        parsed[x86_afs.size] = x86_afs.u32
    return parsed


# Build the parser
yacc.yacc()


def ad_to_generic(a):
    """Return the list of generic variants of the operand dict *a*.

    Each variant is a copy of *a* whose ``x86_afs.imm`` entry is replaced by
    one immediate size constant (``u32``, ``u08`` or ``s08``) selected by the
    range tests below.  When *a* has no immediate, *a* itself is the first
    element of the result.  Duplicates are removed while keeping order.

    Note that *a* is modified: a truthy ``a[x86_afs.ad]`` is replaced by
    True.

    NOTE(review): the nesting of the first ``else`` (paired here with
    ``if x86_afs.imm in a``) is a reconstruction -- confirm against the
    upstream file.
    """
    #opt imm
    out = []
    to_add = []
    #generic ad size
    if a[x86_afs.ad]:
        a[x86_afs.ad] = True
        #imm can always be encoded in u32
        to_add.append({x86_afs.imm: x86_afs.u32})
        if x86_afs.imm in a:
            if a[x86_afs.imm] >= 0 and a[x86_afs.imm] <= 0xFF:
                to_add.append({x86_afs.imm: x86_afs.u08})
            if a[x86_afs.imm] >= -128 and a[x86_afs.imm] < 128:
                to_add.append({x86_afs.imm: x86_afs.s08})
        else:
            to_add.append({x86_afs.imm: x86_afs.u08})
            to_add.append({x86_afs.imm: x86_afs.s08})

    if x86_afs.imm not in a:
        out.append(a)
    else:
        i = a[x86_afs.imm]
        if i < 128 and i >= -128:
            to_add.append({x86_afs.imm: x86_afs.s08})
        if i <= 0xFF and i >= 0:
            to_add.append({x86_afs.imm: x86_afs.u08})

    for kv in to_add:
        tmp = dict(a)
        tmp.update(kv)
        out.append(tmp)

    # dicts are unhashable, so de-duplicate with a linear scan
    out_unik = []
    for o in out:
        if o not in out_unik:
            out_unik.append(o)

    return out_unik