diff options
27 files changed, 597 insertions, 333 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index ad85f7dc..84c856e1 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -61,10 +61,10 @@ if args.verbose: log.info('Load binary') if args.rawbinary: shift = args.shiftoffset if args.shiftoffset is not None else 0 - cont = Container.fallback_container(open(args.filename).read(), + cont = Container.fallback_container(open(args.filename, "rb").read(), None, addr=shift) else: - with open(args.filename) as fdesc: + with open(args.filename, "rb") as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 438188ab..b3ee6938 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -1,24 +1,23 @@ import sys import subprocess -from collections import defaultdict from optparse import OptionParser from pdb import pm -from miasm2.arch.x86.arch import * -from miasm2.arch.x86.regs import * -from miasm2.arch.x86.sem import * +from miasm2.analysis.machine import Machine +from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ + get_expr_ids, ExprAff +from miasm2.arch.x86.arch import ParseAst from miasm2.core.bin_stream import bin_stream_str from miasm2.core import asmblock -from miasm2.expression.expression import get_rw -from miasm2.expression.modint import uint32 from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block from miasm2.expression.simplifications import expr_simp -from miasm2.expression import stp from miasm2.core import parse_asm from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine +from miasm2.ir.translators.translator import Translator -mn = mn_x86 +machine = Machine("x86_32") + parser = OptionParser(usage="usage: %prog [options] file") parser.add_option('-a', "--address", dest="address", metavar="ADDRESS", @@ -32,99 +31,81 @@ if not args: def emul_symb(ir_arch, mdis, states_todo, states_done): while states_todo: - ad, symbols, conds = states_todo.pop() - print '*' * 40, "addr", ad, '*' * 40 - if (ad, symbols, conds) in states_done: - print 'skip', ad + addr, symbols, conds = states_todo.pop() + print '*' * 40, "addr", addr, '*' * 40 + if (addr, symbols, conds) in states_done: + print 'Known state, skipping', addr continue - states_done.add((ad, symbols, conds)) - sb = SymbolicExecutionEngine(ir_arch, {}) - sb.symbols = symbols.copy() - if ir_arch.pc in sb.symbols: - del(sb.symbols[ir_arch.pc]) - b = get_block(ir_arch, mdis, ad) - - print 'run block' - print b - # print blocks[ad] - ad = sb.emulbloc(b) - print 'final state' - sb.dump_id() - print 'dataflow' - # data_flow_graph_from_expr(sb) - - assert(ad is not None) - print "DST", ad - - if isinstance(ad, ExprCond): + states_done.add((addr, symbols, conds)) + symbexec = SymbolicExecutionEngine(ir_arch, {}) + symbexec.symbols = symbols.copy() + if ir_arch.pc in symbexec.symbols: + del symbexec.symbols[ir_arch.pc] + irblock = get_block(ir_arch, mdis, addr) + + print 'Run block:' + print irblock + addr = symbexec.emulbloc(irblock) + print 'Final state:' + symbexec.dump_id() + + assert addr is not None + + if isinstance(addr, ExprCond): # Create 2 states, each including complementary conditions - p1 = sb.symbols.copy() - p2 = sb.symbols.copy() - c1 = {ad.cond: ExprInt(0, ad.cond.size)} - c2 = {ad.cond: ExprInt(1, ad.cond.size)} - print ad.cond - p1[ad.cond] = ExprInt(0, ad.cond.size) - p2[ad.cond] = ExprInt(1, ad.cond.size) - ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {})) - ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {})) - if not (isinstance(ad1, ExprInt) or (isinstance(ad1, ExprId) and isinstance(ad1.name, asmblock.AsmLabel)) and - isinstance(ad2, ExprInt) or (isinstance(ad2, ExprId) and isinstance(ad2.name, asmblock.AsmLabel))): - print str(ad1), str(ad2) - raise ValueError("zarb condition") - conds1 = list(conds) + c1.items() - conds2 = list(conds) + c2.items() - if isinstance(ad1, ExprId): - ad1 = ad1.name - if isinstance(ad2, ExprId): - ad2 = ad2.name - if isinstance(ad1, ExprInt): - ad1 = ad1.arg - if isinstance(ad2, ExprInt): - ad2 = ad2.arg - states_todo.add((ad1, p1, tuple(conds1))) - states_todo.add((ad2, p2, tuple(conds2))) - elif isinstance(ad, ExprInt): - ad = int(ad.arg) - states_todo.add((ad, sb.symbols.copy(), tuple(conds))) - elif isinstance(ad, ExprId) and isinstance(ad.name, asmblock.AsmLabel): - if isinstance(ad, ExprId): - ad = ad.name - states_todo.add((ad, sb.symbols.copy(), tuple(conds))) - elif ad == ret_addr: - print 'ret reached' + cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)} + cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)} + addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {})) + addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {})) + if not (addr_a.is_int() or asmblock.expr_is_label(addr_a) and + addr_b.is_int() or asmblock.expr_is_label(addr_b)): + print str(addr_a), str(addr_b) + raise ValueError("Unsupported condition") + if isinstance(addr_a, ExprInt): + addr_a = int(addr_a.arg) + if isinstance(addr_b, ExprInt): + addr_b = int(addr_b.arg) + states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + cond_group_a.items()))) + states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + cond_group_b.items()))) + elif isinstance(addr, ExprInt): + addr = int(addr.arg) + states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) + elif asmblock.expr_is_label(addr): + addr = addr.name + states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) + elif addr == ret_addr: + print 'Return address reached' continue else: - raise ValueError("zarb eip") + raise ValueError("Unsupported destination") if __name__ == '__main__': + translator_smt2 = Translator.to_language("smt2") data = open(args[0]).read() bs = bin_stream_str(data) mdis = dis_engine(bs) - ad = int(options.address, 16) + addr = int(options.address, 16) - symbols_init = {} - for i, r in enumerate(all_regs_ids): - symbols_init[r] = all_regs_ids_init[i] + symbols_init = dict(machine.mn.regs.regs_init) # config parser for 32 bit - reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + reg_and_id = dict(machine.mn.regs.all_regs_ids_byname) - def my_ast_int2expr(a): - return ExprInt(a, 32) + def my_ast_int2expr(name): + return ExprInt(name, 32) # Modifify parser to avoid label creation in PUSH argc def my_ast_id2expr(string_parsed): if string_parsed in reg_and_id: return reg_and_id[string_parsed] - else: - return ExprId(string_parsed, size=32) + return ExprId(string_parsed, size=32) my_var_parser = ParseAst(my_ast_id2expr, my_ast_int2expr) - base_expr.setParseAction(my_var_parser) + machine.base_expr.setParseAction(my_var_parser) argc = ExprId('argc', 32) argv = ExprId('argv', 32) @@ -135,13 +116,13 @@ if __name__ == '__main__': my_symbols = [argc, argv, ret_addr] my_symbols = dict([(x.name, x) for x in my_symbols]) - my_symbols.update(mn_x86.regs.all_regs_ids_byname) + my_symbols.update(machine.mn.regs.all_regs_ids_byname) - ir_arch = ir_x86_32(mdis.symbol_pool) + ir_arch = machine.ir(mdis.symbol_pool) - sb = SymbolicExecutionEngine(ir_arch, symbols_init) + symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) - blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' + blocks, symbol_pool = parse_asm.parse_txt(machine.mn, 32, ''' PUSH argv PUSH argc PUSH ret_addr @@ -155,15 +136,15 @@ if __name__ == '__main__': line.offset, line.l = i, 1 ir_arch.add_block(b) irb = get_block(ir_arch, mdis, 0) - sb.emulbloc(irb) - sb.dump_mem() + symbexec.emulbloc(irb) + symbexec.dump_mem() # reset ir_arch blocks ir_arch.blocks = {} states_todo = set() states_done = set() - states_todo.add((uint32(ad), sb.symbols, ())) + states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states emul_symb(ir_arch, mdis, states_todo, states_done) @@ -171,57 +152,53 @@ if __name__ == '__main__': all_info = [] print '*' * 40, 'conditions to match', '*' * 40 - for ad, symbols, conds in sorted(states_done): - print '*' * 40, ad, '*' * 40 + for addr, symbols, conds in sorted(states_done): + print '*' * 40, addr, '*' * 40 reqs = [] for k, v in conds: print k, v reqs.append((k, v)) - all_info.append((ad, reqs)) + all_info.append((addr, reqs)) all_cases = set() - sb = SymbolicExecutionEngine(ir_arch, symbols_init) - for ad, reqs_cond in all_info: + symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + for addr, reqs_cond in all_info: + out = ['(set-logic QF_ABV)', + '(set-info :smt-lib-version 2.0)'] + + conditions = [] all_ids = set() - for k, v in reqs_cond: - all_ids.update(get_expr_ids(k)) - - out = [] - - # declare variables - for v in all_ids: - out.append(str(v) + ":" + "BITVECTOR(%d);" % v.size) - - all_csts = [] - for k, v in reqs_cond: - cst = k.strcst() - val = v.arg - assert(val in [0, 1]) - inv = "" - if val == 1: - inv = "NOT " - val = "0" * v.size - all_csts.append("(%s%s=0bin%s)" % (inv, cst, val)) - if not all_csts: + for expr, value in reqs_cond: + + all_ids.update(get_expr_ids(expr)) + expr_test = ExprCond(expr, + ExprInt(1, value.size), + ExprInt(0, value.size)) + cond = translator_smt2.from_expr(ExprAff(expr_test, value)) + conditions.append(cond) + + for name in all_ids: + out.append("(declare-fun %s () (_ BitVec %d))" % (name, name.size)) + if not out: continue - rez = " AND ".join(all_csts) - out.append("QUERY(NOT (%s));" % rez) - end = "\n".join(out) - open('out.dot', 'w').write(end) + + out += conditions + out.append('(check-sat)') + open('out.dot', 'w').write('\n'.join(out)) try: cases = subprocess.check_output(["/home/serpilliere/tools/stp/stp", - "-p", + "-p", '--SMTLIB2', "out.dot"]) except OSError: - print "ERF, cannot find stp" + print "Cannot find stp binary!" break for c in cases.split('\n'): if c.startswith('ASSERT'): - all_cases.add((ad, c)) + all_cases.add((addr, c)) print '*' * 40, 'ALL COND', '*' * 40 all_cases = list(all_cases) all_cases.sort(key=lambda x: (x[0], x[1])) - for ad, val in all_cases: - print 'address', ad, 'is reachable using argc', val + for addr, val in all_cases: + print 'Address:', addr, 'is reachable using argc', val diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index 76d4fa2d..8c64c6d2 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -11,6 +11,7 @@ from miasm2.expression.simplifications import expr_simp from miasm2.analysis.depgraph import DependencyGraph from miasm2.ir.ir import IRBlock, AssignBlock from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk +from miasm2.arch.msp430.ctype import CTypeMSP430_unk from miasm2.expression.expression import ExprId from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler from miasm2.core.ctypesmngr import CAstTypes @@ -29,7 +30,7 @@ class TypePropagationForm(ida_kernwin.Form): self.ira = ira default_types_info = r"""ExprId("RDX", 64): char *""" - archs = ["AMD64_unk", "X86_32_unk"] + archs = ["AMD64_unk", "X86_32_unk", "msp430_unk"] ida_kernwin.Form.__init__(self, r"""BUTTON YES* Launch @@ -59,6 +60,8 @@ def get_types_mngr(headerFile, arch): base_types = CTypeAMD64_unk() elif arch =="X86_32_unk": base_types = CTypeX86_unk() + elif arch =="msp430_unk": + base_types = CTypeMSP430_unk() else: raise NotImplementedError("Unsupported arch") types_ast = CAstTypes() diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 9a65617b..8d9dea4f 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -95,18 +95,9 @@ class GraphMiasmIR(idaapi.GraphViewer): def OnClick(self, node_id): return True - def OnCommand(self, cmd_id): - if self.cmd_test == cmd_id: - print 'TEST!' - return - print "command:", cmd_id - def Show(self): if not idaapi.GraphViewer.Show(self): return False - self.cmd_test = self.AddCommand("Test", "F2") - if self.cmd_test == 0: - print "Failed to add popup menu item!" return True @@ -185,9 +176,6 @@ def build_graph(verbose=False, simplify=False): g = GraphMiasmIR(ir_arch, title, None) - g.cmd_a = g.AddCommand("cmd a", "x") - g.cmd_b = g.AddCommand("cmd b", "y") - g.Show() if __name__ == "__main__": diff --git a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index 3d4a64fa..b65b97a1 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -8,6 +8,29 @@ from miasm2.expression.expression import ExprAff from utils import expr2colorstr, translatorForm + +class ActionHandler(idaapi.action_handler_t): + def activate(self, ctx): + view_index = get_focused_view() + if view_index is None: + return 1 + self.custom_action(all_views[view_index]) + return 1 + + def update(self, ctx): + return idaapi.AST_ENABLE_ALWAYS + + +class ActionHandlerExpand(ActionHandler): + def custom_action(self, view): + view.expand_expr() + + +class ActionHandlerTranslate(ActionHandler): + def custom_action(self, view): + view.translate_expr(view.GetLineNo()) + + class symbolicexec_t(idaapi.simplecustviewer_t): def add(self, key, value): @@ -48,10 +71,12 @@ class symbolicexec_t(idaapi.simplecustviewer_t): self.print_lines() - self.menu_expand = self.AddPopupMenu("Expand [E]") - self.menu_translate = self.AddPopupMenu("Translate [T]") return True + def expand_expr(self): + self.expand(self.GetLineNo()) + self.print_lines() + def OnPopupMenu(self, menu_id): if menu_id == self.menu_expand: self.expand(self.GetLineNo()) @@ -65,15 +90,29 @@ class symbolicexec_t(idaapi.simplecustviewer_t): if vkey == 27: self.Close() return True - # E (expand) - if vkey == 69: - self.OnPopupMenu(self.menu_expand) - # T (translate) - if vkey == 84: - self.OnPopupMenu(self.menu_translate) + + if vkey == ord('E'): + self.expand_expr() + + if vkey == ord('T'): + self.translate_expr(self.GetLineNo()) + return False +def get_focused_view(): + for i, view in enumerate(all_views): + if view.IsFocused(): + return i + return None + + +class Hooks(idaapi.UI_Hooks): + def finish_populating_tform_popup(self, form, popup): + idaapi.attach_action_to_popup(form, popup, 'my:expand', None) + idaapi.attach_action_to_popup(form, popup, 'my:translate', None) + + def symbolic_exec(): from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.core.bin_stream_ida import bin_stream_ida @@ -109,17 +148,45 @@ def symbolic_exec(): view = symbolicexec_t() + all_views.append(view) if not view.Create(modified, machine, "Symbolic Execution - 0x%x to 0x%x" % (start, end)): return view.Show() -if __name__ == "__main__": + +# Support ida 6.9 and ida 7 +all_views = [] + +hooks = Hooks() +hooks.hook() + +action_expand = idaapi.action_desc_t( + 'my:expand', + 'Expand', + ActionHandlerExpand(), + 'E', + 'Expand expression', + 50) + +action_translate = idaapi.action_desc_t( + 'my:translate', + 'Translate', + ActionHandlerTranslate(), + 'T', + 'Translate expression in C/python/z3...', + 103) + +idaapi.register_action(action_expand) +idaapi.register_action(action_translate) + + +if __name__ == '__main__': idaapi.CompileLine('static key_F3() { RunPythonStatement("symbolic_exec()"); }') idc.AddHotkey("F3", "key_F3") print "=" * 50 print """Available commands: - symbolic_exec() - F3: Symbolic execution of current selection + symbolic_exec() - F3: Symbolic execution of current selection """ diff --git a/example/ida/utils.py b/example/ida/utils.py index 3f7c3c8a..b147cde2 100644 --- a/example/ida/utils.py +++ b/example/ida/utils.py @@ -64,61 +64,81 @@ def guess_machine(): return machine +class TranslatorIDA(Translator): + """Translate a Miasm expression to a IDA colored string""" + + # Implemented language + __LANG__ = "ida_w_color" + + def __init__(self, regs_ids=None, **kwargs): + super(TranslatorIDA, self).__init__(**kwargs) + if regs_ids is None: + regs_ids = {} + self.regs_ids = regs_ids + + def str_protected_child(self, child, parent): + return ("(%s)" % self.from_expr(child)) if m2_expr.should_parenthesize_child(child, parent) else self.from_expr(child) + + def from_ExprInt(self, expr): + return idaapi.COLSTR(str(expr), idaapi.SCOLOR_NUMBER) + + def from_ExprId(self, expr): + out = str(expr) + if expr in self.regs_ids: + out = idaapi.COLSTR(out, idaapi.SCOLOR_REG) + return out + + def from_ExprMem(self, expr): + ptr = self.from_expr(expr.arg) + size = idaapi.COLSTR('@' + str(expr.size), idaapi.SCOLOR_RPTCMT) + out = '%s[%s]' % (size, ptr) + return out + + def from_ExprSlice(self, expr): + base = self.from_expr(expr.arg) + start = idaapi.COLSTR(str(expr.start), idaapi.SCOLOR_RPTCMT) + stop = idaapi.COLSTR(str(expr.stop), idaapi.SCOLOR_RPTCMT) + out = "(%s)[%s:%s]" % (base, start, stop) + return out + + def from_ExprCompose(self, expr): + out = "{" + out += ", ".join(["%s, %s, %s" % (self.from_expr(subexpr), + idaapi.COLSTR(str(idx), idaapi.SCOLOR_RPTCMT), + idaapi.COLSTR(str(idx + subexpr.size), idaapi.SCOLOR_RPTCMT)) + for idx, subexpr in expr.iter_args()]) + out += "}" + return out + + def from_ExprCond(self, expr): + cond = self.str_protected_child(expr.cond, expr) + src1 = self.from_expr(expr.src1) + src2 = self.from_expr(expr.src2) + out = "%s?(%s,%s)" % (cond, src1, src2) + return out + + def from_ExprOp(self, expr): + if expr._op == '-': # Unary minus + return '-' + self.str_protected_child(expr._args[0], expr) + if expr.is_associative() or expr.is_infix(): + return (' ' + expr._op + ' ').join([self.str_protected_child(arg, expr) + for arg in expr._args]) + return (expr._op + '(' + + ', '.join([self.from_expr(arg) for arg in expr._args]) + ')') + + def from_ExprAff(self, expr): + return "%s = %s" % tuple(map(expr.from_expr, (expr.dst, expr.src))) + + + def expr2colorstr(regs_ids, expr): """Colorize an Expr instance for IDA @regs_ids: list of ExprId corresponding to available registers @expr: Expr instance to colorize """ - if isinstance(expr, m2_expr.ExprId): - s = str(expr) - if expr in regs_ids: - s = idaapi.COLSTR(s, idaapi.SCOLOR_REG) - elif isinstance(expr, m2_expr.ExprInt): - s = str(expr) - s = idaapi.COLSTR(s, idaapi.SCOLOR_NUMBER) - elif isinstance(expr, m2_expr.ExprMem): - s = '%s[%s]' % (idaapi.COLSTR('@' + str(expr.size), - idaapi.SCOLOR_RPTCMT), - expr2colorstr(regs_ids, expr.arg)) - elif isinstance(expr, m2_expr.ExprOp): - out = [] - for a in expr.args: - s = expr2colorstr(regs_ids, a) - if isinstance(a, m2_expr.ExprOp): - s = "(%s)" % s - out.append(s) - if len(out) == 1: - s = "%s %s" % (expr.op, str(out[0])) - else: - s = (" " + expr.op + " ").join(out) - elif isinstance(expr, m2_expr.ExprAff): - s = "%s = %s" % ( - expr2colorstr(regs_ids, expr.dst), expr2colorstr(regs_ids, expr.src)) - elif isinstance(expr, m2_expr.ExprCond): - cond = expr2colorstr(regs_ids, expr.cond) - src1 = expr2colorstr(regs_ids, expr.src1) - src2 = expr2colorstr(regs_ids, expr.src2) - s = "(%s?%s:%s)" % (cond, src1, src2) - elif isinstance(expr, m2_expr.ExprSlice): - s = "(%s)[%s:%s]" % (expr2colorstr(regs_ids, expr.arg), - idaapi.COLSTR(str(expr.start), - idaapi.SCOLOR_RPTCMT), - idaapi.COLSTR(str(expr.stop), - idaapi.SCOLOR_RPTCMT)) - elif isinstance(expr, m2_expr.ExprCompose): - s = "{" - s += ", ".join(["%s, %s, %s" % (expr2colorstr(regs_ids, subexpr), - idaapi.COLSTR(str(idx), - idaapi.SCOLOR_RPTCMT), - idaapi.COLSTR(str(idx + subexpr.size), - idaapi.SCOLOR_RPTCMT)) - for idx, subexpr in expr.iter_args()]) - s += "}" - else: - s = str(expr) - - return s + translator = TranslatorIDA(regs_ids) + return translator.from_expr(expr) class translatorForm(idaapi.Form): diff --git a/example/jitter/msp430.py b/example/jitter/msp430.py index b69f91c6..6dd67542 100755 --- a/example/jitter/msp430.py +++ b/example/jitter/msp430.py @@ -40,7 +40,7 @@ def jit_msp430_binary(args): myjit.jit.log_mn = args.log_mn myjit.jit.log_newbloc = args.log_newbloc - myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath).read()) + myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath, "rb").read()) myjit.add_breakpoint(0x1337, lambda _: exit(0)) diff --git a/example/jitter/sandbox_call.py b/example/jitter/sandbox_call.py index 49365004..dc64af15 100644 --- a/example/jitter/sandbox_call.py +++ b/example/jitter/sandbox_call.py @@ -13,7 +13,7 @@ options = parser.parse_args() sb = Sandbox_Linux_arml(options.filename, options, globals()) -with open(options.filename) as fdesc: +with open(options.filename, "rb") as fdesc: cont = Container.from_stream(fdesc) addr_to_call = cont.symbol_pool.getby_name("md5_starts").offset diff --git a/example/symbol_exec/depgraph.py b/example/symbol_exec/depgraph.py index 4d518cb3..c1d6174d 100644 --- a/example/symbol_exec/depgraph.py +++ b/example/symbol_exec/depgraph.py @@ -31,7 +31,7 @@ parser.add_argument("--json", args = parser.parse_args() # Get architecture -with open(args.filename) as fstream: +with open(args.filename, "rb") as fstream: cont = Container.from_stream(fstream) arch = args.architecture if args.architecture else cont.arch diff --git a/example/symbol_exec/dse_crackme.py b/example/symbol_exec/dse_crackme.py index 9ac4d6d1..303447a4 100644 --- a/example/symbol_exec/dse_crackme.py +++ b/example/symbol_exec/dse_crackme.py @@ -31,7 +31,7 @@ def xxx_fopen(jitter): global my_FILE_ptr ret_addr, args = jitter.func_args_systemv(['path', 'mode']) fname = jitter.get_str_ansi(args.path) - FILE_to_info[my_FILE_ptr] = FInfo(fname, open(fname)) + FILE_to_info[my_FILE_ptr] = FInfo(fname, open(fname, "rb")) my_FILE_ptr += 1 return jitter.func_ret_stdcall(ret_addr, my_FILE_ptr - 1) diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index 263c0252..d65702ba 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -14,6 +14,7 @@ asm = machine.mn.asm(line)[0] # Get back block bin_stream = bin_stream_str(asm) mdis = machine.dis_engine(bin_stream) +mdis.lines_wd = 1 asm_block = mdis.dis_block(START_ADDR) # Translate ASM -> IR diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index a1736bea..07297094 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -199,7 +199,7 @@ class OS_Win(OS): fname_basename = os.path.basename(self.fname).lower() # Load main pe - with open(self.fname) as fstream: + with open(self.fname, "rb") as fstream: self.pe = vm_load_pe(self.jitter.vm, fstream.read(), load_hdr=self.options.load_hdr, name=self.fname, @@ -275,7 +275,7 @@ class OS_Linux(OS): # Import manager self.libs = libimp_elf() - with open(self.fname) as fstream: + with open(self.fname, "rb") as fstream: self.elf = vm_load_elf(self.jitter.vm, fstream.read(), name=self.fname, **kwargs) preload_elf(self.jitter.vm, self.elf, self.libs) @@ -321,7 +321,7 @@ class OS_Linux_str(OS): libs = libimp_elf() self.libs = libs - data = open(self.fname).read() + data = open(self.fname, "rb").read() self.options.load_base_addr = int(self.options.load_base_addr, 0) self.jitter.vm.add_memory_page( self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data, diff --git a/miasm2/arch/msp430/ctype.py b/miasm2/arch/msp430/ctype.py new file mode 100644 index 00000000..adb0a953 --- /dev/null +++ b/miasm2/arch/msp430/ctype.py @@ -0,0 +1,68 @@ +from miasm2.core.objc import CLeafTypes, ObjCDecl, PADDING_TYPE_NAME +from miasm2.core.ctypesmngr import CTypeId, CTypePtr + + +class CTypeMSP430_unk(CLeafTypes): + """Define C types sizes/alignement for msp430 architecture""" + + obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 1/align 1 + + obj_char = ObjCDecl("char", 1, 1) + obj_short = ObjCDecl("short", 2, 2) + obj_int = ObjCDecl("int", 2, 2) + obj_long = ObjCDecl("long", 2, 2) + + obj_uchar = ObjCDecl("uchar", 1, 1) + obj_ushort = ObjCDecl("ushort", 2, 2) + obj_uint = ObjCDecl("uint", 2, 2) + obj_ulong = ObjCDecl("ulong", 2, 2) + + obj_void = ObjCDecl("void", 1, 1) + + obj_enum = ObjCDecl("enum", 2, 2) + + obj_float = ObjCDecl("float", 4, 4) + obj_double = ObjCDecl("double", 8, 8) + obj_ldouble = ObjCDecl("ldouble", 16, 16) + + def __init__(self): + self.types = { + CTypeId(PADDING_TYPE_NAME): self.obj_pad, + + CTypeId('char'): self.obj_char, + CTypeId('short'): self.obj_short, + CTypeId('int'): self.obj_int, + CTypeId('void'): self.obj_void, + CTypeId('long',): self.obj_long, + CTypeId('float'): self.obj_float, + CTypeId('double'): self.obj_double, + + CTypeId('signed', 'char'): self.obj_char, + CTypeId('unsigned', 'char'): self.obj_uchar, + + CTypeId('short', 'int'): self.obj_short, + CTypeId('signed', 'short'): self.obj_short, + CTypeId('signed', 'short', 'int'): self.obj_short, + CTypeId('unsigned', 'short'): self.obj_ushort, + CTypeId('unsigned', 'short', 'int'): self.obj_ushort, + + CTypeId('unsigned', ): self.obj_uint, + CTypeId('unsigned', 'int'): self.obj_uint, + CTypeId('signed', 'int'): self.obj_int, + + CTypeId('long', 'int'): self.obj_long, + CTypeId('long', 'long'): self.obj_long, + CTypeId('long', 'long', 'int'): self.obj_long, + CTypeId('signed', 'long', 'long'): self.obj_long, + CTypeId('unsigned', 'long', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, + + CTypeId('signed', 'long'): self.obj_long, + CTypeId('unsigned', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'int'): self.obj_ulong, + + CTypeId('long', 'double'): self.obj_ldouble, + CTypePtr(CTypeId('void')): self.obj_uint, + } diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 6725f5bc..13c06ae6 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -134,7 +134,7 @@ RBRACK = Suppress("]") dbreg = Group(gpregs16.parser | gpregs32.parser | gpregs64.parser) gpreg = (gpregs08.parser | gpregs08_64.parser | gpregs16.parser | gpregs32.parser | gpregs64.parser | gpregs_xmm.parser | - gpregs_mm.parser) + gpregs_mm.parser | gpregs_bnd.parser) def reg2exprid(r): @@ -304,7 +304,8 @@ rmarg = Group(gpregs08.parser | gpregs32.parser | gpregs64.parser | gpregs_mm.parser | - gpregs_xmm.parser + gpregs_xmm.parser | + gpregs_bnd.parser ).setParseAction(getreg) rmarg |= deref_mem @@ -1712,8 +1713,10 @@ def test_addr_size(ptr, size): SIZE2XMMREG = {64:gpregs_mm, 128:gpregs_xmm} +SIZE2BNDREG = {64:gpregs_mm, + 128:gpregs_bnd} -def parse_mem(expr, parent, w8, sx=0, xmm=0, mm=0): +def parse_mem(expr, parent, w8, sx=0, xmm=0, mm=0, bnd=0): dct_expr = {} opmode = parent.v_opmode() if expr.is_mem_segm() and expr.arg.args[0].is_int(): @@ -1780,17 +1783,23 @@ def parse_mem(expr, parent, w8, sx=0, xmm=0, mm=0): out = [dct_expr] return out, segm, True -def expr2modrm(expr, parent, w8, sx=0, xmm=0, mm=0): +def expr2modrm(expr, parent, w8, sx=0, xmm=0, mm=0, bnd=0): dct_expr = {f_isad : False} - if mm or xmm: + if mm or xmm or bnd: if mm and expr.size != 64: return None, None, False elif xmm and expr.size != 128: return None, None, False + elif bnd and expr.size != 128: + return None, None, False if isinstance(expr, ExprId): - selreg = SIZE2XMMREG[expr.size] + if bnd: + size2reg = SIZE2BNDREG + else: + size2reg = SIZE2XMMREG + selreg = size2reg[expr.size] if not expr in selreg.expr: return None, None, False i = selreg.expr.index(expr) @@ -1828,6 +1837,13 @@ def expr2modrm(expr, parent, w8, sx=0, xmm=0, mm=0): return [dct_expr], None, True else: return None, None, False + if bnd: + if expr in gpregs_bnd.expr: + i = gpregs_bnd.expr.index(expr) + dct_expr[i] = 1 + return [dct_expr], None, True + else: + return None, None, False if mm: if expr in gpregs_mm.expr: i = gpregs_mm.expr.index(expr) @@ -1858,9 +1874,9 @@ def expr2modrm(expr, parent, w8, sx=0, xmm=0, mm=0): return None, None, False dct_expr[i] = 1 return [dct_expr], None, True - return parse_mem(expr, parent, w8, sx, xmm, mm) + return parse_mem(expr, parent, w8, sx, xmm, mm, bnd) -def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0): +def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): o = [] if not modrm[f_isad]: modrm_k = [x[0] for x in modrm.iteritems() if x[1] == 1] @@ -1879,6 +1895,8 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0): expr = gpregs_xmm.expr[modrm_k] elif mm: expr = gpregs_mm.expr[modrm_k] + elif bnd: + expr = gpregs_bnd.expr[modrm_k] elif opmode == 8 and (parent.v_opmode() == 64 or parent.rex_p.value == 1): expr = gpregs08_64.expr[modrm_k] else: @@ -1907,6 +1925,8 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0): opmode = 128 elif mm: opmode = 64 + elif bnd: + opmode = 128 expr = ExprMem(expr, size=opmode) return expr @@ -2326,11 +2346,12 @@ class x86_rm_mm(x86_rm_m80): msize = 64 is_mm = True is_xmm = False + is_bnd = False def decode(self, v): p = self.parent xx = self.get_modrm() - expr = modrm2expr(xx, p, 0, 0, self.is_xmm, self.is_mm) + expr = modrm2expr(xx, p, 0, 0, self.is_xmm, self.is_mm, self.is_bnd) if isinstance(expr, ExprMem): if self.msize is None: return False @@ -2356,7 +2377,8 @@ class x86_rm_mm(x86_rm_m80): elif self.is_mm: expr = ExprMem(expr.arg, 64) - v_cand, segm, ok = expr2modrm(expr, p, 0, 0, self.is_xmm, self.is_mm) + v_cand, segm, ok = expr2modrm(expr, p, 0, 0, self.is_xmm, self.is_mm, + self.is_bnd) for x in self.gen_cand(v_cand, p.v_admode()): yield x @@ -2382,6 +2404,11 @@ class x86_rm_xmm_m64(x86_rm_mm): is_mm = False is_xmm = True +class x86_rm_xmm_m128(x86_rm_mm): + msize = 128 + is_mm = False + is_xmm = True + class x86_rm_xmm_reg(x86_rm_mm): msize = None @@ -2393,6 +2420,35 @@ class x86_rm_mm_reg(x86_rm_mm): is_mm = True is_xmm = False + +class x86_rm_bnd(x86_rm_mm): + msize = 128 + is_mm = False + is_xmm = False + is_bnd = True + + +class x86_rm_bnd_reg(x86_rm_mm): + msize = None + is_mm = False + is_xmm = False + is_bnd = True + + +class x86_rm_bnd_m64(x86_rm_mm): + msize = 64 + is_mm = False + is_xmm = False + is_bnd = True + + +class x86_rm_bnd_m128(x86_rm_mm): + msize = 128 + is_mm = False + is_xmm = False + is_bnd = True + + class x86_rm_reg_noarg(object): prio = default_prio + 1 @@ -2511,6 +2567,9 @@ class x86_rm_reg_mm(x86_rm_reg_noarg, m_arg): class x86_rm_reg_xmm(x86_rm_reg_mm): selreg = gpregs_xmm +class x86_rm_reg_bnd(x86_rm_reg_mm): + selreg = gpregs_bnd + class x86_rm_reg(x86_rm_reg_noarg, m_arg): pass @@ -3196,6 +3255,7 @@ drreg = bs(l=3, cls=(x86_rm_dr, ), order =1, fname = "reg") mm_reg = bs(l=3, cls=(x86_rm_reg_mm, ), order =1, fname = "reg") xmm_reg = bs(l=3, cls=(x86_rm_reg_xmm, ), order =1, fname = "reg") +bnd_reg = bs(l=3, cls=(x86_rm_reg_bnd, ), order =1, fname = "reg") fltreg = bs(l=3, cls=(x86_rm_flt, ), order =1, fname = "reg") @@ -3226,8 +3286,15 @@ rm_arg_mm_reg = bs(l=0, cls=(x86_rm_mm_reg,), fname='rmarg') rm_arg_xmm = bs(l=0, cls=(x86_rm_xmm,), fname='rmarg') rm_arg_xmm_m32 = bs(l=0, cls=(x86_rm_xmm_m32,), fname='rmarg') rm_arg_xmm_m64 = bs(l=0, cls=(x86_rm_xmm_m64,), fname='rmarg') +rm_arg_xmm_m128 = bs(l=0, cls=(x86_rm_xmm_m128,), fname='rmarg') rm_arg_xmm_reg = bs(l=0, cls=(x86_rm_xmm_reg,), fname='rmarg') +rm_arg_bnd = bs(l=0, cls=(x86_rm_bnd,), fname='rmarg') +rm_arg_bnd_m64 = bs(l=0, cls=(x86_rm_bnd_m64,), fname='rmarg') +rm_arg_bnd_m128 = bs(l=0, cls=(x86_rm_bnd_m128,), fname='rmarg') +rm_arg_bnd_reg = bs(l=0, cls=(x86_rm_bnd_reg,), fname='rmarg') + + swapargs = bs_swapargs(l=1, fname="swap", mn_mod=range(1 << 1)) @@ -3353,6 +3420,17 @@ addop("and", [bs("100000"), se, w8] + rmmod(d4, rm_arg_w8) + [d_imm]) addop("and", [bs("001000"), swapargs, w8] + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("bndmov", [bs8(0x0f), bs8(0x1a), pref_66, bs_modeno64] + + rmmod(bnd_reg, rm_arg_bnd_m64), [bnd_reg, rm_arg_bnd_m64]) +addop("bndmov", [bs8(0x0f), bs8(0x1a), pref_66, bs_mode64] + + rmmod(bnd_reg, rm_arg_bnd_m128), [bnd_reg, rm_arg_bnd_m128]) +addop("bndmov", [bs8(0x0f), bs8(0x1b), pref_66, bs_modeno64] + + rmmod(bnd_reg, rm_arg_bnd_m64), [rm_arg_bnd_m64, bnd_reg]) +addop("bndmov", [bs8(0x0f), bs8(0x1b), pref_66, bs_mode64] + + rmmod(bnd_reg, rm_arg_bnd_m128), [rm_arg_bnd_m128, bnd_reg]) + + + addop("bsf", [bs8(0x0f), bs8(0xbc)] + rmmod(rmreg)) addop("bsr", [bs8(0x0f), bs8(0xbd), mod, rmreg, rm, sib_scale, sib_index, sib_base, disp, rm_arg]) @@ -3645,6 +3723,8 @@ addop("lgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d2, modrm=mod_mem)) addop("lidt", [bs8(0x0f), bs8(0x01)] + rmmod(d3, modrm=mod_mem)) addop("lfence", [bs8(0x0f), bs8(0xae), bs8(0xe8)]) +addop("mfence", [bs8(0x0f), bs8(0xae), bs8(0xf0)]) +addop("sfence", [bs8(0x0f), bs8(0xae), bs8(0xf8)]) addop("leave", [bs8(0xc9), stk]) @@ -4138,6 +4218,11 @@ addop("cvttsd2si",[bs8(0x0f), bs8(0x2c), pref_f2] addop("cvttss2si",[bs8(0x0f), bs8(0x2c), pref_f3] + rmmod(reg, rm_arg_xmm_m32)) +addop("palignr", [bs8(0x0f), bs8(0x73), bs8(0x0f), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64) + [u08], [mm_reg, rm_arg_mm_m64, u08]) +addop("palignr", [bs8(0x0f), bs8(0x3a), bs8(0x0f), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [u08], [xmm_reg, rm_arg_xmm_m128, u08]) + addop("psrlq", [bs8(0x0f), bs8(0x73), no_xmm_pref] + rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) addop("psrlq", [bs8(0x0f), bs8(0x73), pref_66] + @@ -4249,6 +4334,11 @@ addop("pcmpeqd", [bs8(0x0f), bs8(0x76), no_xmm_pref] + addop("pcmpeqd", [bs8(0x0f), bs8(0x76), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("pcmpgtb", [bs8(0x0f), bs8(0x64), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpgtb", [bs8(0x0f), bs8(0x64), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + addop("pcmpgtd", [bs8(0x0f), bs8(0x66), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) addop("pcmpgtd", [bs8(0x0f), bs8(0x66), pref_66] + diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py index 5db75e37..cb7e0d7b 100644 --- a/miasm2/arch/x86/regs.py +++ b/miasm2/arch/x86/regs.py @@ -40,6 +40,8 @@ regs_xmm_expr = [ExprId(x, 128) for x in regs_xmm_str] regs_mm_str = ["MM%d" % i for i in xrange(16)] regs_mm_expr = [ExprId(x, 64) for x in regs_mm_str] +regs_bnd_str = ["BND%d" % i for i in xrange(4)] +regs_bnd_expr = [ExprId(x, 128) for x in regs_bnd_str] gpregs08 = reg_info(regs08_str, regs08_expr) gpregs08_64 = reg_info(regs08_64_str, regs08_64_expr) @@ -49,6 +51,7 @@ gpregs64 = reg_info(regs64_str, regs64_expr) gpregs_xmm = reg_info(regs_xmm_str, regs_xmm_expr) gpregs_mm = reg_info(regs_mm_str, regs_mm_expr) +gpregs_bnd = reg_info(regs_bnd_str, regs_bnd_expr) r08_eax = reg_info([regs08_str[0]], [regs08_expr[0]]) r16_eax = reg_info([regs16_str[0]], [regs16_expr[0]]) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 81da8107..56aca1c2 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -2626,11 +2626,34 @@ def nop(_, instr, a=None): return [], [] +def prefetch0(_, instr, src=None): + # see 4-198 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def prefetch1(_, instr, src=None): + # see 4-198 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def prefetch2(_, instr, src=None): + # see 4-198 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + def prefetchw(_, instr, src=None): # see 4-201 on this documentation # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf return [], [] +def prefetchnta(_, instr, src=None): + # see 4-201 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + def lfence(_, instr, src=None): # see 3-485 on this documentation @@ -2638,6 +2661,18 @@ def lfence(_, instr, src=None): return [], [] +def mfence(_, instr, src=None): + # see 3-516 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def sfence(_, instr, src=None): + # see 3-356 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + def ud2(_, instr, src=None): e = [m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_ILLEGAL_INSN, exception_flags.size))] @@ -3245,23 +3280,17 @@ def xorps(_, instr, dst, src): def rdmsr(ir, instr): - msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt( - 0, - 8) * mRCX[instr.mode][:32] + msr_addr = m2_expr.ExprId('MSR', 64) + m2_expr.ExprInt(8, 64) * mRCX[32].zeroExtend(64) e = [] - e.append( - m2_expr.ExprAff(mRAX[instr.mode][:32], ir.ExprMem(msr_addr, 32))) - e.append(m2_expr.ExprAff(mRDX[instr.mode][:32], m2_expr.ExprMem( - msr_addr + m2_expr.ExprInt(4, msr_addr.size), 32))) + e.append(m2_expr.ExprAff(mRAX[32], ir.ExprMem(msr_addr, 32))) + e.append(m2_expr.ExprAff(mRDX[32], ir.ExprMem(msr_addr + m2_expr.ExprInt(4, 64), 32))) return e, [] def wrmsr(ir, instr): - msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt( - 8, - 32) * mRCX[instr.mode][:32] + msr_addr = m2_expr.ExprId('MSR', 64) + m2_expr.ExprInt(8, 64) * mRCX[32].zeroExtend(64) e = [] - src = m2_expr.ExprCompose(mRAX[instr.mode][:32], mRDX[instr.mode][:32]) + src = m2_expr.ExprCompose(mRAX[32], mRDX[32]) e.append(m2_expr.ExprAff(ir.ExprMem(msr_addr, 64), src)) return e, [] @@ -3647,6 +3676,21 @@ def ucomiss(_, instr, src1, src2): return e, [] +def ucomisd(_, instr, src1, src2): + e = [] + e.append(m2_expr.ExprAff(zf, m2_expr.ExprOp( + 'ucomisd_zf', src1[:64], src2[:64]))) + e.append(m2_expr.ExprAff(pf, m2_expr.ExprOp( + 'ucomisd_pf', src1[:64], src2[:64]))) + e.append(m2_expr.ExprAff(cf, m2_expr.ExprOp( + 'ucomisd_cf', src1[:64], src2[:64]))) + + e.append(m2_expr.ExprAff(of, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAff(af, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAff(nf, m2_expr.ExprInt(0, 1))) + + return e, [] + def pshufb(_, instr, dst, src): e = [] @@ -3749,6 +3793,16 @@ def pslldq(_, instr, dst, src): return [m2_expr.ExprAff(dst, dst << m2_expr.ExprInt(8 * count, dst.size))], [] +def psrldq(_, instr, dst, src): + assert src.is_int() + e = [] + count = int(src) + if count > 15: + return [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size))], [] + else: + return [m2_expr.ExprAff(dst, dst >> m2_expr.ExprInt(8 * count, dst.size))], [] + + def iret(ir, instr): """IRET implementation XXX: only support "no-privilege change" @@ -4090,6 +4144,29 @@ def smsw(ir, instr, dst): return e, [] +def bndmov(ir, instr, dst, src): + # Implemented as a NOP, because BND side effects are not yet supported + return [], [] + +def palignr(ir, instr, dst, src, imm): + # dst.src >> imm * 8 [:dst.size] + + shift = int(imm) * 8 + if shift == 0: + result = src + elif shift == src.size: + result = dst + elif shift > src.size: + result = dst >> m2_expr.ExprInt(shift - src.size, dst.size) + else: + # shift < src.size + result = m2_expr.ExprCompose( + src[shift:], + dst[:shift], + ) + + return [m2_expr.ExprAff(dst, result)], [] + mnemo_func = {'mov': mov, 'xchg': xchg, @@ -4263,8 +4340,14 @@ mnemo_func = {'mov': mov, 'fcomip': fcomip, 'nop': nop, 'ud2': ud2, + 'prefetch0': prefetch0, + 'prefetch1': prefetch1, + 'prefetch2': prefetch2, 'prefetchw': prefetchw, + 'prefetchnta': prefetchnta, 'lfence': lfence, + 'mfence': mfence, + 'sfence': sfence, 'fnop': nop, # XXX 'hlt': hlt, 'rdtsc': rdtsc, @@ -4439,9 +4522,7 @@ mnemo_func = {'mov': mov, "cvttss2si": cvttss2si, - - - + "bndmov": bndmov, @@ -4449,6 +4530,7 @@ mnemo_func = {'mov': mov, "movss": movss, "ucomiss": ucomiss, + "ucomisd": ucomisd, # # MMX/AVX/SSE operations @@ -4520,6 +4602,9 @@ mnemo_func = {'mov': mov, "pslld": pslld, "psllq": psllq, "pslldq": pslldq, + "psrldq": psrldq, + + "palignr": palignr, "pmaxub": pmaxub, "pmaxuw": pmaxuw, diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index d31c509c..6b189c4d 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -900,7 +900,8 @@ class ExprOp(Expr): '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', 'fxam_c0', 'fxam_c1', 'fxam_c2', 'fxam_c3', "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", - "ucomiss_zf", "ucomiss_pf", "ucomiss_cf"]: + "ucomiss_zf", "ucomiss_pf", "ucomiss_cf", + "ucomisd_zf", "ucomisd_pf", "ucomisd_cf"]: size = 1 elif self._op in [TOK_INF, TOK_INF_SIGNED, TOK_INF_UNSIGNED, TOK_INF_EQUAL, diff --git a/miasm2/expression/stp.py b/miasm2/expression/stp.py deleted file mode 100644 index c9b76e4c..00000000 --- a/miasm2/expression/stp.py +++ /dev/null @@ -1,68 +0,0 @@ -from miasm2.expression.expression import * - - -""" -Quick implementation of miasm traduction to stp langage -TODO XXX: finish -""" - - -def ExprInt_strcst(self): - b = bin(int(self))[2::][::-1] - b += "0" * self.size - b = b[:self.size][::-1] - return "0bin" + b - - -def ExprId_strcst(self): - return self.name - - -def genop(op, size, a, b): - return op + '(' + str(size) + ',' + a + ', ' + b + ')' - - -def genop_nosize(op, size, a, b): - return op + '(' + a + ', ' + b + ')' - - -def ExprOp_strcst(self): - op = self.op - op_dct = {"|": " | ", - "&": " & "} - if op in op_dct: - return '(' + op_dct[op].join([x.strcst() for x in self.args]) + ')' - op_dct = {"-": "BVUMINUS"} - if op in op_dct: - return op_dct[op] + '(' + self.args[0].strcst() + ')' - op_dct = {"^": ("BVXOR", genop_nosize), - "+": ("BVPLUS", genop)} - if not op in op_dct: - raise ValueError('implement op', op) - op, f = op_dct[op] - args = [x.strcst() for x in self.args][::-1] - a = args.pop() - b = args.pop() - size = self.args[0].size - out = f(op, size, a, b) - while args: - out = f(op, size, out, args.pop()) - return out - - -def ExprSlice_strcst(self): - return '(' + self.arg.strcst() + ')[%d:%d]' % (self.stop - 1, self.start) - - -def ExprCond_strcst(self): - cond = self.cond.strcst() - src1 = self.src1.strcst() - src2 = self.src2.strcst() - return "(IF %s=(%s) THEN %s ELSE %s ENDIF)" % ( - "0bin%s" % ('0' * self.cond.size), cond, src2, src1) - -ExprInt.strcst = ExprInt_strcst -ExprId.strcst = ExprId_strcst -ExprOp.strcst = ExprOp_strcst -ExprCond.strcst = ExprCond_strcst -ExprSlice.strcst = ExprSlice_strcst diff --git a/miasm2/jitter/Jittcc.c b/miasm2/jitter/Jittcc.c index 955491ad..88359147 100644 --- a/miasm2/jitter/Jittcc.c +++ b/miasm2/jitter/Jittcc.c @@ -219,25 +219,25 @@ PyObject* tcc_compil(PyObject* self, PyObject* args) return NULL; if (tcc_compile_string(tcc_state, func_code) != 0) { - fprintf(stderr, "Erreur de compilation !\n"); + fprintf(stderr, "Error compiling !\n"); fprintf(stderr, "%s\n", func_code); exit(1); } /* XXX configure tinycc install with --disable-static */ if (tcc_relocate(tcc_state, TCC_RELOCATE_AUTO) < 0) { - fprintf(stderr, "tcc relocate error\n"); + fprintf(stderr, "TCC relocate error\n"); exit(1); } entry = tcc_get_symbol(tcc_state, func_name); if (!entry){ - fprintf(stderr, "Erreur de symbole %s!\n", func_name); + fprintf(stderr, "Error getting symbol %s!\n", func_name); fprintf(stderr, "%s\n", func_name); exit(1); } ret = PyTuple_New(2); if (ret == NULL) { - fprintf(stderr, "Erreur alloc %s!\n", func_name); + fprintf(stderr, "Error alloc %s!\n", func_name); fprintf(stderr, "%s\n", func_name); exit(1); } diff --git a/miasm2/jitter/arch/JitCore_arm.c b/miasm2/jitter/arch/JitCore_arm.c index d6e27acc..84716c2d 100644 --- a/miasm2/jitter/arch/JitCore_arm.c +++ b/miasm2/jitter/arch/JitCore_arm.c @@ -207,7 +207,7 @@ uint32_t clz(uint32_t arg) int i; for (i=0; i<32; i++) { - if (arg & (1 << (31-i))) + if (arg & (1ull << (31-i))) break; } return i; diff --git a/miasm2/jitter/jitcore_tcc.py b/miasm2/jitter/jitcore_tcc.py index 5b47bf6d..28288400 100644 --- a/miasm2/jitter/jitcore_tcc.py +++ b/miasm2/jitter/jitcore_tcc.py @@ -69,7 +69,7 @@ class JitCore_Tcc(JitCore_Cc_Base): fname_out = os.path.join(self.tempdir, "%s.c" % block_hash) if os.access(fname_out, os.R_OK): - func_code = open(fname_out).read() + func_code = open(fname_out, "rb").read() else: func_code = self.gen_c_code(block.label, block) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 0f88d842..83349781 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -313,7 +313,7 @@ class LLVMContext_JIT(LLVMContext): # No need to overwrite return - open(fname_out, "w").write(buffer) + open(fname_out, "wb").write(buffer) @staticmethod def cache_getbuffer(module): @@ -323,7 +323,7 @@ class LLVMContext_JIT(LLVMContext): fname_out = module.fname_out if os.access(fname_out, os.R_OK): - return open(fname_out).read() + return open(fname_out, "rb").read() return None def enable_cache(self): diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py index 5c523c6c..65bf284b 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -66,20 +66,31 @@ def preload_pe(vm, e, runtime_lib, patch_vm_imp=True): return dyn_funcs -def is_redirected_export(e, ad): - # test is ad points to code or dll name - out = '' - for i in xrange(0x200): - c = e.virt.get(ad + i) - if c == "\x00": - break - out += c - if not (c.isalnum() or c in "_.-+*$@&#()[]={}"): - return False - if not "." in out: +def is_redirected_export(pe_obj, addr): + """Test if the @addr is a forwarded export address. If so, return + dllname/function name couple. If not, return False. + + An export address is a forwarded export if the rva is in the export + directory of the pe. + + @pe_obj: PE instance + @addr: virtual address of the function to test + """ + + export_dir = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT] + addr_rva = pe_obj.virt2rva(addr) + if not (export_dir.rva <= addr_rva < export_dir.rva + export_dir.size): return False - i = out.find('.') - return out[:i], out[i + 1:] + addr_end = pe_obj.virt.find('\x00', addr) + data = pe_obj.virt.get(addr, addr_end) + + dllname, func_info = data.split('.', 1) + dllname = dllname.lower() + + # Test if function is forwarded using ordinal + if func_info.startswith('#'): + func_info = int(func_info[1:]) + return dllname, func_info def get_export_name_addr_list(e): @@ -223,7 +234,7 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): log.info('Loading module %r', fname_in) fname = os.path.join(lib_path_base, fname_in) - with open(fname) as fstream: + with open(fname, "rb") as fstream: pe = vm_load_pe(vm, fstream.read(), name=fname_in, **kargs) libs.add_export_lib(pe, fname_in) return pe @@ -324,7 +335,7 @@ def vm2pe(myjit, fname, libs=None, e_orig=None, mye.DirRes.set_rva(s_res.addr) log.debug('%r', mye.DirRes) # generation - open(fname, 'w').write(str(mye)) + open(fname, 'wb').write(str(mye)) return mye @@ -490,7 +501,7 @@ def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib, pe_obj = name2module[name] else: try: - with open(fname) as fstream: + with open(fname, "rb") as fstream: log.info('Loading module name %r', fname) pe_obj = vm_load_pe( vm, fstream.read(), name=fname, **kwargs) diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c index 59cbdf6e..c628aeff 100644 --- a/miasm2/jitter/vm_mngr.c +++ b/miasm2/jitter/vm_mngr.c @@ -843,24 +843,23 @@ unsigned int rcr_rez_op(unsigned int size, unsigned int a, unsigned int b, unsig return tmp; } -unsigned int x86_bsr(unsigned int size, uint64_t src) +unsigned int x86_bsr(uint64_t size, uint64_t src) { - int i; + uint64_t i; for (i=size-1; i>=0; i--){ - if (src & (1<<i)) + if (src & (1ull << i)) return i; } fprintf(stderr, "sanity check error bsr\n"); exit(0); } -unsigned int x86_bsf(unsigned int size, uint64_t src) +unsigned int x86_bsf(uint64_t size, uint64_t src) { - int i; - + uint64_t i; for (i=0; i<size; i++){ - if (src & (1<<i)) + if (src & (1ull << i)) return i; } fprintf(stderr, "sanity check error bsf\n"); diff --git a/miasm2/os_dep/win_api_x86_32.py b/miasm2/os_dep/win_api_x86_32.py index e3addda6..c90d7939 100644 --- a/miasm2/os_dep/win_api_x86_32.py +++ b/miasm2/os_dep/win_api_x86_32.py @@ -1834,9 +1834,9 @@ def ntdll_LdrGetProcedureAddress(jitter): def ntdll_memset(jitter): - ret_ad, args = jitter.func_args_stdcall(['addr', 'c', 'size']) + ret_ad, args = jitter.func_args_cdecl(['addr', 'c', 'size']) jitter.vm.set_mem(args.addr, chr(args.c) * args.size) - jitter.func_ret_stdcall(ret_ad, args.addr) + jitter.func_ret_cdecl(ret_ad, args.addr) def msvcrt_memset(jitter): diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 694e18c0..d3b2964c 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2147,6 +2147,10 @@ reg_tests = [ (m64, "00000000 LFENCE", "0faee8"), + (m64, "00000000 MFENCE", + "0FAEF0"), + (m64, "00000000 SFENCE", + "0FAEF8"), (m32, "00000000 SUB AL, 0x11", @@ -2798,6 +2802,12 @@ reg_tests = [ (m32, "00000000 PCMPGTQ XMM0, XMM5", "660f3837C5"), + (m64, "00000000 PCMPGTB XMM8, XMM5", + "66440f64c5"), + + (m64, "00000000 PALIGNR XMM1, XMM2, 0xC", + "660f3a0fca0c"), + (m32, "00000000 PUNPCKHBW MM2, QWORD PTR [EDX]", "0F6812"), @@ -2955,6 +2965,11 @@ reg_tests = [ (m32, "00000000 AESDECLAST XMM1, XMM2", "660f38dfca"), + (m64, "00000000 BNDMOV XMMWORD PTR [RSP + 0x80], BND0", + "660f1b842480000000"), + (m64, "00000000 BNDMOV BND3, XMMWORD PTR [RSP + 0xB0]", + "660f1a9c24b0000000"), + ] diff --git a/test/expression/stp.py b/test/expression/stp.py index a4b037de..38bbf9c8 100755 --- a/test/expression/stp.py +++ b/test/expression/stp.py @@ -8,24 +8,28 @@ class TestIrIr2STP(unittest.TestCase): def test_ExprOp_strcst(self): from miasm2.expression.expression import ExprInt, ExprOp - import miasm2.expression.stp # /!\ REALLY DIRTY HACK + from miasm2.ir.translators.translator import Translator + translator_smt2 = Translator.to_language("smt2") + args = [ExprInt(i, 32) for i in xrange(9)] self.assertEqual( - ExprOp('|', *args[:2]).strcst(), r'(0bin00000000000000000000000000000000 | 0bin00000000000000000000000000000001)') + translator_smt2.from_expr(ExprOp('|', *args[:2])), r'(bvor (_ bv0 32) (_ bv1 32))') self.assertEqual( - ExprOp('-', *args[:2]).strcst(), r'BVUMINUS(0bin00000000000000000000000000000000)') + translator_smt2.from_expr(ExprOp('-', *args[:2])), r'(bvsub (_ bv0 32) (_ bv1 32))') self.assertEqual( - ExprOp('+', *args[:3]).strcst(), r'BVPLUS(32,BVPLUS(32,0bin00000000000000000000000000000000, 0bin00000000000000000000000000000001), 0bin00000000000000000000000000000010)') - self.assertRaises(ValueError, ExprOp('X', *args[:1]).strcst) + translator_smt2.from_expr(ExprOp('+', *args[:3])), r'(bvadd (bvadd (_ bv0 32) (_ bv1 32)) (_ bv2 32))') + self.assertRaises(NotImplementedError, translator_smt2.from_expr, ExprOp('X', *args[:1])) def test_ExprSlice_strcst(self): - from miasm2.expression.expression import ExprInt, ExprSlice - import miasm2.expression.stp # /!\ REALLY DIRTY HACK + from miasm2.expression.expression import ExprInt, ExprOp + from miasm2.ir.translators.translator import Translator + translator_smt2 = Translator.to_language("smt2") + args = [ExprInt(i, 32) for i in xrange(9)] self.assertEqual( - args[0][1:2].strcst(), r'(0bin00000000000000000000000000000000)[1:1]') + translator_smt2.from_expr(args[0][1:2]), r'((_ extract 1 1) (_ bv0 32))') self.assertRaises(ValueError, args[0].__getitem__, slice(1,7,2)) if __name__ == '__main__': |