diff options
31 files changed, 609 insertions, 856 deletions
diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 253386b5..9dc5c6bc 100644 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -10,8 +10,8 @@ from miasm2.analysis.machine import Machine from miasm2.core.interval import interval parser = ArgumentParser("Multi-arch (32 bits) assembler") -parser.add_argument('architecture', help="architecture: " + \ - ",".join(Machine.available_machine())) +parser.add_argument('architecture', help="architecture: " + + ",".join(Machine.available_machine())) parser.add_argument("source", help="Source file to assemble") parser.add_argument("output", help="Output file") parser.add_argument("--PE", help="Create a PE with a few imports", @@ -96,8 +96,13 @@ if args.encrypt: patches = new_patches print patches -for offset, raw in patches.items(): - virt[offset] = raw +if isinstance(virt, StrPatchwork): + for offset, raw in patches.items(): + virt[offset] = raw +else: + for offset, raw in patches.items(): + virt.set(offset, raw) + # Produce output open(args.output, 'wb').write(str(output)) diff --git a/example/disasm/full.py b/example/disasm/full.py index 03928e73..33b2f41f 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -194,10 +194,11 @@ if args.gen_ir: log.info("Print blocs (with analyse)") for label, bloc in ir_arch_a.blocs.iteritems(): print bloc - ir_arch_a.gen_graph() if args.simplify: ir_arch_a.dead_simp() - out = ir_arch_a.graph() + out = ir_arch_a.graph.dot() open('graph_irflow.dot', 'w').write(out) + out = ir_arch.graph.dot() + open('graph_irflow_raw.dot', 'w').write(out) diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 3ed59ffe..2f8999a4 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -45,13 +45,10 @@ for lbl, b in ir_arch.blocs.items(): print b # Dead propagation -ir_arch.gen_graph() -out = ir_arch.graph() -open('graph.dot', 'w').write(out) +open('graph.dot', 'w').write(ir_arch.graph.dot()) print '*' * 80 ir_arch.dead_simp() -out2 = ir_arch.graph() -open('graph2.dot', 'w').write(out2) +open('graph2.dot', 'w').write(ir_arch.graph.dot()) # Display new IR print 'new ir blocs' diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index f0f48015..d98b461a 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -21,6 +21,5 @@ for lbl, b in ir_arch.blocs.items(): print 'read: ', [str(x) for x in o_r] print 'written:', [str(x) for x in o_w] print -ir_arch.gen_graph() -g = ir_arch.graph() -open('graph_instr.dot', 'w').write(g) + +open('graph_instr.dot', 'w').write(ir_arch.graph.dot()) diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index e263a40e..64801e52 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -114,7 +114,6 @@ def gen_bloc_data_flow_graph(ir_arch, ad, block_flow_cb): for irbloc in ir_arch.blocs.values(): print irbloc - ir_arch.gen_graph() ir_arch.dead_simp() irbloc_0 = None diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index 03eea3d5..406f7200 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -136,9 +136,6 @@ for irb in ir_arch.blocs.values(): for i, expr in enumerate(irs): irs[i] = m2_expr.ExprAff(expr_simp(expr.dst), expr_simp(expr.src)) -# Build the IRA Graph -ir_arch.gen_graph() - # Get settings settings = depGraphSettingsForm(ir_arch) settings.Execute() diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index c3d88c36..b181f72a 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -56,7 +56,7 @@ class GraphMiasmIR(GraphViewer): continue dst = ir_arch.dst_trackback(irbloc) for d in dst: - if not self.ir_arch.ExprIsLabel(d): + if not expr_is_label(d): continue d = d.name @@ -138,8 +138,7 @@ for irb in ir_arch.blocs.values(): for i, expr in enumerate(irs): irs[i] = ExprAff(expr_simp(expr.dst), expr_simp(expr.src)) -ir_arch.gen_graph() -out = ir_arch.graph() +out = ir_arch.graph.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) @@ -197,7 +196,6 @@ def get_modified_symbols(sb): def gen_bloc_data_flow_graph(ir_arch, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): out_str = "" - ir_arch.gen_graph() # ir_arch.dead_simp() irbloc_0 = None diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index f1f179b8..72a9feb3 100644 --- a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -25,7 +25,8 @@ def kernel32_GetProcAddress(jitter): else jitter.get_str_ansi(args.fname)) logging.info(fname) - # Get the generated address of the library, and store it in memory to dst_ad + # Get the generated address of the library, and store it in memory to + # dst_ad ad = sb.libs.lib_get_add_func(args.libbase, fname, dst_ad) # Add a breakpoint in case of a call on the resolved function # NOTE: never happens in UPX, just for skeleton @@ -34,7 +35,6 @@ def kernel32_GetProcAddress(jitter): jitter.func_ret_stdcall(ret_ad, ad) - parser = Sandbox_Win_x86_32.parser(description="Generic UPX unpacker") parser.add_argument("filename", help="PE Filename") parser.add_argument('-v', "--verbose", @@ -43,7 +43,9 @@ parser.add_argument("--graph", help="Export the CFG graph in graph.dot", action="store_true") options = parser.parse_args() -sb = Sandbox_Win_x86_32(options.filename, options, globals()) +options.load_hdr = True +sb = Sandbox_Win_x86_32(options.filename, options, globals(), + parse_reloc=False) if options.verbose is True: @@ -84,7 +86,7 @@ def update_binary(jitter): logging.info('updating binary') for s in sb.pe.SHList: sdata = sb.jitter.vm.get_mem(sb.pe.rva2virt(s.addr), s.rawsize) - sb.pe.virt[sb.pe.rva2virt(s.addr)] = sdata + sb.pe.rva.set(s.addr, sdata) # Stop execution jitter.run = False diff --git a/example/symbol_exec/depgraph.py b/example/symbol_exec/depgraph.py index 6aa9cf81..a870b275 100644 --- a/example/symbol_exec/depgraph.py +++ b/example/symbol_exec/depgraph.py @@ -61,9 +61,6 @@ blocks = mdis.dis_multibloc(int(args.func_addr, 0)) for block in blocks: ir_arch.add_bloc(block) -# Build the IRA Graph -ir_arch.gen_graph() - # Get the instance dg = DependencyGraph(ir_arch, implicit=args.implicit, apply_simp=not(args.do_not_simplify), diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 8462f150..9451a407 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -150,7 +150,7 @@ def inter_bloc_flow_link(ir_arch, flow_graph, todo, link_exec_to_data): x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.g.successors(irb.label): + for lbl_dst in ir_arch.graph.successors(irb.label): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) @@ -166,7 +166,7 @@ def create_implicit_flow(ir_arch, flow_graph): while todo: lbl = todo.pop() irb = ir_arch.blocs[lbl] - for lbl_son in ir_arch.g.successors(irb.label): + for lbl_son in ir_arch.graph.successors(irb.label): if not lbl_son in ir_arch.blocs: print "cannot find bloc!!", lbl continue @@ -189,7 +189,7 @@ def create_implicit_flow(ir_arch, flow_graph): irb.in_nodes[n_r] = irb.label, 0, n_r node_n_r = irb.in_nodes[n_r] # print "###", node_n_r - for lbl_p in ir_arch.g.predecessors(irb.label): + for lbl_p in ir_arch.graph.predecessors(irb.label): todo.add(lbl_p) flow_graph.add_uniq_edge(node_n_r, node_n_w) diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index 838183bf..0a5d38aa 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -686,7 +686,6 @@ class DependencyGraph(object): def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True, follow_call=True): """Create a DependencyGraph linked to @ira - The IRA graph must have been computed @ira: IRAnalysis instance @implicit: (optional) Imply implicit dependencies @@ -702,9 +701,6 @@ class DependencyGraph(object): self._step_counter = itertools.count() self._current_step = next(self._step_counter) - # The IRA graph must be computed - assert hasattr(self._ira, 'g') - # Create callback filters. The order is relevant. self._cb_follow = [] if apply_simp: @@ -892,7 +888,7 @@ class DependencyGraph(object): def _get_previousblocks(self, label): """Return an iterator on predecessors blocks of @label, with their lengths""" - preds = self._ira.g.predecessors_iter(label) + preds = self._ira.graph.predecessors_iter(label) for pred_label in preds: length = len(self._get_irs(pred_label)) yield (pred_label, length) diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index b3184626..115fd521 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -1,13 +1,15 @@ +import os import logging from argparse import ArgumentParser from miasm2.analysis.machine import Machine -from miasm2.os_dep import win_api_x86_32_seh from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.analysis import debugging from miasm2.jitter.jitload import log_func + class Sandbox(object): + """ Parent class for Sandbox abstraction """ @@ -29,9 +31,9 @@ class Sandbox(object): yield base_cls - classes = property(lambda x:x.__class__._classes_()) + classes = property(lambda x: x.__class__._classes_()) - def __init__(self, fname, options, custom_methods = {}): + def __init__(self, fname, options, custom_methods={}, **kwargs): """ Initialize a sandbox @fname: str file name @@ -46,9 +48,9 @@ class Sandbox(object): if cls == Sandbox: continue if issubclass(cls, OS): - cls.__init__(self, custom_methods) + cls.__init__(self, custom_methods, **kwargs) else: - cls.__init__(self) + cls.__init__(self, **kwargs) # Logging options if self.options.singlestep: @@ -84,8 +86,11 @@ class Sandbox(object): parser.add_argument("-j", "--jitter", help="Jitter engine. Possible values are: tcc (default), llvm, python", default="tcc") - parser.add_argument('-q', "--quiet-function-calls", action="store_true", + parser.add_argument( + '-q', "--quiet-function-calls", action="store_true", help="Don't log function calls") + parser.add_argument('-i', "--dependencies", action="store_true", + help="Load PE and its dependencies") for base_cls in cls._classes_(): base_cls.update_parser(parser) @@ -121,11 +126,12 @@ class Sandbox(object): class OS(object): + """ Parent class for OS abstraction """ - def __init__(self, custom_methods): + def __init__(self, custom_methods, **kwargs): pass @classmethod @@ -134,13 +140,15 @@ class OS(object): class Arch(object): + """ Parent class for Arch abstraction """ # Architecture name _ARCH_ = None - def __init__(self): + + def __init__(self, **kwargs): self.machine = Machine(self._ARCH_) self.jitter = self.machine.jitter(self.options.jitter) @@ -155,10 +163,12 @@ class OS_Win(OS): "ole32.dll", "urlmon.dll", "ws2_32.dll", 'advapi32.dll', "psapi.dll", ] + modules_path = "win_dll" def __init__(self, custom_methods, *args, **kwargs): - from miasm2.jitter.loader.pe import vm_load_pe, vm_load_pe_libs, preload_pe, libimp_pe - from miasm2.os_dep import win_api_x86_32 + from miasm2.jitter.loader.pe import vm_load_pe, vm_load_pe_libs,\ + preload_pe, libimp_pe, vm_load_pe_and_dependencies + from miasm2.os_dep import win_api_x86_32, win_api_x86_32_seh methods = win_api_x86_32.__dict__ methods.update(custom_methods) @@ -169,20 +179,36 @@ class OS_Win(OS): self.libs = libs win_api_x86_32.winobjs.runtime_dll = libs + self.name2module = {} + fname_basename = os.path.basename(self.fname).lower() + + # Load main pe + with open(self.fname) as fstream: + self.pe = vm_load_pe(self.jitter.vm, fstream.read(), + load_hdr=self.options.load_hdr, **kwargs) + self.name2module[fname_basename] = self.pe + # Load library if self.options.loadbasedll: - all_pe = [] # Load libs in memory - all_pe = vm_load_pe_libs(self.jitter.vm, self.ALL_IMP_DLL, libs) + self.name2module.update(vm_load_pe_libs(self.jitter.vm, + self.ALL_IMP_DLL, + libs, + self.modules_path, + **kwargs)) # Patch libs imports - for pe in all_pe.values(): + for pe in self.name2module.itervalues(): preload_pe(self.jitter.vm, pe, libs) - # Load main pe - with open(self.fname) as fstream: - self.pe = vm_load_pe(self.jitter.vm, fstream.read()) + if self.options.dependencies: + vm_load_pe_and_dependencies(self.jitter.vm, + fname_basename, + self.name2module, + libs, + self.modules_path, + **kwargs) win_api_x86_32.winobjs.current_pe = self.pe @@ -194,20 +220,20 @@ class OS_Win(OS): # Manage SEH if self.options.use_seh: - win_api_x86_32_seh.main_pe_name = self.fname + win_api_x86_32_seh.main_pe_name = fname_basename win_api_x86_32_seh.main_pe = self.pe - win_api_x86_32_seh.loaded_modules = self.ALL_IMP_DLL + win_api_x86_32.winobjs.hcurmodule = self.pe.NThdr.ImageBase + win_api_x86_32_seh.name2module = self.name2module win_api_x86_32_seh.init_seh(self.jitter) win_api_x86_32_seh.set_win_fs_0(self.jitter) - self.entry_point = self.pe.rva2virt(self.pe.Opthdr.AddressOfEntryPoint) + self.entry_point = self.pe.rva2virt( + self.pe.Opthdr.AddressOfEntryPoint) @classmethod def update_parser(cls, parser): - parser.add_argument('-o', "--loadhdr", action="store_true", + parser.add_argument('-o', "--load-hdr", action="store_true", help="Load pe hdr") - parser.add_argument('-e', "--loadmainpe", action="store_true", - help="Load main pe") parser.add_argument('-y', "--use-seh", action="store_true", help="Use windows SEH") parser.add_argument('-l', "--loadbasedll", action="store_true", @@ -230,7 +256,7 @@ class OS_Linux(OS): self.libs = libimp_elf() with open(self.fname) as fstream: - self.elf = vm_load_elf(self.jitter.vm, fstream.read()) + self.elf = vm_load_elf(self.jitter.vm, fstream.read(), **kwargs) preload_elf(self.jitter.vm, self.elf, self.libs) self.entry_point = self.elf.Ehdr.entry @@ -238,7 +264,9 @@ class OS_Linux(OS): # Library calls handler self.jitter.add_lib_handler(self.libs, methods) + class OS_Linux_str(OS): + def __init__(self, custom_methods, *args, **kwargs): from miasm2.jitter.loader.elf import libimp_elf from miasm2.os_dep import linux_stdlib @@ -253,7 +281,8 @@ class OS_Linux_str(OS): data = open(self.fname).read() self.options.load_base_addr = int(self.options.load_base_addr, 0) - self.jitter.vm.add_memory_page(self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data) + self.jitter.vm.add_memory_page( + self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data) # Library calls handler self.jitter.add_lib_handler(libs, methods) @@ -264,16 +293,16 @@ class OS_Linux_str(OS): class Arch_x86(Arch): - _ARCH_ = None # Arch name + _ARCH_ = None # Arch name STACK_SIZE = 0x10000 STACK_BASE = 0x130000 - def __init__(self): - super(Arch_x86, self).__init__() + def __init__(self, **kwargs): + super(Arch_x86, self).__init__(**kwargs) if self.options.usesegm: - self.jitter.ir_arch.do_stk_segm= True - self.jitter.ir_arch.do_ds_segm= True + self.jitter.ir_arch.do_stk_segm = True + self.jitter.ir_arch.do_ds_segm = True self.jitter.ir_arch.do_str_segm = True self.jitter.ir_arch.do_all_segm = True @@ -282,11 +311,10 @@ class Arch_x86(Arch): self.jitter.stack_base = self.STACK_BASE self.jitter.init_stack() - @classmethod def update_parser(cls, parser): parser.add_argument('-s', "--usesegm", action="store_true", - help="Use segments") + help="Use segments") class Arch_x86_32(Arch_x86): @@ -302,34 +330,36 @@ class Arch_arml(Arch): STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_arml, self).__init__() + def __init__(self, **kwargs): + super(Arch_arml, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE self.jitter.stack_base = self.STACK_BASE self.jitter.init_stack() + class Arch_armb(Arch): _ARCH_ = "armb" STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_armb, self).__init__() + def __init__(self, **kwargs): + super(Arch_armb, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE self.jitter.stack_base = self.STACK_BASE self.jitter.init_stack() + class Arch_aarch64l(Arch): _ARCH_ = "aarch64l" STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_aarch64l, self).__init__() + def __init__(self, **kwargs): + super(Arch_aarch64l, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE @@ -342,8 +372,8 @@ class Arch_aarch64b(Arch): STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_aarch64b, self).__init__() + def __init__(self, **kwargs): + super(Arch_aarch64b, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE @@ -351,7 +381,6 @@ class Arch_aarch64b(Arch): self.jitter.init_stack() - class Sandbox_Win_x86_32(Sandbox, Arch_x86_32, OS_Win): def __init__(self, *args, **kwargs): @@ -366,8 +395,7 @@ class Sandbox_Win_x86_32(Sandbox, Arch_x86_32, OS_Win): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -391,8 +419,7 @@ class Sandbox_Win_x86_64(Sandbox, Arch_x86_64, OS_Win): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -415,8 +442,7 @@ class Sandbox_Linux_x86_32(Sandbox, Arch_x86_32, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -440,8 +466,7 @@ class Sandbox_Linux_x86_64(Sandbox, Arch_x86_64, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -460,12 +485,12 @@ class Sandbox_Linux_arml(Sandbox, Arch_arml, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 16) super(Sandbox_Linux_arml, self).run(addr) + class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): def __init__(self, *args, **kwargs): @@ -476,8 +501,7 @@ class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 0) super(Sandbox_Linux_armb_str, self).run(addr) @@ -493,8 +517,7 @@ class Sandbox_Linux_arml_str(Sandbox, Arch_arml, OS_Linux_str): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 0) super(Sandbox_Linux_arml_str, self).run(addr) @@ -510,8 +533,7 @@ class Sandbox_Linux_aarch64l(Sandbox, Arch_aarch64l, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 0) super(Sandbox_Linux_aarch64l, self).run(addr) diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py index 5bc4761f..b7a1f19f 100644 --- a/miasm2/arch/x86/ira.py +++ b/miasm2/arch/x86/ira.py @@ -3,6 +3,7 @@ from miasm2.expression.expression import ExprAff, ExprOp, ExprId from miasm2.core.graph import DiGraph +from miasm2.core.asmbloc import expr_is_label from miasm2.ir.ir import ir, irbloc from miasm2.ir.analysis import ira from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 @@ -46,7 +47,7 @@ class ir_a_x86_16(ir_x86_16, ira): if not l.is_subcall(): return sub_call_dst = l.args[0] - if self.ExprIsLabel(sub_call_dst): + if expr_is_label(sub_call_dst): sub_call_dst = sub_call_dst.name for b in ir_blocs: l = b.lines[-1] @@ -54,7 +55,7 @@ class ir_a_x86_16(ir_x86_16, ira): if not l.is_subcall(): continue sub_call_dst = l.args[0] - if self.ExprIsLabel(sub_call_dst): + if expr_is_label(sub_call_dst): sub_call_dst = sub_call_dst.name lbl = bloc.get_next() new_lbl = self.gen_label() diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 800a1fa2..802b6283 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -660,7 +660,6 @@ def cli(ir, instr): def sti(ir, instr): e = [m2_expr.ExprAff(exception_flags, m2_expr.ExprInt32(EXCEPT_PRIV_INSN))] - e = [] # XXX TODO HACK return e, [] diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py index 6e158061..f7b160f9 100644 --- a/miasm2/core/bin_stream.py +++ b/miasm2/core/bin_stream.py @@ -108,7 +108,7 @@ class bin_stream_file(bin_stream): return self.bin.tell() - self.shift def setoffset(self, val): - self.bin.seek(val + self.shift) + self.bin.seek(val + self.shift) offset = property(getoffset, setoffset) def readbs(self, l=1): @@ -123,7 +123,6 @@ class bin_stream_file(bin_stream): return self.l - (self.offset + self.shift) - class bin_stream_container(bin_stream): def __init__(self, virt_view, offset=0L): @@ -142,13 +141,13 @@ class bin_stream_container(bin_stream): if self.offset + l > self.l: raise IOError("not enough bytes") self.offset += l - return self.bin(self.offset - l, self.offset) + return self.bin.get(self.offset - l, self.offset) def getbytes(self, start, l=1): - return self.bin(start, start + l) + return self.bin.get(start, start + l) def __str__(self): - out = self.bin(self.offset, self.l) + out = self.bin.get(self.offset, self.offset + self.l) return out def setoffset(self, val): diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 31f6294c..40a3bf64 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -4,7 +4,7 @@ import logging from miasm2.ir.symbexec import symbexec -from miasm2.core.graph import DiGraph +from miasm2.ir.ir import ir from miasm2.expression.expression \ import ExprAff, ExprCond, ExprId, ExprInt, ExprMem @@ -14,109 +14,21 @@ console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARNING) -class ira: + +class ira(ir): + """IR Analysis + This class provides higher level manipulations on IR, such as dead + instruction removals. + + This class can be used as a common parent with `miasm2.ir.ir::ir` class. + For instance: + class ira_x86_16(ir_x86_16, ira) + """ def ira_regs_ids(self): """Returns ids of all registers used in the IR""" return self.arch.regs.all_regs_ids + [self.IRDst] - def sort_dst(self, todo, done): - out = set() - while todo: - dst = todo.pop() - if self.ExprIsLabel(dst): - done.add(dst) - elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt): - done.add(dst) - elif isinstance(dst, ExprCond): - todo.add(dst.src1) - todo.add(dst.src2) - elif isinstance(dst, ExprId): - out.add(dst) - else: - done.add(dst) - return out - - def dst_trackback(self, b): - dst = b.dst - todo = set([dst]) - done = set() - - for irs in reversed(b.irs): - if len(todo) == 0: - break - out = self.sort_dst(todo, done) - found = set() - follow = set() - for i in irs: - if not out: - break - for o in out: - if i.dst == o: - follow.add(i.src) - found.add(o) - for o in found: - out.remove(o) - - for o in out: - if o not in found: - follow.add(o) - todo = follow - - return done - - def gen_graph(self, link_all = True): - """ - Gen irbloc digraph - @link_all: also gen edges to non present irblocs - """ - self.g = DiGraph() - for lbl, b in self.blocs.items(): - # print 'add', lbl - self.g.add_node(lbl) - # dst = self.get_bloc_dst(b) - dst = self.dst_trackback(b) - # print "\tdst", dst - for d in dst: - if isinstance(d, ExprInt): - d = ExprId( - self.symbol_pool.getby_offset_create(int(d.arg))) - if self.ExprIsLabel(d): - if d.name in self.blocs or link_all is True: - self.g.add_edge(lbl, d.name) - - def graph(self): - """Output the graphviz script""" - out = """ - digraph asm_graph { - size="80,50"; - node [ - fontsize = "16", - shape = "box" - ]; - """ - all_lbls = {} - for lbl in self.g.nodes(): - if lbl not in self.blocs: - continue - irb = self.blocs[lbl] - ir_txt = [str(lbl)] - for irs in irb.irs: - for l in irs: - ir_txt.append(str(l)) - ir_txt.append("") - ir_txt.append("") - all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) - for l, v in all_lbls.items(): - # print l, v - out += '%s [label="%s"];\n' % (l, v) - - for a, b in self.g.edges(): - # print 'edge', a, b, hash(a), hash(b) - out += '%s -> %s;\n' % (hash(a), hash(b)) - out += '}' - return out - def remove_dead_instr(self, irb, useful): """Remove dead affectations using previous reaches analysis @irb: irbloc instance @@ -149,12 +61,12 @@ class ira: useful = set() - for node in self.g.nodes(): + for node in self.graph.nodes(): if node not in self.blocs: continue block = self.blocs[node] - successors = self.g.successors(node) + successors = self.graph.successors(node) has_son = bool(successors) for p_son in successors: if p_son not in self.blocs: @@ -274,7 +186,7 @@ class ira: for key, value in irb.cur_reach[0].iteritems()} # Compute reach from predecessors - for n_pred in self.g.predecessors(irb.label): + for n_pred in self.graph.predecessors(irb.label): p_block = self.blocs[n_pred] # Handle each register definition @@ -313,7 +225,7 @@ class ira: analysis""" fixed = True - for node in self.g.nodes(): + for node in self.graph.nodes(): if node in self.blocs: irb = self.blocs[node] if (irb.cur_reach != irb.prev_reach or @@ -329,13 +241,11 @@ class ira: Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - - PRE: gen_graph() """ fixed_point = False log.debug('iteration...') while not fixed_point: - for node in self.g.nodes(): + for node in self.graph.nodes(): if node in self.blocs: self.compute_reach_block(self.blocs[node]) fixed_point = self._test_kill_reach_fix() @@ -347,8 +257,6 @@ class ira: Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - - PRE: gen_graph() """ # Update r/w variables for all irblocs self.get_rw(self.ira_regs_ids()) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index e051dc8c..f957fcab 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -22,15 +22,16 @@ import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core import asmbloc from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmbloc import asm_symbol_pool +from miasm2.core.asmbloc import asm_symbol_pool, expr_is_label, asm_label, \ + asm_bloc +from miasm2.core.graph import DiGraph class irbloc(object): def __init__(self, label, irs, lines = []): - assert(isinstance(label, asmbloc.asm_label)) + assert(isinstance(label, asm_label)) self.label = label self.irs = irs self.lines = lines @@ -119,6 +120,47 @@ class irbloc(object): return "\n".join(o) +class DiGraphIR(DiGraph): + """DiGraph for IR instances""" + + def __init__(self, blocks, *args, **kwargs): + """Instanciate a DiGraphIR + @blocks: IR blocks + """ + self._blocks = blocks + super(DiGraphIR, self).__init__(*args, **kwargs) + + def dot(self): + """Output the graphviz script""" + out = """ + digraph asm_graph { + size="80,50"; + node [ + fontsize = "16", + shape = "box" + ]; + """ + all_lbls = {} + for lbl in self.nodes(): + if lbl not in self._blocks: + continue + irb = self._blocks[lbl] + ir_txt = [str(lbl)] + for irs in irb.irs: + for l in irs: + ir_txt.append(str(l)) + ir_txt.append("") + ir_txt.append("") + all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) + for l, v in all_lbls.items(): + out += '%s [label="%s"];\n' % (l, v) + + for a, b in self.edges(): + out += '%s -> %s;\n' % (hash(a), hash(b)) + out += '}' + return out + + class ir(object): def __init__(self, arch, attrib, symbol_pool=None): @@ -130,6 +172,8 @@ class ir(object): self.sp = arch.getsp(attrib) self.arch = arch self.attrib = attrib + # Lazy structure + self._graph = None def instr2ir(self, l): ir_bloc_cur, ir_blocs_extra = self.get_ir(l) @@ -140,13 +184,13 @@ class ir(object): @ad: an ExprId/ExprInt/label/int""" if (isinstance(ad, m2_expr.ExprId) and - isinstance(ad.name, asmbloc.asm_label)): + isinstance(ad.name, asm_label)): ad = ad.name if isinstance(ad, m2_expr.ExprInt): ad = int(ad.arg) if type(ad) in [int, long]: ad = self.symbol_pool.getby_offset_create(ad) - elif isinstance(ad, asmbloc.asm_label): + elif isinstance(ad, asm_label): ad = self.symbol_pool.getby_name_create(ad.name) return ad @@ -158,7 +202,7 @@ class ir(object): return self.blocs.get(label, None) def add_instr(self, l, ad=0, gen_pc_updt = False): - b = asmbloc.asm_bloc(l) + b = asm_bloc(l) b.lines = [l] self.add_bloc(b, gen_pc_updt) @@ -299,6 +343,8 @@ class ir(object): self.blocs[irb.label] = irb + # Forget graph if any + self._graph = None def get_instr_label(self, instr): """Returns the label associated to an instruction @@ -334,6 +380,70 @@ class ir(object): for b in self.blocs.values(): b.get_rw(regs_ids) - def ExprIsLabel(self, l): - return isinstance(l, m2_expr.ExprId) and isinstance(l.name, - asmbloc.asm_label) + def sort_dst(self, todo, done): + out = set() + while todo: + dst = todo.pop() + if expr_is_label(dst): + done.add(dst) + elif isinstance(dst, m2_expr.ExprMem) or isinstance(dst, m2_expr.ExprInt): + done.add(dst) + elif isinstance(dst, m2_expr.ExprCond): + todo.add(dst.src1) + todo.add(dst.src2) + elif isinstance(dst, m2_expr.ExprId): + out.add(dst) + else: + done.add(dst) + return out + + def dst_trackback(self, b): + dst = b.dst + todo = set([dst]) + done = set() + + for irs in reversed(b.irs): + if len(todo) == 0: + break + out = self.sort_dst(todo, done) + found = set() + follow = set() + for i in irs: + if not out: + break + for o in out: + if i.dst == o: + follow.add(i.src) + found.add(o) + for o in found: + out.remove(o) + + for o in out: + if o not in found: + follow.add(o) + todo = follow + + return done + + def _gen_graph(self): + """ + Gen irbloc digraph + """ + self._graph = DiGraphIR(self.blocs) + for lbl, b in self.blocs.iteritems(): + self._graph.add_node(lbl) + dst = self.dst_trackback(b) + for d in dst: + if isinstance(d, m2_expr.ExprInt): + d = m2_expr.ExprId( + self.symbol_pool.getby_offset_create(int(d.arg))) + if expr_is_label(d): + self._graph.add_edge(lbl, d.name) + + @property + def graph(self): + """Get a DiGraph representation of current IR instance. + Lazy property, building the graph on-demand""" + if self._graph is None: + self._gen_graph() + return self._graph diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 72759900..a7ba1a20 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,13 +18,9 @@ class TranslatorC(Translator): dct_rot = {'<<<': 'rot_left', '>>>': 'rot_right', } - dct_div = {'div8': "div_op", - 'div16': "div_op", - 'div32': "div_op", - 'idiv32': "div_op", # XXX to test - '<<<c_rez': 'rcl_rez_op', - '>>>c_rez': 'rcr_rez_op', - } + dct_rotc = {'<<<c_rez': 'rcl_rez_op', + '>>>c_rez': 'rcr_rez_op', + } def from_ExprId(self, expr): @@ -120,8 +116,8 @@ class TranslatorC(Translator): else: raise NotImplementedError('Unknown op: %r' % expr.op) - elif len(expr.args) == 3 and expr.op in self.dct_div: - return '(%s(%s, %s, %s, %s) &0x%x)' % (self.dct_div[expr.op], + elif len(expr.args) == 3 and expr.op in self.dct_rotc: + return '(%s(%s, %s, %s, %s) &0x%x)' % (self.dct_rotc[expr.op], expr.args[0].size, self.from_expr(expr.args[0]), self.from_expr(expr.args[1]), diff --git a/miasm2/jitter/csts.py b/miasm2/jitter/csts.py index e4b315e1..b71e9463 100644 --- a/miasm2/jitter/csts.py +++ b/miasm2/jitter/csts.py @@ -11,6 +11,8 @@ EXCEPT_INT_XX = (1 << 2) EXCEPT_BREAKPOINT_INTERN = (1 << 10) EXCEPT_ACCESS_VIOL = ((1 << 14) | EXCEPT_DO_NOT_UPDATE_PC) +EXCEPT_DIV_BY_ZERO = ((1 << 16) | EXCEPT_DO_NOT_UPDATE_PC) +EXCEPT_PRIV_INSN = ((1 << 17) | EXCEPT_DO_NOT_UPDATE_PC) # VM Mngr constants PAGE_READ = 1 diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index a035445b..2335cc3c 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -144,12 +144,11 @@ class CallbackHandlerBitflag(CallbackHandler): Iterator on other results""" res = True - for b in self.callbacks: - - if b & bitflag != 0: + for bitflag_expected in self.callbacks: + if bitflag_expected & bitflag == bitflag_expected: # If the flag matched for res in super(CallbackHandlerBitflag, - self).call_callbacks(b, *args): + self).call_callbacks(bitflag_expected, *args): if res is not True: yield res diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py index b3946000..08df632a 100644 --- a/miasm2/jitter/loader/elf.py +++ b/miasm2/jitter/loader/elf.py @@ -17,6 +17,7 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) + def get_import_address_elf(e): import2addr = defaultdict(set) for sh in e.sh: @@ -46,21 +47,21 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): return runtime_lib, dyn_funcs - def vm_load_elf(vm, fdata, **kargs): """ Very dirty elf loader TODO XXX: implement real loader """ - #log.setLevel(logging.DEBUG) + # log.setLevel(logging.DEBUG) e = elf_init.ELF(fdata, **kargs) i = interval() all_data = {} for p in e.ph.phlist: - if p.ph.type != 1: + if p.ph.type != elf_csts.PT_LOAD: continue - log.debug('0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, - p.ph.filesz) + log.debug( + '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, + p.ph.filesz, p.ph.type) data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz] addr_o = p.ph.vaddr a_addr = addr_o & ~0xFFF @@ -68,16 +69,16 @@ def vm_load_elf(vm, fdata, **kargs): b_addr = (b_addr + 0xFFF) & ~0xFFF all_data[addr_o] = data_o # -2: Trick to avoid merging 2 consecutive pages - i += [(a_addr, b_addr-2)] + i += [(a_addr, b_addr - 2)] for a, b in i.intervals: - #print hex(a), hex(b) - vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00"*(b+2-a)) - + # print hex(a), hex(b) + vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00" * (b + 2 - a)) for r_vaddr, data in all_data.items(): vm.set_mem(r_vaddr, data) return e + class libimp_elf(libimp): pass @@ -94,6 +95,7 @@ ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml", (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4", } + def guess_arch(elf): """Return the architecture specified by the ELF container @elf. If unknown, return None""" diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py index 1e876b4e..1c811101 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -10,12 +10,24 @@ from elfesteem import * from miasm2.jitter.csts import * from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp - log = logging.getLogger('loader_pe') hnd = logging.StreamHandler() hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) -log.setLevel(logging.CRITICAL) +log.setLevel(logging.INFO) + + +def get_pe_dependencies(pe_obj): + """Return dependency set + @pe_obj: pe object""" + + if pe_obj.DirImport.impdesc is None: + return set() + out = set() + for dependency in pe_obj.DirImport.impdesc: + libname = dependency.dlldescname.name.lower() + out.add(libname) + return out def get_import_address_pe(e): @@ -58,7 +70,7 @@ def is_redirected_export(e, ad): # test is ad points to code or dll name out = '' for i in xrange(0x200): - c = e.virt(ad + i) + c = e.virt.get(ad + i) if c == "\x00": break out += c @@ -101,6 +113,7 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, **kargs): If all sections are aligned, they will be mapped on several different pages Otherwise, a big page is created, containing all sections """ + # Parse and build a PE instance pe = pe_init.PE(fdata, **kargs) @@ -199,6 +212,9 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): Return the corresponding PE instance Extra arguments are passed to vm_load_pe """ + + log.info('Loading module %r', fname_in) + fname = os.path.join(lib_path_base, fname_in) with open(fname) as fstream: pe = vm_load_pe(vm, fstream.read(), **kargs) @@ -206,7 +222,7 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): return pe -def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs): +def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs): """Call vm_load_pe_lib on each @libs_name filename @vm: VmMngr instance @libs_name: list of str @@ -219,7 +235,7 @@ def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs): for fname in libs_name} -def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll", +def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base, patch_vm_imp=True, **kargs): for e in lib_imgs.values(): preload_pe(e, libs, patch_vm_imp) @@ -228,7 +244,7 @@ def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll", def vm2pe(myjit, fname, libs=None, e_orig=None, min_addr=None, max_addr=None, min_section_offset=0x1000, img_base=None, - added_funcs=None): + added_funcs=None, **kwargs): if e_orig: size = e_orig._wsize else: @@ -274,7 +290,9 @@ def vm2pe(myjit, fname, libs=None, e_orig=None, libbase, dllname = libs.fad2info[funcaddr] libs.lib_get_add_func(libbase, dllname, addr) - new_dll = libs.gen_new_lib(mye, mye.virt.is_addr_in) + filter_import = kwargs.get( + 'filter_import', lambda _, ad: mye.virt.is_addr_in(ad)) + new_dll = libs.gen_new_lib(mye, filter_import) else: new_dll = {} @@ -305,11 +323,26 @@ def vm2pe(myjit, fname, libs=None, e_orig=None, class libimp_pe(libimp): + def __init__(self, *args, **kwargs): + super(libimp_pe, self).__init__(*args, **kwargs) + # dependency -> redirector + self.created_redirected_imports = {} + def add_export_lib(self, e, name): + if name in self.created_redirected_imports: + log.error("%r has previously been created due to redirect\ + imports due to %r. Change the loading order.", + name, self.created_redirected_imports[name]) + raise RuntimeError('Bad import: loading previously created import') + self.all_exported_lib.append(e) # will add real lib addresses to database if name in self.name2off: ad = self.name2off[name] + if e is not None and name in self.fake_libs: + log.error( + "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name) + raise RuntimeError("Bad import") else: log.debug('new lib %s', name) ad = e.NThdr.ImageBase @@ -332,8 +365,6 @@ class libimp_pe(libimp): ret = is_redirected_export(e, ad) if ret: exp_dname, exp_fname = ret - # log.debug('export redirection %s' % imp_ord_or_name) - # log.debug('source %s %s' % (exp_dname, exp_fname)) exp_dname = exp_dname + '.dll' exp_dname = exp_dname.lower() # if dll auto refes in redirection @@ -343,17 +374,21 @@ class libimp_pe(libimp): # schedule func todo = [(imp_ord_or_name, ad)] + todo continue - elif not exp_dname in self.name2off: - raise ValueError('load %r first' % exp_dname) + else: + # import redirected lib from non loaded dll + if not exp_dname in self.name2off: + self.created_redirected_imports.setdefault( + exp_dname, set()).add(name) + + # Ensure import entry is created + new_lib_base = self.lib_get_add_base(exp_dname) + # Ensure function entry is created + _ = self.lib_get_add_func(new_lib_base, exp_fname) + c_name = canon_libname_libfunc(exp_dname, exp_fname) libad_tmp = self.name2off[exp_dname] ad = self.lib_imp2ad[libad_tmp][exp_fname] - # log.debug('%s' % hex(ad)) - # if not imp_ord_or_name in self.lib_imp2dstad[libad]: - # self.lib_imp2dstad[libad][imp_ord_or_name] = set() - # self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) - # log.debug('new imp %s %s' % (imp_ord_or_name, hex(ad))) self.lib_imp2ad[libad][imp_ord_or_name] = ad name_inv = dict([(x[1], x[0]) for x in self.name2off.items()]) @@ -362,10 +397,10 @@ class libimp_pe(libimp): self.fad2cname[ad] = c_name self.fad2info[ad] = libad, imp_ord_or_name - def gen_new_lib(self, target_pe, flt=lambda _: True): + def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs): """Gen a new DirImport description @target_pe: PE instance - @flt: (boolean f(address)) restrict addresses to keep + @filter_import: (boolean f(pe, address)) restrict addresses to keep """ new_lib = [] @@ -377,8 +412,9 @@ class libimp_pe(libimp): for func_name, dst_addresses in self.lib_imp2dstad[ad].items(): out_ads.update({addr: func_name for addr in dst_addresses}) - # Filter available addresses according to @flt - all_ads = [addr for addr in out_ads.keys() if flt(addr)] + # Filter available addresses according to @filter_import + all_ads = [ + addr for addr in out_ads.keys() if filter_import(target_pe, addr)] log.debug('ads: %s', map(hex, all_ads)) if not all_ads: continue @@ -416,6 +452,67 @@ class libimp_pe(libimp): return new_lib + +def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib, + lib_path_base, **kwargs): + """Load a binary and all its dependencies. Returns a dictionnary containing + the association between binaries names and it's pe object + + @vm: virtual memory manager instance + @fname: full path of the binary + @name2module: dict containing association between name and pe + object. Updated. + @runtime_lib: libimp instance + @lib_path_base: directory of the libraries containing dependencies + + """ + + todo = [(fname, fname, 0)] + dependencies = [] + weight2name = {} + done = set() + + # Walk dependencies recursively + while todo: + name, fname, weight = todo.pop() + if name in done: + continue + done.add(name) + weight2name.setdefault(weight, set()).add(name) + if name in name2module: + pe_obj = name2module[name] + else: + try: + with open(fname) as fstream: + log.info('Loading module name %r', fname) + pe_obj = vm_load_pe(vm, fstream.read(), **kwargs) + except IOError: + log.error('Cannot open %s' % fname) + name2module[name] = None + continue + name2module[name] = pe_obj + + new_dependencies = get_pe_dependencies(pe_obj) + todo += [(name, os.path.join(lib_path_base, name), weight - 1) + for name in new_dependencies] + + ordered_modules = sorted(weight2name.items()) + for _, modules in ordered_modules: + for name in modules: + pe_obj = name2module[name] + if pe_obj is None: + continue + # Fix imports + if pe_obj.DirExport: + runtime_lib.add_export_lib(pe_obj, name) + + for pe_obj in name2module.itervalues(): + if pe_obj is None: + continue + preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True) + + return name2module + # machine -> arch PE_machine = {0x14c: "x86_32", 0x8664: "x86_64", diff --git a/miasm2/jitter/loader/utils.py b/miasm2/jitter/loader/utils.py index a6a19cb3..83d1a796 100644 --- a/miasm2/jitter/loader/utils.py +++ b/miasm2/jitter/loader/utils.py @@ -4,7 +4,7 @@ log = logging.getLogger('loader_common') hnd = logging.StreamHandler() hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) -log.setLevel(logging.CRITICAL) +log.setLevel(logging.INFO) def canon_libname_libfunc(libname, libfunc): @@ -15,7 +15,7 @@ def canon_libname_libfunc(libname, libfunc): return str(dn), libfunc -class libimp: +class libimp(object): def __init__(self, lib_base_ad=0x71111000, **kargs): self.name2off = {} @@ -26,6 +26,7 @@ class libimp: self.fad2cname = {} self.fad2info = {} self.all_exported_lib = [] + self.fake_libs = set() def lib_get_add_base(self, name): name = name.lower().strip(' ') @@ -38,7 +39,8 @@ class libimp: ad = self.name2off[name] else: ad = self.libbase_ad - log.debug('new lib %s 0x%x', name, ad) + log.warning("Create dummy entry for %r", name) + self.fake_libs.add(name) self.name2off[name] = ad self.libbase2lastad[ad] = ad + 0x1 self.lib_imp2ad[ad] = {} diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c index 3597ae4f..b86bee1a 100644 --- a/miasm2/jitter/vm_mngr.c +++ b/miasm2/jitter/vm_mngr.c @@ -32,13 +32,6 @@ -/* -struct memory_page_list_head memory_page_pool; -struct code_bloc_list_head code_bloc_pool; - -struct memory_breakpoint_info_head memory_breakpoint_pool; -*/ - /****************memory manager**************/ @@ -83,68 +76,50 @@ void print_val(uint64_t base, uint64_t addr) fprintf(stderr, "addr 0x%"PRIX64" val 0x%"PRIX64"\n", addr-base, *ptr); } - -int is_mem_mapped(vm_mngr_t* vm_mngr, uint64_t ad) +inline int midpoint(int imin, int imax) { - struct memory_page_node * mpn; - /* - mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; - if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) - return 1; - */ - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - if ((mpn->ad <= ad) && (ad <mpn->ad + mpn->size)) - return 1; - } - - return 0; + return (imin + imax) / 2; } -/* return the address base of the memory page - containing addr -*/ -uint64_t get_mem_base_addr(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t *addr_base) +int find_page_node(struct memory_page_node * array, uint64_t key, int imin, int imax) { - struct memory_page_node * mpn; - /* - mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; - if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)){ - *addr_base = mpn->ad; - return 1; - } - */ - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - if ((mpn->ad <= ad) && (ad <mpn->ad + mpn->size)) { - *addr_base = mpn->ad; - return 1; - } + // continue searching while [imin,imax] is not empty + while (imin <= imax) { + // calculate the midpoint for roughly equal partition + int imid = midpoint(imin, imax); + if(array[imid].ad <= key && key < array[imid].ad + array[imid].size) + // key found at index imid + return imid; + // determine which subarray to search + else if (array[imid].ad < key) + // change min index to search upper subarray + imin = imid + 1; + else + // change max index to search lower subarray + imax = imid - 1; } - return 0; + // key was not found + return -1; } struct memory_page_node * get_memory_page_from_address(vm_mngr_t* vm_mngr, uint64_t ad) { struct memory_page_node * mpn; -#if 0 - mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; - if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) - return mpn; - - fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); - vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; - - return NULL; -#else + int i; - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + i = find_page_node(vm_mngr->memory_pages_array, + ad, + 0, + vm_mngr->memory_pages_number); + if (i >= 0) { + mpn = &vm_mngr->memory_pages_array[i]; if ((mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) return mpn; } fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; return NULL; -#endif } @@ -168,7 +143,7 @@ static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint6 return 0; } - /* check read breakpoint*/ + /* check read breakpoint */ LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ if ((b->access & BREAKPOINT_READ) == 0) continue; @@ -566,28 +541,7 @@ int shift_right_arith(unsigned int size, int a, unsigned int b) exit(0); } } -/* -int shift_right_arith_08(int a, unsigned int b) -{ - char i8_a; - i8_a = a; - return (i8_a >> b)&0xff; -} -int shift_right_arith_16(int a, unsigned int b) -{ - short i16_a; - i16_a = a; - return (i16_a >> b)&0xffff; -} - -int shift_right_arith_32(int a, unsigned int b) -{ - int i32_a; - i32_a = a; - return (i32_a >> b)&0xffffffff; -} -*/ uint64_t shift_right_logic(uint64_t size, uint64_t a, uint64_t b) { @@ -609,28 +563,6 @@ uint64_t shift_right_logic(uint64_t size, exit(0); } } -/* -int shift_right_logic_08(unsigned int a, unsigned int b) -{ - unsigned char u8_a; - u8_a = a; - return (u8_a >> b)&0xff; -} - -int shift_right_logic_16(unsigned int a, unsigned int b) -{ - unsigned short u16_a; - u16_a = a; - return (u16_a >> b)&0xffff; -} - -int shift_right_logic_32(unsigned int a, unsigned int b) -{ - unsigned int u32_a; - u32_a = a; - return (u32_a >> b)&0xffffffff; -} -*/ uint64_t shift_left_logic(uint64_t size, uint64_t a, uint64_t b) { @@ -648,22 +580,6 @@ uint64_t shift_left_logic(uint64_t size, uint64_t a, uint64_t b) exit(0); } } -/* -int shift_left_logic_O8(unsigned int a, unsigned int b) -{ - return (a<<b)&0xff; -} - -int shift_left_logic_16(unsigned int a, unsigned int b) -{ - return (a<<b)&0xffff; -} - -int shift_left_logic_32(unsigned int a, unsigned int b) -{ - return (a<<b)&0xffffffff; -} -*/ unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b) { @@ -748,39 +664,6 @@ unsigned int umul16_hi(unsigned short a, unsigned short b) return (c>>16) & 0xffff; } - - - -unsigned int div_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c) -{ - int64_t num; - if (c == 0) - { - //vmmngr.exception_flags |= EXCEPT_INT_DIV_BY_ZERO; - return 0; - } - num = ((int64_t)a << size) + b; - num/=(int64_t)c; - return num; -} - - -unsigned int rem_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c) -{ - int64_t num; - - if (c == 0) - { - //vmmngr.exception_flags |= EXCEPT_INT_DIV_BY_ZERO; - return 0; - } - - num = ((int64_t)a << size) + b; - num = (int64_t)num-c*(num/c); - return num; -} - - uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b) { uint64_t tmp; @@ -1450,10 +1333,9 @@ void dump_code_bloc_pool(vm_mngr_t* vm_mngr) void init_memory_page_pool(vm_mngr_t* vm_mngr) { - unsigned int i; - LIST_INIT(&vm_mngr->memory_page_pool); - for (i=0;i<MAX_MEMORY_PAGE_POOL_TAB; i++) - vm_mngr->memory_page_pool_tab[i] = NULL; + + vm_mngr->memory_pages_number = 0; + vm_mngr->memory_pages_array = NULL; } void init_code_bloc_pool(vm_mngr_t* vm_mngr) @@ -1471,18 +1353,8 @@ void init_memory_breakpoint(vm_mngr_t* vm_mngr) void reset_memory_page_pool(vm_mngr_t* vm_mngr) { - struct memory_page_node * mpn; - unsigned int i; - - while (!LIST_EMPTY(&vm_mngr->memory_page_pool)) { - mpn = LIST_FIRST(&vm_mngr->memory_page_pool); - LIST_REMOVE(mpn, next); - free(mpn->ad_hp); - free(mpn); - } - for (i=0;i<MAX_MEMORY_PAGE_POOL_TAB; i++) - vm_mngr->memory_page_pool_tab[i] = NULL; - + free(vm_mngr->memory_pages_array); + vm_mngr->memory_pages_number = 0; } @@ -1513,21 +1385,14 @@ void reset_memory_breakpoint(vm_mngr_t* vm_mngr) } - +/* We don't use dichotomy here for the insertion */ int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) { struct memory_page_node * mpn; + int i; - /* - for (i=mpn_a->ad >> MEMORY_PAGE_POOL_MASK_BIT; - i<(mpn_a->ad + mpn_a->size + PAGE_SIZE - 1)>>MEMORY_PAGE_POOL_MASK_BIT; - i++){ - if (memory_page_pool_tab[i] !=NULL){ - return 1; - } - } - */ - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + for (i=0;i<vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; if (mpn->ad >= mpn_a->ad + mpn_a->size) continue; if (mpn->ad + mpn->size <= mpn_a->ad) @@ -1544,54 +1409,41 @@ int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) return 0; } -void insert_mpn_in_tab(struct memory_page_node* mpn_a) -{ - /* - for (i=mpn_a->ad >> MEMORY_PAGE_POOL_MASK_BIT; - i<(mpn_a->ad + mpn_a->size + PAGE_SIZE - 1)>>MEMORY_PAGE_POOL_MASK_BIT; - i++){ - if (memory_page_pool_tab[i] !=NULL){ - fprintf(stderr, "known page in tab\n"); - exit(1); - } - memory_page_pool_tab[i] = mpn_a; - } - */ - -} +/* We don't use dichotomy here for the insertion */ void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) { struct memory_page_node * mpn; - struct memory_page_node * lmpn; + int i; - if (LIST_EMPTY(&vm_mngr->memory_page_pool)){ - LIST_INSERT_HEAD(&vm_mngr->memory_page_pool, mpn_a, next); - insert_mpn_in_tab(mpn_a); - return; - } - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - lmpn = mpn; + for (i=0; i < vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; if (mpn->ad < mpn_a->ad) continue; - LIST_INSERT_BEFORE(mpn, mpn_a, next); - insert_mpn_in_tab(mpn_a); - return; + break; } - LIST_INSERT_AFTER(lmpn, mpn_a, next); - insert_mpn_in_tab(mpn_a); + vm_mngr->memory_pages_array = realloc(vm_mngr->memory_pages_array, + sizeof(struct memory_page_node) * + (vm_mngr->memory_pages_number+1)); + + memmove(&vm_mngr->memory_pages_array[i+1], + &vm_mngr->memory_pages_array[i], + sizeof(struct memory_page_node) * (vm_mngr->memory_pages_number - i) + ); + + vm_mngr->memory_pages_array[i] = *mpn_a; + vm_mngr->memory_pages_number ++; } -/* - Return a char* representing the repr of vm_mngr_t object -*/ +/* Return a char* representing the repr of vm_mngr_t object */ char* dump(vm_mngr_t* vm_mngr) { char buf[100]; int length; int total_len = 0; char *buf_final; + int i; struct memory_page_node * mpn; buf_final = malloc(1); @@ -1600,8 +1452,9 @@ char* dump(vm_mngr_t* vm_mngr) exit(0); } buf_final[0] = '\x00'; - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + for (i=0; i< vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; length = snprintf(buf, sizeof(buf), "ad 0x%"PRIX64" size 0x%"PRIX64" %c%c%c\n", (uint64_t)mpn->ad, @@ -1664,59 +1517,6 @@ void remove_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, unsigned int acce } - - - - - -unsigned int get_memory_page_next(vm_mngr_t* vm_mngr, unsigned int n_ad) -{ - struct memory_page_node * mpn; - uint64_t ad = 0; - - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - if (mpn->ad < n_ad) - continue; - - if (ad == 0 || mpn->ad <ad) - ad = mpn->ad; - } - return ad; -} - - -#if 0 -unsigned int get_memory_page_from_min_ad(unsigned int size) -{ - struct memory_page_node * mpn; - unsigned int c_ad ; - unsigned int min_ad = min_page_ad; - int end = 0; - /* first, find free min ad */ - while (!end){ - end = 1; - LIST_FOREACH(mpn, &memory_page_pool, next){ - c_ad = (mpn->ad + mpn->size+0x1000)&0xfffff000; - if (c_ad <= min_ad) - continue; - if (mpn->ad <= min_ad){ - min_ad = c_ad; - end = 0; - break; - } - if (mpn->ad - min_ad < size){ - min_ad = c_ad; - end = 0; - break; - } - } - } - return min_ad; - } -#endif - - - /********************************************/ void hexdump(char* m, unsigned int l) @@ -1759,40 +1559,6 @@ void hexdump(char* m, unsigned int l) } - - -unsigned int access_segment(unsigned int d) -{ - // XXX TODO - printf("access segment %X\n", d); - return 0; -} -unsigned int access_segment_ok(unsigned int d) -{ - // XXX TODO - printf("access segment ok %X\n", d); - return 0; -} - -unsigned int load_segment_limit(unsigned int d) -{ - // XXX TODO - printf("load segment limit %X\n", d); - return 0; -} -unsigned int load_segment_limit_ok(unsigned int d) -{ - // XXX TODO - printf("load segment limit ok %X\n", d); - return 0; -} - -unsigned int load_tr_segment_selector(unsigned int d) -{ - // XXX TODO - return 0; -} - // Return vm_mngr's exception flag value uint64_t get_exception_flag(vm_mngr_t* vm_mngr) { diff --git a/miasm2/jitter/vm_mngr.h b/miasm2/jitter/vm_mngr.h index f5895e12..02b5de73 100644 --- a/miasm2/jitter/vm_mngr.h +++ b/miasm2/jitter/vm_mngr.h @@ -49,7 +49,6 @@ -LIST_HEAD(memory_page_list_head, memory_page_node); LIST_HEAD(code_bloc_list_head, code_bloc_node); LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); @@ -65,13 +64,23 @@ LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); #define VM_BIG_ENDIAN 1 #define VM_LITTLE_ENDIAN 2 + +struct memory_page_node { + uint64_t ad; + uint64_t size; + uint64_t access; + void* ad_hp; +}; + + + typedef struct { int sex; - struct memory_page_list_head memory_page_pool; struct code_bloc_list_head code_bloc_pool; struct memory_breakpoint_info_head memory_breakpoint_pool; - struct memory_page_node *memory_page_pool_tab[MAX_MEMORY_PAGE_POOL_TAB]; + int memory_pages_number; + struct memory_page_node* memory_pages_array; unsigned int *code_addr_tab; unsigned int code_bloc_pool_ad_min; @@ -93,16 +102,6 @@ typedef struct { //extern vm_mngr_t vmmngr; -struct memory_page_node { - uint64_t ad; - uint64_t size; - uint64_t access; - void* ad_hp; - LIST_ENTRY(memory_page_node) next; -}; - - - struct code_bloc_node { uint64_t ad_start; uint64_t ad_stop; @@ -183,11 +182,6 @@ int shift_right_arith(unsigned int size, int a, unsigned int b); uint64_t shift_right_logic(uint64_t size, uint64_t a, uint64_t b); uint64_t shift_left_logic(uint64_t size, uint64_t a, uint64_t b); -/* -int shift_left_logic_08(unsigned int a, unsigned int b); -int shift_left_logic_16(unsigned int a, unsigned int b); -int shift_left_logic_32(unsigned int a, unsigned int b); -*/ unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b); unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b); unsigned int imul_lo_op_08(char a, char b); @@ -202,8 +196,6 @@ unsigned int umul16_lo(unsigned short a, unsigned short b); unsigned int umul16_hi(unsigned short a, unsigned short b); -unsigned int div_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c); -unsigned int rem_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c); uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b); uint64_t rot_right(uint64_t size, uint64_t a, uint64_t b); unsigned int rcl_rez_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf); @@ -261,21 +253,6 @@ unsigned int rcl_rez_op(unsigned int size, unsigned int a, unsigned int b, unsig } -//PyObject* _vm_push_uint32_t(PyObject *item); -//PyObject* _vm_pop_uint32_t(void); -////PyObject* _vm_put_str(PyObject *item); -//PyObject* _vm_set_mem(PyObject *item, PyObject *item_str); -//PyObject* _vm_set_mem_access(PyObject *addr, PyObject *access); -//PyObject* _vm_get_str(PyObject *item, PyObject *item_len); -//PyObject* _vm_add_memory_page(PyObject *item, PyObject *access, PyObject *item_str); -//PyObject* _vm_add_code_bloc(PyObject *item1, PyObject *item2);//, PyObject *item3); -//PyObject* _call_pyfunc_from_globals(char* funcname); -//PyObject* _call_pyfunc_from_eip(void); -// -//PyObject* call_pyfunc_from_globals(char* funcname); -// -//PyObject* _vm_get_gpreg(void); - void hexdump(char* m, unsigned int l); struct code_bloc_node * create_code_bloc_node(uint64_t ad_start, uint64_t ad_stop); @@ -302,20 +279,12 @@ void check_write_code_bloc(vm_mngr_t* vm_mngr, uint64_t my_size, uint64_t addr); char* dump(vm_mngr_t* vm_mngr); void dump_memory_breakpoint_pool(vm_mngr_t* vm_mngr); -//PyObject* _vm_get_all_memory(void); PyObject* addr2BlocObj(vm_mngr_t* vm_mngr, uint64_t addr); /********************************************/ - -//PyObject* _vm_get_cpu_state(void); -//PyObject* _vm_set_cpu_state(PyObject * s_cpustate); - - -//void memory_page_write(unsigned int my_size, uint64_t ad, unsigned int src); -//unsigned int memory_page_read(unsigned int my_size, uint64_t ad); unsigned int get_memory_page_max_address(void); unsigned int get_memory_page_max_user_address(void); @@ -351,11 +320,8 @@ void func_loadlib_fake(void); void func_getproc_fake(void); -//PyObject* _vm_exec_bloc(PyObject* my_eip, PyObject* known_blocs); - unsigned int cpuid(unsigned int a, unsigned int reg_num); double int2double(unsigned int m); -//PyObject* _vm_exec_blocs(PyObject* my_eip); double fadd(double a, double b); double fsub(double a, double b); diff --git a/miasm2/jitter/vm_mngr_py.c b/miasm2/jitter/vm_mngr_py.c index bb0a6207..0a22c397 100644 --- a/miasm2/jitter/vm_mngr_py.c +++ b/miasm2/jitter/vm_mngr_py.c @@ -66,44 +66,6 @@ PyObject* _vm_get_exception(unsigned int xcpt) RAISE(PyExc_TypeError,"arg must be int"); \ } \ - -PyObject* vm_is_mem_mapped(VmMngr* self, PyObject* item) -{ - PyObject *addr; - uint64_t page_addr; - uint32_t ret; - if (!PyArg_ParseTuple(item, "O", &addr)) - return NULL; - - PyGetInt(addr, page_addr); - - ret = is_mem_mapped(&self->vm_mngr, page_addr); - return PyInt_FromLong((long)ret); -} - - - -PyObject* vm_get_mem_base_addr(VmMngr* self, PyObject* item) -{ - PyObject *addr; - - uint64_t page_addr; - uint64_t addr_base; - unsigned int ret; - - if (!PyArg_ParseTuple(item, "O", &addr)) - return NULL; - - PyGetInt(addr, page_addr); - - ret = get_mem_base_addr(&self->vm_mngr, page_addr, &addr_base); - if (ret == 0){ - Py_INCREF(Py_None); - return Py_None; - } - return PyLong_FromUnsignedLongLong((uint64_t)addr_base); -} - static void sig_alarm(int signo) { global_vmmngr->vm_mngr.exception_flags |= BREAK_SIGALARM; @@ -145,10 +107,6 @@ PyObject* vm_add_memory_page(VmMngr* self, PyObject* args) buf_size = PyString_Size(item_str); PyString_AsStringAndSize(item_str, &buf_data, &length); - /* - fprintf(stderr, "add page %"PRIX64" %"PRIX64" %"PRIX64"\n", - page_addr, buf_size, page_access); - */ mpn = create_memory_page_node(page_addr, buf_size, page_access); if (mpn == NULL) RAISE(PyExc_TypeError,"cannot create page"); @@ -378,11 +336,13 @@ PyObject* vm_get_all_memory(VmMngr* self, PyObject* args) struct memory_page_node * mpn; PyObject *dict; PyObject *dict2; + int i; dict = PyDict_New(); - LIST_FOREACH(mpn, &self->vm_mngr.memory_page_pool, next){ + for (i=0;i<self->vm_mngr.memory_pages_number; i++) { + mpn = &self->vm_mngr.memory_pages_array[i]; dict2 = PyDict_New(); @@ -491,24 +451,6 @@ vm_set_little_endian(VmMngr *self, PyObject *value, void *closure) } - -/* -PyObject* add_jitbloc(VmMngr* self, PyObject* args) -{ - PyObject* jitobj; - - if (!PyArg_ParseTuple(args, "O", &addr2obj)) - return NULL; - - Py_INCREF(Py_None); - return Py_None; - -} -*/ - - - - static void VmMngr_dealloc(VmMngr* self) { diff --git a/miasm2/os_dep/win_api_x86_32_seh.py b/miasm2/os_dep/win_api_x86_32_seh.py index d86bd46a..58cc48af 100644 --- a/miasm2/os_dep/win_api_x86_32_seh.py +++ b/miasm2/os_dep/win_api_x86_32_seh.py @@ -80,7 +80,7 @@ FAKE_SEH_B_AD = context_address + 0x2000 cur_seh_ad = FAKE_SEH_B_AD -loaded_modules = ["ntdll.dll", "kernel32.dll"] +name2module = [] main_pe = None main_pe_name = "c:\\xxx\\toto.exe" @@ -206,11 +206,10 @@ class LoadedModules(object): self.module2name[module] = name def __repr__(self): - out = self.name2module.iteritems() - return "\n".join(out) + return "\n".join([str(x) for x in self.name2module.iteritems()]) -def create_modules_chain(jitter, modules_name): +def create_modules_chain(jitter, name2module): """ Create the modules entries. Those modules are not linked in this function. @@ -235,7 +234,7 @@ def create_modules_chain(jitter, modules_name): +0x04c PatchInformation : Ptr32 Void @jitter: jitter instance - @modules_name: a list of modules names + @name2module: dict containing association between name and its pe instance """ modules_info = LoadedModules() @@ -249,31 +248,18 @@ def create_modules_chain(jitter, modules_name): dummy_e.NThdr.sizeofimage = 0 out = "" - for i, m in enumerate([(main_pe_name, main_pe), - ("", dummy_e)] + modules_name): + for i, (fname, pe_obj) in enumerate([("", dummy_e)] + name2module.items()): + if pe_obj is None: + log.warning("Unknown module: ommited from link list (%r)", + fname) + continue addr = base_addr + i * 0x1000 - if isinstance(m, tuple): - fname, e = m - else: - fname, e = m, None bpath = fname.replace('/', '\\') bname_str = os.path.split(fname)[1].lower() bname = "\x00".join(bname_str) + "\x00" - if e is None: - if i == 0: - full_name = fname - else: - full_name = os.path.join("win_dll", fname) - try: - e = pe_init.PE(open(full_name, 'rb').read()) - except IOError: - log.error('No main pe, ldr data will be unconsistant!') - e = None - if e is None: - continue - log.info("Add module %x %r", e.NThdr.ImageBase, bname_str) + log.info("Add module %x %r", pe_obj.NThdr.ImageBase, bname_str) - modules_info.add(bname_str, e, addr) + modules_info.add(bname_str, pe_obj, addr) m_o = "" m_o += pck32(0) @@ -282,9 +268,9 @@ def create_modules_chain(jitter, modules_name): m_o += pck32(0) m_o += pck32(0) m_o += pck32(0) - m_o += pck32(e.NThdr.ImageBase) - m_o += pck32(e.rva2virt(e.Opthdr.AddressOfEntryPoint)) - m_o += pck32(e.NThdr.sizeofimage) + m_o += pck32(pe_obj.NThdr.ImageBase) + m_o += pck32(pe_obj.rva2virt(pe_obj.Opthdr.AddressOfEntryPoint)) + m_o += pck32(pe_obj.NThdr.sizeofimage) m_o += struct.pack('HH', len(bname), len(bname) + 2) m_o += pck32(addr + offset_path) m_o += struct.pack('HH', len(bname), len(bname) + 2) @@ -321,7 +307,7 @@ def fix_InLoadOrderModuleList(jitter, modules_info): dummy_pe = modules_info.name2module.get("", None) special_modules = [main_pe, kernel32_pe, ntdll_pe, dummy_pe] if not all(special_modules): - log.warn('No main pe, ldr data will be unconsistant') + log.warn('No main pe, ldr data will be unconsistant %r', special_modules) loaded_modules = modules_info.modules else: loaded_modules = [module for module in modules_info.modules @@ -461,7 +447,7 @@ def init_seh(jitter): build_teb(jitter, FS_0_AD) build_peb(jitter, peb_address) - modules_info = create_modules_chain(jitter, loaded_modules) + modules_info = create_modules_chain(jitter, name2module) fix_InLoadOrderModuleList(jitter, modules_info) fix_InMemoryOrderModuleList(jitter, modules_info) fix_InInitializationOrderModuleList(jitter, modules_info) diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index 90f2962c..b532080b 100644 --- a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -76,13 +76,13 @@ class Arch(object): return SP -class IRATest(ir, ira): +class IRATest(ira): """Fake IRA class for tests""" def __init__(self, symbol_pool=None): arch = Arch() - ir.__init__(self, arch, 32, symbol_pool) + super(IRATest, self).__init__(arch, 32, symbol_pool) self.IRDst = PC self.ret_reg = R @@ -206,8 +206,8 @@ G1_IRB0 = gen_irbloc(LBL0, [[ExprAff(C, CST1)]]) G1_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, C)]]) G1_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, B)]]) -G1_IRA.g.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) -G1_IRA.g.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) +G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) +G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) G1_IRA.blocs = dict([(irb.label, irb) for irb in [G1_IRB0, G1_IRB1, G1_IRB2]]) @@ -220,8 +220,8 @@ G2_IRB0 = gen_irbloc(LBL0, [[ExprAff(C, CST1)]]) G2_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, CST2)]]) G2_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, B + C)]]) -G2_IRA.g.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) -G2_IRA.g.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) +G2_IRA.graph.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) +G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) G2_IRA.blocs = dict([(irb.label, irb) for irb in [G2_IRB0, G2_IRB1, G2_IRB2]]) @@ -236,10 +236,10 @@ G3_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, CST2)]]) G3_IRB2 = gen_irbloc(LBL2, [[ExprAff(B, CST3)]]) G3_IRB3 = gen_irbloc(LBL3, [[ExprAff(A, B + C)]]) -G3_IRA.g.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) -G3_IRA.g.add_uniq_edge(G3_IRB0.label, G3_IRB2.label) -G3_IRA.g.add_uniq_edge(G3_IRB1.label, G3_IRB3.label) -G3_IRA.g.add_uniq_edge(G3_IRB2.label, G3_IRB3.label) +G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) +G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB2.label) +G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB3.label) +G3_IRA.graph.add_uniq_edge(G3_IRB2.label, G3_IRB3.label) G3_IRA.blocs = dict([(irb.label, irb) for irb in [G3_IRB0, G3_IRB1, G3_IRB2, G3_IRB3]]) @@ -257,9 +257,9 @@ G4_IRB1 = gen_irbloc(LBL1, [[ExprAff(C, C + CST2)], G4_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, B)]]) -G4_IRA.g.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) -G4_IRA.g.add_uniq_edge(G4_IRB1.label, G4_IRB2.label) -G4_IRA.g.add_uniq_edge(G4_IRB1.label, G4_IRB1.label) +G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) +G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB2.label) +G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB1.label) G4_IRA.blocs = dict([(irb.label, irb) for irb in [G4_IRB0, G4_IRB1, G4_IRB2]]) @@ -277,9 +277,9 @@ G5_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, B + CST2)], G5_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, B)]]) -G5_IRA.g.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) -G5_IRA.g.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) -G5_IRA.g.add_uniq_edge(G5_IRB1.label, G5_IRB1.label) +G5_IRA.graph.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) +G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) +G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB1.label) G5_IRA.blocs = dict([(irb.label, irb) for irb in [G5_IRB0, G5_IRB1, G5_IRB2]]) @@ -291,8 +291,8 @@ G6_IRA.g = GraphTest(G6_IRA) G6_IRB0 = gen_irbloc(LBL0, [[ExprAff(B, CST1)]]) G6_IRB1 = gen_irbloc(LBL1, [[ExprAff(A, B)]]) -G6_IRA.g.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) -G6_IRA.g.add_uniq_edge(G6_IRB1.label, G6_IRB1.label) +G6_IRA.graph.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) +G6_IRA.graph.add_uniq_edge(G6_IRB1.label, G6_IRB1.label) G6_IRA.blocs = dict([(irb.label, irb) for irb in [G6_IRB0, G6_IRB1]]) @@ -305,9 +305,9 @@ G7_IRB0 = gen_irbloc(LBL0, [[ExprAff(C, CST1)]]) G7_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, C)], [ExprAff(A, B)]]) G7_IRB2 = gen_irbloc(LBL2, [[ExprAff(D, A)]]) -G7_IRA.g.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) -G7_IRA.g.add_uniq_edge(G7_IRB1.label, G7_IRB1.label) -G7_IRA.g.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) +G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) +G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB1.label) +G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) G7_IRA.blocs = dict([(irb.label, irb) for irb in [G7_IRB0, G7_IRB1, G7_IRB2]]) @@ -320,9 +320,9 @@ G8_IRB0 = gen_irbloc(LBL0, [[ExprAff(C, CST1)]]) G8_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, C)], [ExprAff(C, D)]]) G8_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, B)]]) -G8_IRA.g.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) -G8_IRA.g.add_uniq_edge(G8_IRB1.label, G8_IRB1.label) -G8_IRA.g.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) +G8_IRA.graph.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) +G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB1.label) +G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) G8_IRA.blocs = dict([(irb.label, irb) for irb in [G8_IRB0, G8_IRB1, G8_IRB2]]) @@ -336,8 +336,8 @@ G10_IRA.g = GraphTest(G10_IRA) G10_IRB1 = gen_irbloc(LBL1, [[ExprAff(B, B + CST2)]]) G10_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, B)]]) -G10_IRA.g.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) -G10_IRA.g.add_uniq_edge(G10_IRB1.label, G10_IRB1.label) +G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) +G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB1.label) G10_IRA.blocs = dict([(irb.label, irb) for irb in [G10_IRB1, G10_IRB2]]) @@ -352,8 +352,8 @@ G11_IRB1 = gen_irbloc(LBL1, [[ExprAff(A, B), ExprAff(B, A)]]) G11_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, A - B)]]) -G11_IRA.g.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) -G11_IRA.g.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) +G11_IRA.graph.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) +G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) G11_IRA.blocs = dict([(irb.label, irb) for irb in [G11_IRB0, G11_IRB1, G11_IRB2]]) @@ -367,9 +367,9 @@ G12_IRB0 = gen_irbloc(LBL0, [[ExprAff(B, CST1)]]) G12_IRB1 = gen_irbloc(LBL1, [[ExprAff(A, B)], [ExprAff(B, B + CST2)]]) G12_IRB2 = gen_irbloc(LBL2, [[ExprAff(B, A)]]) -G12_IRA.g.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) -G12_IRA.g.add_uniq_edge(G12_IRB1.label, G12_IRB2.label) -G12_IRA.g.add_uniq_edge(G12_IRB1.label, G12_IRB1.label) +G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) +G12_IRA.graph.add_uniq_edge(G12_IRB1.label, G12_IRB2.label) +G12_IRA.graph.add_uniq_edge(G12_IRB1.label, G12_IRB1.label) G12_IRA.blocs = dict([(irb.label, irb) for irb in [G12_IRB0, G12_IRB1, G12_IRB2]]) @@ -396,10 +396,10 @@ G13_IRB2 = gen_irbloc(LBL2, [[ExprAff(B, A + CST3)], [ExprAff(A, B + CST3)], G13_IRB3 = gen_irbloc(LBL3, [[ExprAff(R, C)]]) -G13_IRA.g.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) -G13_IRA.g.add_uniq_edge(G13_IRB1.label, G13_IRB2.label) -G13_IRA.g.add_uniq_edge(G13_IRB2.label, G13_IRB1.label) -G13_IRA.g.add_uniq_edge(G13_IRB1.label, G13_IRB3.label) +G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) +G13_IRA.graph.add_uniq_edge(G13_IRB1.label, G13_IRB2.label) +G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB1.label) +G13_IRA.graph.add_uniq_edge(G13_IRB1.label, G13_IRB3.label) G13_IRA.blocs = dict([(irb.label, irb) for irb in [G13_IRB0, G13_IRB1, G13_IRB2, G13_IRB3]]) @@ -427,10 +427,10 @@ G14_IRB2 = gen_irbloc(LBL2, [[ExprAff(D, A)], G14_IRB3 = gen_irbloc(LBL3, [[ExprAff(R, D + B)]]) -G14_IRA.g.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) -G14_IRA.g.add_uniq_edge(G14_IRB1.label, G14_IRB2.label) -G14_IRA.g.add_uniq_edge(G14_IRB2.label, G14_IRB1.label) -G14_IRA.g.add_uniq_edge(G14_IRB1.label, G14_IRB3.label) +G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) +G14_IRA.graph.add_uniq_edge(G14_IRB1.label, G14_IRB2.label) +G14_IRA.graph.add_uniq_edge(G14_IRB2.label, G14_IRB1.label) +G14_IRA.graph.add_uniq_edge(G14_IRB1.label, G14_IRB3.label) G14_IRA.blocs = dict([(irb.label, irb) for irb in [G14_IRB0, G14_IRB1, G14_IRB2, G14_IRB3]]) @@ -446,9 +446,9 @@ G15_IRB1 = gen_irbloc(LBL1, [[ExprAff(D, A + B)], [ExprAff(B, C)]]) G15_IRB2 = gen_irbloc(LBL2, [[ExprAff(R, B)]]) -G15_IRA.g.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) -G15_IRA.g.add_uniq_edge(G15_IRB1.label, G15_IRB2.label) -G15_IRA.g.add_uniq_edge(G15_IRB1.label, G15_IRB1.label) +G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) +G15_IRA.graph.add_uniq_edge(G15_IRB1.label, G15_IRB2.label) +G15_IRA.graph.add_uniq_edge(G15_IRB1.label, G15_IRB1.label) G15_IRA.blocs = dict([(irb.label, irb) for irb in [G15_IRB0, G15_IRB1, G15_IRB2]]) @@ -465,14 +465,14 @@ G16_IRB3 = gen_irbloc(LBL3, [[ExprAff(R, D)]]) G16_IRB4 = gen_irbloc(LBL4, [[ExprAff(R, A)]]) G16_IRB5 = gen_irbloc(LBL5, [[ExprAff(R, A)]]) -G16_IRA.g.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) -G16_IRA.g.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) -G16_IRA.g.add_uniq_edge(G16_IRB2.label, G16_IRB1.label) -G16_IRA.g.add_uniq_edge(G16_IRB1.label, G16_IRB3.label) -G16_IRA.g.add_uniq_edge(G16_IRB3.label, G16_IRB1.label) -G16_IRA.g.add_uniq_edge(G16_IRB1.label, G16_IRB4.label) -G16_IRA.g.add_uniq_edge(G16_IRB4.label, G16_IRB1.label) -G16_IRA.g.add_uniq_edge(G16_IRB1.label, G16_IRB5.label) +G16_IRA.graph.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) +G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) +G16_IRA.graph.add_uniq_edge(G16_IRB2.label, G16_IRB1.label) +G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB3.label) +G16_IRA.graph.add_uniq_edge(G16_IRB3.label, G16_IRB1.label) +G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB4.label) +G16_IRA.graph.add_uniq_edge(G16_IRB4.label, G16_IRB1.label) +G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB5.label) G16_IRA.blocs = dict([(irb.label, irb) for irb in [G16_IRB0, G16_IRB1, G16_IRB2, G16_IRB3, @@ -489,8 +489,8 @@ G17_IRB1 = gen_irbloc(LBL1, [[ExprAff(A, D), ExprAff(B, D)]]) G17_IRB2 = gen_irbloc(LBL2, [[ExprAff(A, A - B)]]) -G17_IRA.g.add_uniq_edge(G17_IRB0.label, G17_IRB1.label) -G17_IRA.g.add_uniq_edge(G17_IRB1.label, G17_IRB2.label) +G17_IRA.graph.add_uniq_edge(G17_IRB0.label, G17_IRB1.label) +G17_IRA.graph.add_uniq_edge(G17_IRB1.label, G17_IRB2.label) G17_IRA.blocs = dict([(irb.label, irb) for irb in [G17_IRB0, G17_IRB1, G17_IRB2]]) @@ -1116,7 +1116,7 @@ for test_nb, test in enumerate([(G1_IRA, G1_INPUT, G1_OUTPUT), print "[+] Test", test_nb + 1 g_ira, (depnodes, heads), g_test_output = test - open("graph_%02d.dot" % (test_nb + 1), "w").write(g_ira.g.dot()) + open("graph_%02d.dot" % (test_nb + 1), "w").write(g_ira.graph.dot()) # Different options suffix_key_list = ["", "_nosimp", "_nomem", "_nocall", @@ -1248,3 +1248,6 @@ if FAILED: print i, else: print "SUCCESS" + +# Return an error status on error +assert not FAILED diff --git a/test/ir/analysis.py b/test/ir/analysis.py index 92aa4aba..49daa70e 100755 --- a/test/ir/analysis.py +++ b/test/ir/analysis.py @@ -2,7 +2,7 @@ from miasm2.expression.expression import ExprId, ExprInt32, ExprAff, ExprMem from miasm2.core.asmbloc import asm_label from miasm2.ir.analysis import ira -from miasm2.ir.ir import ir, irbloc +from miasm2.ir.ir import irbloc a = ExprId("a") b = ExprId("b") @@ -52,11 +52,13 @@ class Arch(object): def getsp(self, _): return sp -class IRATest(ir, ira): +class IRATest(ira): + + """Fake IRA class for tests""" def __init__(self, symbol_pool=None): arch = Arch() - ir.__init__(self, arch, 32, symbol_pool) + super(IRATest, self).__init__(arch, 32, symbol_pool) self.IRDst = pc self.ret_reg = r @@ -71,10 +73,8 @@ G1_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G1_IRB1 = gen_irbloc(LBL1, [[ExprAff(a, b)]]) G1_IRB2 = gen_irbloc(LBL2, [[ExprAff(r, a)]]) -G1_IRA.gen_graph() - -G1_IRA.g.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) -G1_IRA.g.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) +G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) +G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) G1_IRA.blocs = {irb.label : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]} @@ -96,11 +96,9 @@ G2_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G2_IRB1 = gen_irbloc(LBL1, [[ExprAff(a, a+CST1)]]) G2_IRB2 = gen_irbloc(LBL2, [[ExprAff(a, r)]]) -G2_IRA.gen_graph() - -G2_IRA.g.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) -G2_IRA.g.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) -G2_IRA.g.add_uniq_edge(G2_IRB1.label, G2_IRB1.label) +G2_IRA.graph.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) +G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) +G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB1.label) G2_IRA.blocs = {irb.label : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]} @@ -122,11 +120,9 @@ G3_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, CST1)]]) G3_IRB1 = gen_irbloc(LBL1, [[ExprAff(a, a+CST1)]]) G3_IRB2 = gen_irbloc(LBL2, [[ExprAff(r, a)]]) -G3_IRA.gen_graph() - -G3_IRA.g.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) -G3_IRA.g.add_uniq_edge(G3_IRB1.label, G3_IRB2.label) -G3_IRA.g.add_uniq_edge(G3_IRB1.label, G3_IRB1.label) +G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) +G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB2.label) +G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB1.label) G3_IRA.blocs = {irb.label : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]} @@ -149,12 +145,10 @@ G4_IRB1 = gen_irbloc(LBL1, [[ExprAff(a, a+CST1)]]) G4_IRB2 = gen_irbloc(LBL2, [[ExprAff(a, a+CST2)]]) G4_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) -G4_IRA.gen_graph() - -G4_IRA.g.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) -G4_IRA.g.add_uniq_edge(G4_IRB0.label, G4_IRB2.label) -G4_IRA.g.add_uniq_edge(G4_IRB1.label, G4_IRB3.label) -G4_IRA.g.add_uniq_edge(G4_IRB2.label, G4_IRB3.label) +G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) +G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB2.label) +G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB3.label) +G4_IRA.graph.add_uniq_edge(G4_IRB2.label, G4_IRB3.label) G4_IRA.blocs = {irb.label : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2, G4_IRB3]} @@ -167,8 +161,6 @@ G4_EXP_IRB1 = gen_irbloc(LBL1, [[]]) G4_EXP_IRB2 = gen_irbloc(LBL2, [[]]) G4_EXP_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) -G4_EXP_IRA.gen_graph() - G4_EXP_IRA.blocs = {irb.label : irb for irb in [G4_EXP_IRB0, G4_EXP_IRB1, G4_EXP_IRB2, G4_EXP_IRB3]} @@ -183,15 +175,13 @@ G5_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, a+CST3)]]) G5_IRB4 = gen_irbloc(LBL4, [[ExprAff(a, a+CST1)]]) G5_IRB5 = gen_irbloc(LBL5, [[ExprAff(a, r)]]) -G5_IRA.gen_graph() - -G5_IRA.g.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) -G5_IRA.g.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) -G5_IRA.g.add_uniq_edge(G5_IRB1.label, G5_IRB3.label) -G5_IRA.g.add_uniq_edge(G5_IRB2.label, G5_IRB4.label) -G5_IRA.g.add_uniq_edge(G5_IRB3.label, G5_IRB4.label) -G5_IRA.g.add_uniq_edge(G5_IRB4.label, G5_IRB5.label) -G5_IRA.g.add_uniq_edge(G5_IRB4.label, G5_IRB1.label) +G5_IRA.graph.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) +G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) +G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB3.label) +G5_IRA.graph.add_uniq_edge(G5_IRB2.label, G5_IRB4.label) +G5_IRA.graph.add_uniq_edge(G5_IRB3.label, G5_IRB4.label) +G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB5.label) +G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB1.label) G5_IRA.blocs = {irb.label : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, G5_IRB4, G5_IRB5]} @@ -206,8 +196,6 @@ G5_EXP_IRB3 = gen_irbloc(LBL3, [[]]) G5_EXP_IRB4 = gen_irbloc(LBL4, [[]]) G5_EXP_IRB5 = gen_irbloc(LBL5, [[]]) -G5_EXP_IRA.gen_graph() - G5_EXP_IRA.blocs = {irb.label : irb for irb in [G5_EXP_IRB0, G5_EXP_IRB1, G5_EXP_IRB2, G5_EXP_IRB3, G5_EXP_IRB4, G5_EXP_IRB5]} @@ -223,12 +211,10 @@ G6_IRB2 = gen_irbloc(LBL2, [[ExprAff(a, b)]]) G6_IRB3 = gen_irbloc(LBL3, [[ExprAff(r, CST2)]]) -G6_IRA.gen_graph() - -G6_IRA.g.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) -G6_IRA.g.add_uniq_edge(G6_IRB1.label, G6_IRB2.label) -G6_IRA.g.add_uniq_edge(G6_IRB2.label, G6_IRB1.label) -G6_IRA.g.add_uniq_edge(G6_IRB2.label, G6_IRB3.label) +G6_IRA.graph.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) +G6_IRA.graph.add_uniq_edge(G6_IRB1.label, G6_IRB2.label) +G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB1.label) +G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB3.label) G6_IRA.blocs = {irb.label : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2, G6_IRB3]} @@ -254,13 +240,11 @@ G7_IRB2 = gen_irbloc(LBL2, [[ExprAff(a, a+CST2)]]) G7_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, r)]]) -G7_IRA.gen_graph() - -G7_IRA.g.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) -G7_IRA.g.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) -G7_IRA.g.add_uniq_edge(G7_IRB2.label, G7_IRB1.label) -G7_IRA.g.add_uniq_edge(G7_IRB2.label, G7_IRB3.label) -G7_IRA.g.add_uniq_edge(G7_IRB0.label, G7_IRB2.label) +G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) +G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) +G7_IRA.graph.add_uniq_edge(G7_IRB2.label, G7_IRB1.label) +G7_IRA.graph.add_uniq_edge(G7_IRB2.label, G7_IRB3.label) +G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB2.label) G7_IRA.blocs = {irb.label : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2, @@ -287,13 +271,11 @@ G8_IRB2 = gen_irbloc(LBL2, [[ExprAff(b, b+CST2)]]) G8_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, b)]]) -G8_IRA.gen_graph() - -G8_IRA.g.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) -G8_IRA.g.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) -G8_IRA.g.add_uniq_edge(G8_IRB2.label, G8_IRB1.label) -G8_IRA.g.add_uniq_edge(G8_IRB2.label, G8_IRB3.label) -G8_IRA.g.add_uniq_edge(G8_IRB3.label, G8_IRB2.label) +G8_IRA.graph.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) +G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) +G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB1.label) +G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB3.label) +G8_IRA.graph.add_uniq_edge(G8_IRB3.label, G8_IRB2.label) G8_IRA.blocs = {irb.label : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2, @@ -322,16 +304,14 @@ G9_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, b)]]) G9_IRB4 = gen_irbloc(LBL4, [[ExprAff(r, a)], [ExprAff(r, b)]]) -G9_IRA.gen_graph() - -G9_IRA.g.add_uniq_edge(G9_IRB0.label, G9_IRB4.label) -G9_IRA.g.add_uniq_edge(G9_IRB0.label, G9_IRB1.label) -G9_IRA.g.add_uniq_edge(G9_IRB1.label, G9_IRB0.label) -G9_IRA.g.add_uniq_edge(G9_IRB1.label, G9_IRB4.label) -G9_IRA.g.add_uniq_edge(G9_IRB1.label, G9_IRB2.label) -G9_IRA.g.add_uniq_edge(G9_IRB2.label, G9_IRB0.label) -G9_IRA.g.add_uniq_edge(G9_IRB2.label, G9_IRB3.label) -G9_IRA.g.add_uniq_edge(G9_IRB3.label, G9_IRB4.label) +G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB4.label) +G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB1.label) +G9_IRA.graph.add_uniq_edge(G9_IRB1.label, G9_IRB0.label) +G9_IRA.graph.add_uniq_edge(G9_IRB1.label, G9_IRB4.label) +G9_IRA.graph.add_uniq_edge(G9_IRB1.label, G9_IRB2.label) +G9_IRA.graph.add_uniq_edge(G9_IRB2.label, G9_IRB0.label) +G9_IRA.graph.add_uniq_edge(G9_IRB2.label, G9_IRB3.label) +G9_IRA.graph.add_uniq_edge(G9_IRB3.label, G9_IRB4.label) G9_IRA.blocs = {irb.label : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2, @@ -362,12 +342,10 @@ G10_IRB2 = gen_irbloc(LBL2, [[ExprAff(a, b)]]) G10_IRB3 = gen_irbloc(LBL3, [[ExprAff(r, CST1)]]) -G10_IRA.gen_graph() - -G10_IRA.g.add_uniq_edge(G10_IRB0.label, G10_IRB1.label) -G10_IRA.g.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) -G10_IRA.g.add_uniq_edge(G10_IRB2.label, G10_IRB1.label) -G10_IRA.g.add_uniq_edge(G10_IRB2.label, G10_IRB3.label) +G10_IRA.graph.add_uniq_edge(G10_IRB0.label, G10_IRB1.label) +G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) +G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB1.label) +G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB3.label) G10_IRA.blocs = {irb.label : irb for irb in [G10_IRB0, G10_IRB1, G10_IRB2, G10_IRB3]} @@ -394,13 +372,11 @@ G11_IRB3 = gen_irbloc(LBL3, [[ExprAff(a, a+CST1)]]) G11_IRB4 = gen_irbloc(LBL4, [[ExprAff(b, b+CST1)]]) -G11_IRA.gen_graph() - -G11_IRA.g.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) -#G11_IRA.g.add_uniq_edge(G11_IRB3.label, G11_IRB1.label) -G11_IRA.g.add_uniq_edge(G11_IRB1.label, G11_IRB0.label) -#G11_IRA.g.add_uniq_edge(G11_IRB4.label, G11_IRB0.label) -G11_IRA.g.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) +G11_IRA.graph.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) +#G11_IRA.graph.add_uniq_edge(G11_IRB3.label, G11_IRB1.label) +G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB0.label) +#G11_IRA.graph.add_uniq_edge(G11_IRB4.label, G11_IRB0.label) +G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) G11_IRA.blocs = {irb.label : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]} @@ -428,13 +404,11 @@ G12_IRB3 = gen_irbloc(LBL3, [[ExprAff(r, CST3)]]) G12_IRB4 = gen_irbloc(LBL4, [[ExprAff(r, CST2)]]) G12_IRB5 = gen_irbloc(LBL5, [[ExprAff(r, b)]]) -G12_IRA.gen_graph() - -G12_IRA.g.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) -G12_IRA.g.add_uniq_edge(G12_IRB0.label, G12_IRB2.label) -G12_IRA.g.add_uniq_edge(G12_IRB2.label, G12_IRB3.label) -G12_IRA.g.add_uniq_edge(G12_IRB2.label, G12_IRB4.label) -G12_IRA.g.add_uniq_edge(G12_IRB4.label, G12_IRB5.label) +G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) +G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB2.label) +G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB3.label) +G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB4.label) +G12_IRA.graph.add_uniq_edge(G12_IRB4.label, G12_IRB5.label) G12_IRA.blocs = {irb.label : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2, G12_IRB3, G12_IRB4, G12_IRB5]} @@ -465,12 +439,10 @@ G13_IRB2 = gen_irbloc(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1), G13_IRB3 = gen_irbloc(LBL3, [[]]) # lost son G13_IRB4 = gen_irbloc(LBL4, [[ExprAff(b, CST2)]]) -G13_IRA.gen_graph() - -G13_IRA.g.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) -G13_IRA.g.add_uniq_edge(G13_IRB0.label, G13_IRB4.label) -G13_IRA.g.add_uniq_edge(G13_IRB2.label, G13_IRB3.label) -G13_IRA.g.add_uniq_edge(G13_IRB4.label, G13_IRB2.label) +G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) +G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB4.label) +G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB3.label) +G13_IRA.graph.add_uniq_edge(G13_IRB4.label, G13_IRB2.label) G13_IRA.blocs = {irb.label : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2, G13_IRB4]} @@ -499,9 +471,7 @@ G14_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) G14_IRB1 = gen_irbloc(LBL1, [[ExprAff(r, a+c)]]) -G14_IRA.gen_graph() - -G14_IRA.g.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) +G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) G14_IRA.blocs = {irb.label : irb for irb in [G14_IRB0, G14_IRB1]} @@ -524,9 +494,7 @@ G15_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, CST2)], [ExprAff(a, CST1), ExprAff(c, CST1)]]) G15_IRB1 = gen_irbloc(LBL1, [[ExprAff(r, a)]]) -G15_IRA.gen_graph() - -G15_IRA.g.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) +G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) G15_IRA.blocs = {irb.label : irb for irb in [G15_IRB0, G15_IRB1]} @@ -548,10 +516,8 @@ G16_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), G16_IRB1 = gen_irbloc(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]]) G16_IRB2 = gen_irbloc(LBL2, [[]]) -G16_IRA.gen_graph() - -G16_IRA.g.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) -G16_IRA.g.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) +G16_IRA.graph.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) +G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) G16_IRA.blocs = {irb.label : irb for irb in [G16_IRB0, G16_IRB1]} @@ -622,11 +588,9 @@ G17_IRB0 = gen_irbloc(LBL0, [[ExprAff(a, a*b), ]) -G17_IRA.gen_graph() - G17_IRA.blocs = {irb.label : irb for irb in [G17_IRB0]} -G17_IRA.g.add_node(G17_IRB0.label) +G17_IRA.graph.add_node(G17_IRB0.label) # Expected output for graph 17 G17_EXP_IRA = IRATest() @@ -694,13 +658,13 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), print "[+] Test", test_nb+1 # Print initial graph, for debug - open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph()) + open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) # Simplify graph g_ira.dead_simp() # Print simplified graph, for debug - open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph()) + open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) # Same number of blocks assert len(g_ira.blocs) == len(g_exp_ira.blocs) diff --git a/test/ir/ir2C.py b/test/ir/ir2C.py index 8a5f97c4..36683904 100644 --- a/test/ir/ir2C.py +++ b/test/ir/ir2C.py @@ -54,10 +54,6 @@ class TestIrIr2C(unittest.TestCase): self.assertRaises(NotImplementedError, translator.from_expr, ExprOp('X', *args[:2])) - # Ternary operators - self.translationTest( - ExprOp('div8', *args[:3]), r'(div_op(32, 0x0, 0x1, 0x2) &0xffffffff)') - # Other cases self.translationTest( ExprOp('+', *args[:3]), r'(((0x0&0xffffffff)+(0x1&0xffffffff)+(0x2&0xffffffff))&0xffffffff)') diff --git a/test/test_all.py b/test/test_all.py index c449ef33..71ea51a5 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -337,13 +337,15 @@ class ExampleDisasmFull(ExampleDisassembler): """DisasmFull specificities: - script: disasm/full.py - flags: -g -s - - @products: graph_execflow.dot, graph_irflow.dot, lines.dot, out.dot + - @products: graph_execflow.dot, graph_irflow.dot, graph_irflow_raw.dot, + lines.dot, out.dot """ def __init__(self, *args, **kwargs): super(ExampleDisasmFull, self).__init__(*args, **kwargs) self.command_line = ["full.py", "-g", "-s", "-m"] + self.command_line - self.products += ["graph_execflow.dot", "graph_irflow.dot", "lines.dot"] + self.products += ["graph_execflow.dot", "graph_irflow.dot", + "graph_irflow_raw.dot", "lines.dot"] testset += ExampleDisasmFull(["arml", Example.get_sample("demo_arm_l.bin"), |