diff options
Diffstat (limited to 'miasm2')
| -rw-r--r-- | miasm2/analysis/data_analysis.py | 6 | ||||
| -rw-r--r-- | miasm2/analysis/depgraph.py | 6 | ||||
| -rw-r--r-- | miasm2/analysis/sandbox.py | 134 | ||||
| -rw-r--r-- | miasm2/arch/x86/ira.py | 5 | ||||
| -rw-r--r-- | miasm2/arch/x86/sem.py | 1 | ||||
| -rw-r--r-- | miasm2/core/bin_stream.py | 9 | ||||
| -rw-r--r-- | miasm2/ir/analysis.py | 124 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 128 | ||||
| -rw-r--r-- | miasm2/ir/translators/C.py | 14 | ||||
| -rw-r--r-- | miasm2/jitter/csts.py | 2 | ||||
| -rw-r--r-- | miasm2/jitter/jitload.py | 7 | ||||
| -rw-r--r-- | miasm2/jitter/loader/elf.py | 20 | ||||
| -rw-r--r-- | miasm2/jitter/loader/pe.py | 137 | ||||
| -rw-r--r-- | miasm2/jitter/loader/utils.py | 8 | ||||
| -rw-r--r-- | miasm2/jitter/vm_mngr.c | 346 | ||||
| -rw-r--r-- | miasm2/jitter/vm_mngr.h | 58 | ||||
| -rw-r--r-- | miasm2/jitter/vm_mngr_py.c | 64 | ||||
| -rw-r--r-- | miasm2/os_dep/win_api_x86_32_seh.py | 46 |
18 files changed, 454 insertions, 661 deletions
diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 8462f150..9451a407 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -150,7 +150,7 @@ def inter_bloc_flow_link(ir_arch, flow_graph, todo, link_exec_to_data): x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.g.successors(irb.label): + for lbl_dst in ir_arch.graph.successors(irb.label): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) @@ -166,7 +166,7 @@ def create_implicit_flow(ir_arch, flow_graph): while todo: lbl = todo.pop() irb = ir_arch.blocs[lbl] - for lbl_son in ir_arch.g.successors(irb.label): + for lbl_son in ir_arch.graph.successors(irb.label): if not lbl_son in ir_arch.blocs: print "cannot find bloc!!", lbl continue @@ -189,7 +189,7 @@ def create_implicit_flow(ir_arch, flow_graph): irb.in_nodes[n_r] = irb.label, 0, n_r node_n_r = irb.in_nodes[n_r] # print "###", node_n_r - for lbl_p in ir_arch.g.predecessors(irb.label): + for lbl_p in ir_arch.graph.predecessors(irb.label): todo.add(lbl_p) flow_graph.add_uniq_edge(node_n_r, node_n_w) diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index 838183bf..0a5d38aa 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -686,7 +686,6 @@ class DependencyGraph(object): def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True, follow_call=True): """Create a DependencyGraph linked to @ira - The IRA graph must have been computed @ira: IRAnalysis instance @implicit: (optional) Imply implicit dependencies @@ -702,9 +701,6 @@ class DependencyGraph(object): self._step_counter = itertools.count() self._current_step = next(self._step_counter) - # The IRA graph must be computed - assert hasattr(self._ira, 'g') - # Create callback filters. The order is relevant. self._cb_follow = [] if apply_simp: @@ -892,7 +888,7 @@ class DependencyGraph(object): def _get_previousblocks(self, label): """Return an iterator on predecessors blocks of @label, with their lengths""" - preds = self._ira.g.predecessors_iter(label) + preds = self._ira.graph.predecessors_iter(label) for pred_label in preds: length = len(self._get_irs(pred_label)) yield (pred_label, length) diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index b3184626..115fd521 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -1,13 +1,15 @@ +import os import logging from argparse import ArgumentParser from miasm2.analysis.machine import Machine -from miasm2.os_dep import win_api_x86_32_seh from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE from miasm2.analysis import debugging from miasm2.jitter.jitload import log_func + class Sandbox(object): + """ Parent class for Sandbox abstraction """ @@ -29,9 +31,9 @@ class Sandbox(object): yield base_cls - classes = property(lambda x:x.__class__._classes_()) + classes = property(lambda x: x.__class__._classes_()) - def __init__(self, fname, options, custom_methods = {}): + def __init__(self, fname, options, custom_methods={}, **kwargs): """ Initialize a sandbox @fname: str file name @@ -46,9 +48,9 @@ class Sandbox(object): if cls == Sandbox: continue if issubclass(cls, OS): - cls.__init__(self, custom_methods) + cls.__init__(self, custom_methods, **kwargs) else: - cls.__init__(self) + cls.__init__(self, **kwargs) # Logging options if self.options.singlestep: @@ -84,8 +86,11 @@ class Sandbox(object): parser.add_argument("-j", "--jitter", help="Jitter engine. Possible values are: tcc (default), llvm, python", default="tcc") - parser.add_argument('-q', "--quiet-function-calls", action="store_true", + parser.add_argument( + '-q', "--quiet-function-calls", action="store_true", help="Don't log function calls") + parser.add_argument('-i', "--dependencies", action="store_true", + help="Load PE and its dependencies") for base_cls in cls._classes_(): base_cls.update_parser(parser) @@ -121,11 +126,12 @@ class Sandbox(object): class OS(object): + """ Parent class for OS abstraction """ - def __init__(self, custom_methods): + def __init__(self, custom_methods, **kwargs): pass @classmethod @@ -134,13 +140,15 @@ class OS(object): class Arch(object): + """ Parent class for Arch abstraction """ # Architecture name _ARCH_ = None - def __init__(self): + + def __init__(self, **kwargs): self.machine = Machine(self._ARCH_) self.jitter = self.machine.jitter(self.options.jitter) @@ -155,10 +163,12 @@ class OS_Win(OS): "ole32.dll", "urlmon.dll", "ws2_32.dll", 'advapi32.dll', "psapi.dll", ] + modules_path = "win_dll" def __init__(self, custom_methods, *args, **kwargs): - from miasm2.jitter.loader.pe import vm_load_pe, vm_load_pe_libs, preload_pe, libimp_pe - from miasm2.os_dep import win_api_x86_32 + from miasm2.jitter.loader.pe import vm_load_pe, vm_load_pe_libs,\ + preload_pe, libimp_pe, vm_load_pe_and_dependencies + from miasm2.os_dep import win_api_x86_32, win_api_x86_32_seh methods = win_api_x86_32.__dict__ methods.update(custom_methods) @@ -169,20 +179,36 @@ class OS_Win(OS): self.libs = libs win_api_x86_32.winobjs.runtime_dll = libs + self.name2module = {} + fname_basename = os.path.basename(self.fname).lower() + + # Load main pe + with open(self.fname) as fstream: + self.pe = vm_load_pe(self.jitter.vm, fstream.read(), + load_hdr=self.options.load_hdr, **kwargs) + self.name2module[fname_basename] = self.pe + # Load library if self.options.loadbasedll: - all_pe = [] # Load libs in memory - all_pe = vm_load_pe_libs(self.jitter.vm, self.ALL_IMP_DLL, libs) + self.name2module.update(vm_load_pe_libs(self.jitter.vm, + self.ALL_IMP_DLL, + libs, + self.modules_path, + **kwargs)) # Patch libs imports - for pe in all_pe.values(): + for pe in self.name2module.itervalues(): preload_pe(self.jitter.vm, pe, libs) - # Load main pe - with open(self.fname) as fstream: - self.pe = vm_load_pe(self.jitter.vm, fstream.read()) + if self.options.dependencies: + vm_load_pe_and_dependencies(self.jitter.vm, + fname_basename, + self.name2module, + libs, + self.modules_path, + **kwargs) win_api_x86_32.winobjs.current_pe = self.pe @@ -194,20 +220,20 @@ class OS_Win(OS): # Manage SEH if self.options.use_seh: - win_api_x86_32_seh.main_pe_name = self.fname + win_api_x86_32_seh.main_pe_name = fname_basename win_api_x86_32_seh.main_pe = self.pe - win_api_x86_32_seh.loaded_modules = self.ALL_IMP_DLL + win_api_x86_32.winobjs.hcurmodule = self.pe.NThdr.ImageBase + win_api_x86_32_seh.name2module = self.name2module win_api_x86_32_seh.init_seh(self.jitter) win_api_x86_32_seh.set_win_fs_0(self.jitter) - self.entry_point = self.pe.rva2virt(self.pe.Opthdr.AddressOfEntryPoint) + self.entry_point = self.pe.rva2virt( + self.pe.Opthdr.AddressOfEntryPoint) @classmethod def update_parser(cls, parser): - parser.add_argument('-o', "--loadhdr", action="store_true", + parser.add_argument('-o', "--load-hdr", action="store_true", help="Load pe hdr") - parser.add_argument('-e', "--loadmainpe", action="store_true", - help="Load main pe") parser.add_argument('-y', "--use-seh", action="store_true", help="Use windows SEH") parser.add_argument('-l', "--loadbasedll", action="store_true", @@ -230,7 +256,7 @@ class OS_Linux(OS): self.libs = libimp_elf() with open(self.fname) as fstream: - self.elf = vm_load_elf(self.jitter.vm, fstream.read()) + self.elf = vm_load_elf(self.jitter.vm, fstream.read(), **kwargs) preload_elf(self.jitter.vm, self.elf, self.libs) self.entry_point = self.elf.Ehdr.entry @@ -238,7 +264,9 @@ class OS_Linux(OS): # Library calls handler self.jitter.add_lib_handler(self.libs, methods) + class OS_Linux_str(OS): + def __init__(self, custom_methods, *args, **kwargs): from miasm2.jitter.loader.elf import libimp_elf from miasm2.os_dep import linux_stdlib @@ -253,7 +281,8 @@ class OS_Linux_str(OS): data = open(self.fname).read() self.options.load_base_addr = int(self.options.load_base_addr, 0) - self.jitter.vm.add_memory_page(self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data) + self.jitter.vm.add_memory_page( + self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data) # Library calls handler self.jitter.add_lib_handler(libs, methods) @@ -264,16 +293,16 @@ class OS_Linux_str(OS): class Arch_x86(Arch): - _ARCH_ = None # Arch name + _ARCH_ = None # Arch name STACK_SIZE = 0x10000 STACK_BASE = 0x130000 - def __init__(self): - super(Arch_x86, self).__init__() + def __init__(self, **kwargs): + super(Arch_x86, self).__init__(**kwargs) if self.options.usesegm: - self.jitter.ir_arch.do_stk_segm= True - self.jitter.ir_arch.do_ds_segm= True + self.jitter.ir_arch.do_stk_segm = True + self.jitter.ir_arch.do_ds_segm = True self.jitter.ir_arch.do_str_segm = True self.jitter.ir_arch.do_all_segm = True @@ -282,11 +311,10 @@ class Arch_x86(Arch): self.jitter.stack_base = self.STACK_BASE self.jitter.init_stack() - @classmethod def update_parser(cls, parser): parser.add_argument('-s', "--usesegm", action="store_true", - help="Use segments") + help="Use segments") class Arch_x86_32(Arch_x86): @@ -302,34 +330,36 @@ class Arch_arml(Arch): STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_arml, self).__init__() + def __init__(self, **kwargs): + super(Arch_arml, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE self.jitter.stack_base = self.STACK_BASE self.jitter.init_stack() + class Arch_armb(Arch): _ARCH_ = "armb" STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_armb, self).__init__() + def __init__(self, **kwargs): + super(Arch_armb, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE self.jitter.stack_base = self.STACK_BASE self.jitter.init_stack() + class Arch_aarch64l(Arch): _ARCH_ = "aarch64l" STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_aarch64l, self).__init__() + def __init__(self, **kwargs): + super(Arch_aarch64l, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE @@ -342,8 +372,8 @@ class Arch_aarch64b(Arch): STACK_SIZE = 0x100000 STACK_BASE = 0x100000 - def __init__(self): - super(Arch_aarch64b, self).__init__() + def __init__(self, **kwargs): + super(Arch_aarch64b, self).__init__(**kwargs) # Init stack self.jitter.stack_size = self.STACK_SIZE @@ -351,7 +381,6 @@ class Arch_aarch64b(Arch): self.jitter.init_stack() - class Sandbox_Win_x86_32(Sandbox, Arch_x86_32, OS_Win): def __init__(self, *args, **kwargs): @@ -366,8 +395,7 @@ class Sandbox_Win_x86_32(Sandbox, Arch_x86_32, OS_Win): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -391,8 +419,7 @@ class Sandbox_Win_x86_64(Sandbox, Arch_x86_64, OS_Win): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -415,8 +442,7 @@ class Sandbox_Linux_x86_32(Sandbox, Arch_x86_32, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -440,8 +466,7 @@ class Sandbox_Linux_x86_64(Sandbox, Arch_x86_64, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): """ If addr is not set, use entrypoint """ @@ -460,12 +485,12 @@ class Sandbox_Linux_arml(Sandbox, Arch_arml, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 16) super(Sandbox_Linux_arml, self).run(addr) + class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): def __init__(self, *args, **kwargs): @@ -476,8 +501,7 @@ class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 0) super(Sandbox_Linux_armb_str, self).run(addr) @@ -493,8 +517,7 @@ class Sandbox_Linux_arml_str(Sandbox, Arch_arml, OS_Linux_str): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 0) super(Sandbox_Linux_arml_str, self).run(addr) @@ -510,8 +533,7 @@ class Sandbox_Linux_aarch64l(Sandbox, Arch_aarch64l, OS_Linux): # Set the runtime guard self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) - - def run(self, addr = None): + def run(self, addr=None): if addr is None and self.options.address is not None: addr = int(self.options.address, 0) super(Sandbox_Linux_aarch64l, self).run(addr) diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py index 5bc4761f..b7a1f19f 100644 --- a/miasm2/arch/x86/ira.py +++ b/miasm2/arch/x86/ira.py @@ -3,6 +3,7 @@ from miasm2.expression.expression import ExprAff, ExprOp, ExprId from miasm2.core.graph import DiGraph +from miasm2.core.asmbloc import expr_is_label from miasm2.ir.ir import ir, irbloc from miasm2.ir.analysis import ira from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 @@ -46,7 +47,7 @@ class ir_a_x86_16(ir_x86_16, ira): if not l.is_subcall(): return sub_call_dst = l.args[0] - if self.ExprIsLabel(sub_call_dst): + if expr_is_label(sub_call_dst): sub_call_dst = sub_call_dst.name for b in ir_blocs: l = b.lines[-1] @@ -54,7 +55,7 @@ class ir_a_x86_16(ir_x86_16, ira): if not l.is_subcall(): continue sub_call_dst = l.args[0] - if self.ExprIsLabel(sub_call_dst): + if expr_is_label(sub_call_dst): sub_call_dst = sub_call_dst.name lbl = bloc.get_next() new_lbl = self.gen_label() diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 800a1fa2..802b6283 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -660,7 +660,6 @@ def cli(ir, instr): def sti(ir, instr): e = [m2_expr.ExprAff(exception_flags, m2_expr.ExprInt32(EXCEPT_PRIV_INSN))] - e = [] # XXX TODO HACK return e, [] diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py index 6e158061..f7b160f9 100644 --- a/miasm2/core/bin_stream.py +++ b/miasm2/core/bin_stream.py @@ -108,7 +108,7 @@ class bin_stream_file(bin_stream): return self.bin.tell() - self.shift def setoffset(self, val): - self.bin.seek(val + self.shift) + self.bin.seek(val + self.shift) offset = property(getoffset, setoffset) def readbs(self, l=1): @@ -123,7 +123,6 @@ class bin_stream_file(bin_stream): return self.l - (self.offset + self.shift) - class bin_stream_container(bin_stream): def __init__(self, virt_view, offset=0L): @@ -142,13 +141,13 @@ class bin_stream_container(bin_stream): if self.offset + l > self.l: raise IOError("not enough bytes") self.offset += l - return self.bin(self.offset - l, self.offset) + return self.bin.get(self.offset - l, self.offset) def getbytes(self, start, l=1): - return self.bin(start, start + l) + return self.bin.get(start, start + l) def __str__(self): - out = self.bin(self.offset, self.l) + out = self.bin.get(self.offset, self.offset + self.l) return out def setoffset(self, val): diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 31f6294c..40a3bf64 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -4,7 +4,7 @@ import logging from miasm2.ir.symbexec import symbexec -from miasm2.core.graph import DiGraph +from miasm2.ir.ir import ir from miasm2.expression.expression \ import ExprAff, ExprCond, ExprId, ExprInt, ExprMem @@ -14,109 +14,21 @@ console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARNING) -class ira: + +class ira(ir): + """IR Analysis + This class provides higher level manipulations on IR, such as dead + instruction removals. + + This class can be used as a common parent with `miasm2.ir.ir::ir` class. + For instance: + class ira_x86_16(ir_x86_16, ira) + """ def ira_regs_ids(self): """Returns ids of all registers used in the IR""" return self.arch.regs.all_regs_ids + [self.IRDst] - def sort_dst(self, todo, done): - out = set() - while todo: - dst = todo.pop() - if self.ExprIsLabel(dst): - done.add(dst) - elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt): - done.add(dst) - elif isinstance(dst, ExprCond): - todo.add(dst.src1) - todo.add(dst.src2) - elif isinstance(dst, ExprId): - out.add(dst) - else: - done.add(dst) - return out - - def dst_trackback(self, b): - dst = b.dst - todo = set([dst]) - done = set() - - for irs in reversed(b.irs): - if len(todo) == 0: - break - out = self.sort_dst(todo, done) - found = set() - follow = set() - for i in irs: - if not out: - break - for o in out: - if i.dst == o: - follow.add(i.src) - found.add(o) - for o in found: - out.remove(o) - - for o in out: - if o not in found: - follow.add(o) - todo = follow - - return done - - def gen_graph(self, link_all = True): - """ - Gen irbloc digraph - @link_all: also gen edges to non present irblocs - """ - self.g = DiGraph() - for lbl, b in self.blocs.items(): - # print 'add', lbl - self.g.add_node(lbl) - # dst = self.get_bloc_dst(b) - dst = self.dst_trackback(b) - # print "\tdst", dst - for d in dst: - if isinstance(d, ExprInt): - d = ExprId( - self.symbol_pool.getby_offset_create(int(d.arg))) - if self.ExprIsLabel(d): - if d.name in self.blocs or link_all is True: - self.g.add_edge(lbl, d.name) - - def graph(self): - """Output the graphviz script""" - out = """ - digraph asm_graph { - size="80,50"; - node [ - fontsize = "16", - shape = "box" - ]; - """ - all_lbls = {} - for lbl in self.g.nodes(): - if lbl not in self.blocs: - continue - irb = self.blocs[lbl] - ir_txt = [str(lbl)] - for irs in irb.irs: - for l in irs: - ir_txt.append(str(l)) - ir_txt.append("") - ir_txt.append("") - all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) - for l, v in all_lbls.items(): - # print l, v - out += '%s [label="%s"];\n' % (l, v) - - for a, b in self.g.edges(): - # print 'edge', a, b, hash(a), hash(b) - out += '%s -> %s;\n' % (hash(a), hash(b)) - out += '}' - return out - def remove_dead_instr(self, irb, useful): """Remove dead affectations using previous reaches analysis @irb: irbloc instance @@ -149,12 +61,12 @@ class ira: useful = set() - for node in self.g.nodes(): + for node in self.graph.nodes(): if node not in self.blocs: continue block = self.blocs[node] - successors = self.g.successors(node) + successors = self.graph.successors(node) has_son = bool(successors) for p_son in successors: if p_son not in self.blocs: @@ -274,7 +186,7 @@ class ira: for key, value in irb.cur_reach[0].iteritems()} # Compute reach from predecessors - for n_pred in self.g.predecessors(irb.label): + for n_pred in self.graph.predecessors(irb.label): p_block = self.blocs[n_pred] # Handle each register definition @@ -313,7 +225,7 @@ class ira: analysis""" fixed = True - for node in self.g.nodes(): + for node in self.graph.nodes(): if node in self.blocs: irb = self.blocs[node] if (irb.cur_reach != irb.prev_reach or @@ -329,13 +241,11 @@ class ira: Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - - PRE: gen_graph() """ fixed_point = False log.debug('iteration...') while not fixed_point: - for node in self.g.nodes(): + for node in self.graph.nodes(): if node in self.blocs: self.compute_reach_block(self.blocs[node]) fixed_point = self._test_kill_reach_fix() @@ -347,8 +257,6 @@ class ira: Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - - PRE: gen_graph() """ # Update r/w variables for all irblocs self.get_rw(self.ira_regs_ids()) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index e051dc8c..f957fcab 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -22,15 +22,16 @@ import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core import asmbloc from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmbloc import asm_symbol_pool +from miasm2.core.asmbloc import asm_symbol_pool, expr_is_label, asm_label, \ + asm_bloc +from miasm2.core.graph import DiGraph class irbloc(object): def __init__(self, label, irs, lines = []): - assert(isinstance(label, asmbloc.asm_label)) + assert(isinstance(label, asm_label)) self.label = label self.irs = irs self.lines = lines @@ -119,6 +120,47 @@ class irbloc(object): return "\n".join(o) +class DiGraphIR(DiGraph): + """DiGraph for IR instances""" + + def __init__(self, blocks, *args, **kwargs): + """Instanciate a DiGraphIR + @blocks: IR blocks + """ + self._blocks = blocks + super(DiGraphIR, self).__init__(*args, **kwargs) + + def dot(self): + """Output the graphviz script""" + out = """ + digraph asm_graph { + size="80,50"; + node [ + fontsize = "16", + shape = "box" + ]; + """ + all_lbls = {} + for lbl in self.nodes(): + if lbl not in self._blocks: + continue + irb = self._blocks[lbl] + ir_txt = [str(lbl)] + for irs in irb.irs: + for l in irs: + ir_txt.append(str(l)) + ir_txt.append("") + ir_txt.append("") + all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) + for l, v in all_lbls.items(): + out += '%s [label="%s"];\n' % (l, v) + + for a, b in self.edges(): + out += '%s -> %s;\n' % (hash(a), hash(b)) + out += '}' + return out + + class ir(object): def __init__(self, arch, attrib, symbol_pool=None): @@ -130,6 +172,8 @@ class ir(object): self.sp = arch.getsp(attrib) self.arch = arch self.attrib = attrib + # Lazy structure + self._graph = None def instr2ir(self, l): ir_bloc_cur, ir_blocs_extra = self.get_ir(l) @@ -140,13 +184,13 @@ class ir(object): @ad: an ExprId/ExprInt/label/int""" if (isinstance(ad, m2_expr.ExprId) and - isinstance(ad.name, asmbloc.asm_label)): + isinstance(ad.name, asm_label)): ad = ad.name if isinstance(ad, m2_expr.ExprInt): ad = int(ad.arg) if type(ad) in [int, long]: ad = self.symbol_pool.getby_offset_create(ad) - elif isinstance(ad, asmbloc.asm_label): + elif isinstance(ad, asm_label): ad = self.symbol_pool.getby_name_create(ad.name) return ad @@ -158,7 +202,7 @@ class ir(object): return self.blocs.get(label, None) def add_instr(self, l, ad=0, gen_pc_updt = False): - b = asmbloc.asm_bloc(l) + b = asm_bloc(l) b.lines = [l] self.add_bloc(b, gen_pc_updt) @@ -299,6 +343,8 @@ class ir(object): self.blocs[irb.label] = irb + # Forget graph if any + self._graph = None def get_instr_label(self, instr): """Returns the label associated to an instruction @@ -334,6 +380,70 @@ class ir(object): for b in self.blocs.values(): b.get_rw(regs_ids) - def ExprIsLabel(self, l): - return isinstance(l, m2_expr.ExprId) and isinstance(l.name, - asmbloc.asm_label) + def sort_dst(self, todo, done): + out = set() + while todo: + dst = todo.pop() + if expr_is_label(dst): + done.add(dst) + elif isinstance(dst, m2_expr.ExprMem) or isinstance(dst, m2_expr.ExprInt): + done.add(dst) + elif isinstance(dst, m2_expr.ExprCond): + todo.add(dst.src1) + todo.add(dst.src2) + elif isinstance(dst, m2_expr.ExprId): + out.add(dst) + else: + done.add(dst) + return out + + def dst_trackback(self, b): + dst = b.dst + todo = set([dst]) + done = set() + + for irs in reversed(b.irs): + if len(todo) == 0: + break + out = self.sort_dst(todo, done) + found = set() + follow = set() + for i in irs: + if not out: + break + for o in out: + if i.dst == o: + follow.add(i.src) + found.add(o) + for o in found: + out.remove(o) + + for o in out: + if o not in found: + follow.add(o) + todo = follow + + return done + + def _gen_graph(self): + """ + Gen irbloc digraph + """ + self._graph = DiGraphIR(self.blocs) + for lbl, b in self.blocs.iteritems(): + self._graph.add_node(lbl) + dst = self.dst_trackback(b) + for d in dst: + if isinstance(d, m2_expr.ExprInt): + d = m2_expr.ExprId( + self.symbol_pool.getby_offset_create(int(d.arg))) + if expr_is_label(d): + self._graph.add_edge(lbl, d.name) + + @property + def graph(self): + """Get a DiGraph representation of current IR instance. + Lazy property, building the graph on-demand""" + if self._graph is None: + self._gen_graph() + return self._graph diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 72759900..a7ba1a20 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,13 +18,9 @@ class TranslatorC(Translator): dct_rot = {'<<<': 'rot_left', '>>>': 'rot_right', } - dct_div = {'div8': "div_op", - 'div16': "div_op", - 'div32': "div_op", - 'idiv32': "div_op", # XXX to test - '<<<c_rez': 'rcl_rez_op', - '>>>c_rez': 'rcr_rez_op', - } + dct_rotc = {'<<<c_rez': 'rcl_rez_op', + '>>>c_rez': 'rcr_rez_op', + } def from_ExprId(self, expr): @@ -120,8 +116,8 @@ class TranslatorC(Translator): else: raise NotImplementedError('Unknown op: %r' % expr.op) - elif len(expr.args) == 3 and expr.op in self.dct_div: - return '(%s(%s, %s, %s, %s) &0x%x)' % (self.dct_div[expr.op], + elif len(expr.args) == 3 and expr.op in self.dct_rotc: + return '(%s(%s, %s, %s, %s) &0x%x)' % (self.dct_rotc[expr.op], expr.args[0].size, self.from_expr(expr.args[0]), self.from_expr(expr.args[1]), diff --git a/miasm2/jitter/csts.py b/miasm2/jitter/csts.py index e4b315e1..b71e9463 100644 --- a/miasm2/jitter/csts.py +++ b/miasm2/jitter/csts.py @@ -11,6 +11,8 @@ EXCEPT_INT_XX = (1 << 2) EXCEPT_BREAKPOINT_INTERN = (1 << 10) EXCEPT_ACCESS_VIOL = ((1 << 14) | EXCEPT_DO_NOT_UPDATE_PC) +EXCEPT_DIV_BY_ZERO = ((1 << 16) | EXCEPT_DO_NOT_UPDATE_PC) +EXCEPT_PRIV_INSN = ((1 << 17) | EXCEPT_DO_NOT_UPDATE_PC) # VM Mngr constants PAGE_READ = 1 diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index a035445b..2335cc3c 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -144,12 +144,11 @@ class CallbackHandlerBitflag(CallbackHandler): Iterator on other results""" res = True - for b in self.callbacks: - - if b & bitflag != 0: + for bitflag_expected in self.callbacks: + if bitflag_expected & bitflag == bitflag_expected: # If the flag matched for res in super(CallbackHandlerBitflag, - self).call_callbacks(b, *args): + self).call_callbacks(bitflag_expected, *args): if res is not True: yield res diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py index b3946000..08df632a 100644 --- a/miasm2/jitter/loader/elf.py +++ b/miasm2/jitter/loader/elf.py @@ -17,6 +17,7 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) + def get_import_address_elf(e): import2addr = defaultdict(set) for sh in e.sh: @@ -46,21 +47,21 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): return runtime_lib, dyn_funcs - def vm_load_elf(vm, fdata, **kargs): """ Very dirty elf loader TODO XXX: implement real loader """ - #log.setLevel(logging.DEBUG) + # log.setLevel(logging.DEBUG) e = elf_init.ELF(fdata, **kargs) i = interval() all_data = {} for p in e.ph.phlist: - if p.ph.type != 1: + if p.ph.type != elf_csts.PT_LOAD: continue - log.debug('0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, - p.ph.filesz) + log.debug( + '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, + p.ph.filesz, p.ph.type) data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz] addr_o = p.ph.vaddr a_addr = addr_o & ~0xFFF @@ -68,16 +69,16 @@ def vm_load_elf(vm, fdata, **kargs): b_addr = (b_addr + 0xFFF) & ~0xFFF all_data[addr_o] = data_o # -2: Trick to avoid merging 2 consecutive pages - i += [(a_addr, b_addr-2)] + i += [(a_addr, b_addr - 2)] for a, b in i.intervals: - #print hex(a), hex(b) - vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00"*(b+2-a)) - + # print hex(a), hex(b) + vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00" * (b + 2 - a)) for r_vaddr, data in all_data.items(): vm.set_mem(r_vaddr, data) return e + class libimp_elf(libimp): pass @@ -94,6 +95,7 @@ ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml", (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4", } + def guess_arch(elf): """Return the architecture specified by the ELF container @elf. If unknown, return None""" diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py index 1e876b4e..1c811101 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -10,12 +10,24 @@ from elfesteem import * from miasm2.jitter.csts import * from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp - log = logging.getLogger('loader_pe') hnd = logging.StreamHandler() hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) -log.setLevel(logging.CRITICAL) +log.setLevel(logging.INFO) + + +def get_pe_dependencies(pe_obj): + """Return dependency set + @pe_obj: pe object""" + + if pe_obj.DirImport.impdesc is None: + return set() + out = set() + for dependency in pe_obj.DirImport.impdesc: + libname = dependency.dlldescname.name.lower() + out.add(libname) + return out def get_import_address_pe(e): @@ -58,7 +70,7 @@ def is_redirected_export(e, ad): # test is ad points to code or dll name out = '' for i in xrange(0x200): - c = e.virt(ad + i) + c = e.virt.get(ad + i) if c == "\x00": break out += c @@ -101,6 +113,7 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, **kargs): If all sections are aligned, they will be mapped on several different pages Otherwise, a big page is created, containing all sections """ + # Parse and build a PE instance pe = pe_init.PE(fdata, **kargs) @@ -199,6 +212,9 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): Return the corresponding PE instance Extra arguments are passed to vm_load_pe """ + + log.info('Loading module %r', fname_in) + fname = os.path.join(lib_path_base, fname_in) with open(fname) as fstream: pe = vm_load_pe(vm, fstream.read(), **kargs) @@ -206,7 +222,7 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): return pe -def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs): +def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs): """Call vm_load_pe_lib on each @libs_name filename @vm: VmMngr instance @libs_name: list of str @@ -219,7 +235,7 @@ def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs): for fname in libs_name} -def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll", +def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base, patch_vm_imp=True, **kargs): for e in lib_imgs.values(): preload_pe(e, libs, patch_vm_imp) @@ -228,7 +244,7 @@ def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll", def vm2pe(myjit, fname, libs=None, e_orig=None, min_addr=None, max_addr=None, min_section_offset=0x1000, img_base=None, - added_funcs=None): + added_funcs=None, **kwargs): if e_orig: size = e_orig._wsize else: @@ -274,7 +290,9 @@ def vm2pe(myjit, fname, libs=None, e_orig=None, libbase, dllname = libs.fad2info[funcaddr] libs.lib_get_add_func(libbase, dllname, addr) - new_dll = libs.gen_new_lib(mye, mye.virt.is_addr_in) + filter_import = kwargs.get( + 'filter_import', lambda _, ad: mye.virt.is_addr_in(ad)) + new_dll = libs.gen_new_lib(mye, filter_import) else: new_dll = {} @@ -305,11 +323,26 @@ def vm2pe(myjit, fname, libs=None, e_orig=None, class libimp_pe(libimp): + def __init__(self, *args, **kwargs): + super(libimp_pe, self).__init__(*args, **kwargs) + # dependency -> redirector + self.created_redirected_imports = {} + def add_export_lib(self, e, name): + if name in self.created_redirected_imports: + log.error("%r has previously been created due to redirect\ + imports due to %r. Change the loading order.", + name, self.created_redirected_imports[name]) + raise RuntimeError('Bad import: loading previously created import') + self.all_exported_lib.append(e) # will add real lib addresses to database if name in self.name2off: ad = self.name2off[name] + if e is not None and name in self.fake_libs: + log.error( + "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name) + raise RuntimeError("Bad import") else: log.debug('new lib %s', name) ad = e.NThdr.ImageBase @@ -332,8 +365,6 @@ class libimp_pe(libimp): ret = is_redirected_export(e, ad) if ret: exp_dname, exp_fname = ret - # log.debug('export redirection %s' % imp_ord_or_name) - # log.debug('source %s %s' % (exp_dname, exp_fname)) exp_dname = exp_dname + '.dll' exp_dname = exp_dname.lower() # if dll auto refes in redirection @@ -343,17 +374,21 @@ class libimp_pe(libimp): # schedule func todo = [(imp_ord_or_name, ad)] + todo continue - elif not exp_dname in self.name2off: - raise ValueError('load %r first' % exp_dname) + else: + # import redirected lib from non loaded dll + if not exp_dname in self.name2off: + self.created_redirected_imports.setdefault( + exp_dname, set()).add(name) + + # Ensure import entry is created + new_lib_base = self.lib_get_add_base(exp_dname) + # Ensure function entry is created + _ = self.lib_get_add_func(new_lib_base, exp_fname) + c_name = canon_libname_libfunc(exp_dname, exp_fname) libad_tmp = self.name2off[exp_dname] ad = self.lib_imp2ad[libad_tmp][exp_fname] - # log.debug('%s' % hex(ad)) - # if not imp_ord_or_name in self.lib_imp2dstad[libad]: - # self.lib_imp2dstad[libad][imp_ord_or_name] = set() - # self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) - # log.debug('new imp %s %s' % (imp_ord_or_name, hex(ad))) self.lib_imp2ad[libad][imp_ord_or_name] = ad name_inv = dict([(x[1], x[0]) for x in self.name2off.items()]) @@ -362,10 +397,10 @@ class libimp_pe(libimp): self.fad2cname[ad] = c_name self.fad2info[ad] = libad, imp_ord_or_name - def gen_new_lib(self, target_pe, flt=lambda _: True): + def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs): """Gen a new DirImport description @target_pe: PE instance - @flt: (boolean f(address)) restrict addresses to keep + @filter_import: (boolean f(pe, address)) restrict addresses to keep """ new_lib = [] @@ -377,8 +412,9 @@ class libimp_pe(libimp): for func_name, dst_addresses in self.lib_imp2dstad[ad].items(): out_ads.update({addr: func_name for addr in dst_addresses}) - # Filter available addresses according to @flt - all_ads = [addr for addr in out_ads.keys() if flt(addr)] + # Filter available addresses according to @filter_import + all_ads = [ + addr for addr in out_ads.keys() if filter_import(target_pe, addr)] log.debug('ads: %s', map(hex, all_ads)) if not all_ads: continue @@ -416,6 +452,67 @@ class libimp_pe(libimp): return new_lib + +def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib, + lib_path_base, **kwargs): + """Load a binary and all its dependencies. Returns a dictionnary containing + the association between binaries names and it's pe object + + @vm: virtual memory manager instance + @fname: full path of the binary + @name2module: dict containing association between name and pe + object. Updated. + @runtime_lib: libimp instance + @lib_path_base: directory of the libraries containing dependencies + + """ + + todo = [(fname, fname, 0)] + dependencies = [] + weight2name = {} + done = set() + + # Walk dependencies recursively + while todo: + name, fname, weight = todo.pop() + if name in done: + continue + done.add(name) + weight2name.setdefault(weight, set()).add(name) + if name in name2module: + pe_obj = name2module[name] + else: + try: + with open(fname) as fstream: + log.info('Loading module name %r', fname) + pe_obj = vm_load_pe(vm, fstream.read(), **kwargs) + except IOError: + log.error('Cannot open %s' % fname) + name2module[name] = None + continue + name2module[name] = pe_obj + + new_dependencies = get_pe_dependencies(pe_obj) + todo += [(name, os.path.join(lib_path_base, name), weight - 1) + for name in new_dependencies] + + ordered_modules = sorted(weight2name.items()) + for _, modules in ordered_modules: + for name in modules: + pe_obj = name2module[name] + if pe_obj is None: + continue + # Fix imports + if pe_obj.DirExport: + runtime_lib.add_export_lib(pe_obj, name) + + for pe_obj in name2module.itervalues(): + if pe_obj is None: + continue + preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True) + + return name2module + # machine -> arch PE_machine = {0x14c: "x86_32", 0x8664: "x86_64", diff --git a/miasm2/jitter/loader/utils.py b/miasm2/jitter/loader/utils.py index a6a19cb3..83d1a796 100644 --- a/miasm2/jitter/loader/utils.py +++ b/miasm2/jitter/loader/utils.py @@ -4,7 +4,7 @@ log = logging.getLogger('loader_common') hnd = logging.StreamHandler() hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) -log.setLevel(logging.CRITICAL) +log.setLevel(logging.INFO) def canon_libname_libfunc(libname, libfunc): @@ -15,7 +15,7 @@ def canon_libname_libfunc(libname, libfunc): return str(dn), libfunc -class libimp: +class libimp(object): def __init__(self, lib_base_ad=0x71111000, **kargs): self.name2off = {} @@ -26,6 +26,7 @@ class libimp: self.fad2cname = {} self.fad2info = {} self.all_exported_lib = [] + self.fake_libs = set() def lib_get_add_base(self, name): name = name.lower().strip(' ') @@ -38,7 +39,8 @@ class libimp: ad = self.name2off[name] else: ad = self.libbase_ad - log.debug('new lib %s 0x%x', name, ad) + log.warning("Create dummy entry for %r", name) + self.fake_libs.add(name) self.name2off[name] = ad self.libbase2lastad[ad] = ad + 0x1 self.lib_imp2ad[ad] = {} diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c index 3597ae4f..b86bee1a 100644 --- a/miasm2/jitter/vm_mngr.c +++ b/miasm2/jitter/vm_mngr.c @@ -32,13 +32,6 @@ -/* -struct memory_page_list_head memory_page_pool; -struct code_bloc_list_head code_bloc_pool; - -struct memory_breakpoint_info_head memory_breakpoint_pool; -*/ - /****************memory manager**************/ @@ -83,68 +76,50 @@ void print_val(uint64_t base, uint64_t addr) fprintf(stderr, "addr 0x%"PRIX64" val 0x%"PRIX64"\n", addr-base, *ptr); } - -int is_mem_mapped(vm_mngr_t* vm_mngr, uint64_t ad) +inline int midpoint(int imin, int imax) { - struct memory_page_node * mpn; - /* - mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; - if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) - return 1; - */ - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - if ((mpn->ad <= ad) && (ad <mpn->ad + mpn->size)) - return 1; - } - - return 0; + return (imin + imax) / 2; } -/* return the address base of the memory page - containing addr -*/ -uint64_t get_mem_base_addr(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t *addr_base) +int find_page_node(struct memory_page_node * array, uint64_t key, int imin, int imax) { - struct memory_page_node * mpn; - /* - mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; - if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)){ - *addr_base = mpn->ad; - return 1; - } - */ - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - if ((mpn->ad <= ad) && (ad <mpn->ad + mpn->size)) { - *addr_base = mpn->ad; - return 1; - } + // continue searching while [imin,imax] is not empty + while (imin <= imax) { + // calculate the midpoint for roughly equal partition + int imid = midpoint(imin, imax); + if(array[imid].ad <= key && key < array[imid].ad + array[imid].size) + // key found at index imid + return imid; + // determine which subarray to search + else if (array[imid].ad < key) + // change min index to search upper subarray + imin = imid + 1; + else + // change max index to search lower subarray + imax = imid - 1; } - return 0; + // key was not found + return -1; } struct memory_page_node * get_memory_page_from_address(vm_mngr_t* vm_mngr, uint64_t ad) { struct memory_page_node * mpn; -#if 0 - mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; - if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) - return mpn; - - fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); - vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; - - return NULL; -#else + int i; - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + i = find_page_node(vm_mngr->memory_pages_array, + ad, + 0, + vm_mngr->memory_pages_number); + if (i >= 0) { + mpn = &vm_mngr->memory_pages_array[i]; if ((mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) return mpn; } fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; return NULL; -#endif } @@ -168,7 +143,7 @@ static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint6 return 0; } - /* check read breakpoint*/ + /* check read breakpoint */ LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ if ((b->access & BREAKPOINT_READ) == 0) continue; @@ -566,28 +541,7 @@ int shift_right_arith(unsigned int size, int a, unsigned int b) exit(0); } } -/* -int shift_right_arith_08(int a, unsigned int b) -{ - char i8_a; - i8_a = a; - return (i8_a >> b)&0xff; -} -int shift_right_arith_16(int a, unsigned int b) -{ - short i16_a; - i16_a = a; - return (i16_a >> b)&0xffff; -} - -int shift_right_arith_32(int a, unsigned int b) -{ - int i32_a; - i32_a = a; - return (i32_a >> b)&0xffffffff; -} -*/ uint64_t shift_right_logic(uint64_t size, uint64_t a, uint64_t b) { @@ -609,28 +563,6 @@ uint64_t shift_right_logic(uint64_t size, exit(0); } } -/* -int shift_right_logic_08(unsigned int a, unsigned int b) -{ - unsigned char u8_a; - u8_a = a; - return (u8_a >> b)&0xff; -} - -int shift_right_logic_16(unsigned int a, unsigned int b) -{ - unsigned short u16_a; - u16_a = a; - return (u16_a >> b)&0xffff; -} - -int shift_right_logic_32(unsigned int a, unsigned int b) -{ - unsigned int u32_a; - u32_a = a; - return (u32_a >> b)&0xffffffff; -} -*/ uint64_t shift_left_logic(uint64_t size, uint64_t a, uint64_t b) { @@ -648,22 +580,6 @@ uint64_t shift_left_logic(uint64_t size, uint64_t a, uint64_t b) exit(0); } } -/* -int shift_left_logic_O8(unsigned int a, unsigned int b) -{ - return (a<<b)&0xff; -} - -int shift_left_logic_16(unsigned int a, unsigned int b) -{ - return (a<<b)&0xffff; -} - -int shift_left_logic_32(unsigned int a, unsigned int b) -{ - return (a<<b)&0xffffffff; -} -*/ unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b) { @@ -748,39 +664,6 @@ unsigned int umul16_hi(unsigned short a, unsigned short b) return (c>>16) & 0xffff; } - - - -unsigned int div_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c) -{ - int64_t num; - if (c == 0) - { - //vmmngr.exception_flags |= EXCEPT_INT_DIV_BY_ZERO; - return 0; - } - num = ((int64_t)a << size) + b; - num/=(int64_t)c; - return num; -} - - -unsigned int rem_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c) -{ - int64_t num; - - if (c == 0) - { - //vmmngr.exception_flags |= EXCEPT_INT_DIV_BY_ZERO; - return 0; - } - - num = ((int64_t)a << size) + b; - num = (int64_t)num-c*(num/c); - return num; -} - - uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b) { uint64_t tmp; @@ -1450,10 +1333,9 @@ void dump_code_bloc_pool(vm_mngr_t* vm_mngr) void init_memory_page_pool(vm_mngr_t* vm_mngr) { - unsigned int i; - LIST_INIT(&vm_mngr->memory_page_pool); - for (i=0;i<MAX_MEMORY_PAGE_POOL_TAB; i++) - vm_mngr->memory_page_pool_tab[i] = NULL; + + vm_mngr->memory_pages_number = 0; + vm_mngr->memory_pages_array = NULL; } void init_code_bloc_pool(vm_mngr_t* vm_mngr) @@ -1471,18 +1353,8 @@ void init_memory_breakpoint(vm_mngr_t* vm_mngr) void reset_memory_page_pool(vm_mngr_t* vm_mngr) { - struct memory_page_node * mpn; - unsigned int i; - - while (!LIST_EMPTY(&vm_mngr->memory_page_pool)) { - mpn = LIST_FIRST(&vm_mngr->memory_page_pool); - LIST_REMOVE(mpn, next); - free(mpn->ad_hp); - free(mpn); - } - for (i=0;i<MAX_MEMORY_PAGE_POOL_TAB; i++) - vm_mngr->memory_page_pool_tab[i] = NULL; - + free(vm_mngr->memory_pages_array); + vm_mngr->memory_pages_number = 0; } @@ -1513,21 +1385,14 @@ void reset_memory_breakpoint(vm_mngr_t* vm_mngr) } - +/* We don't use dichotomy here for the insertion */ int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) { struct memory_page_node * mpn; + int i; - /* - for (i=mpn_a->ad >> MEMORY_PAGE_POOL_MASK_BIT; - i<(mpn_a->ad + mpn_a->size + PAGE_SIZE - 1)>>MEMORY_PAGE_POOL_MASK_BIT; - i++){ - if (memory_page_pool_tab[i] !=NULL){ - return 1; - } - } - */ - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + for (i=0;i<vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; if (mpn->ad >= mpn_a->ad + mpn_a->size) continue; if (mpn->ad + mpn->size <= mpn_a->ad) @@ -1544,54 +1409,41 @@ int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) return 0; } -void insert_mpn_in_tab(struct memory_page_node* mpn_a) -{ - /* - for (i=mpn_a->ad >> MEMORY_PAGE_POOL_MASK_BIT; - i<(mpn_a->ad + mpn_a->size + PAGE_SIZE - 1)>>MEMORY_PAGE_POOL_MASK_BIT; - i++){ - if (memory_page_pool_tab[i] !=NULL){ - fprintf(stderr, "known page in tab\n"); - exit(1); - } - memory_page_pool_tab[i] = mpn_a; - } - */ - -} +/* We don't use dichotomy here for the insertion */ void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) { struct memory_page_node * mpn; - struct memory_page_node * lmpn; + int i; - if (LIST_EMPTY(&vm_mngr->memory_page_pool)){ - LIST_INSERT_HEAD(&vm_mngr->memory_page_pool, mpn_a, next); - insert_mpn_in_tab(mpn_a); - return; - } - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - lmpn = mpn; + for (i=0; i < vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; if (mpn->ad < mpn_a->ad) continue; - LIST_INSERT_BEFORE(mpn, mpn_a, next); - insert_mpn_in_tab(mpn_a); - return; + break; } - LIST_INSERT_AFTER(lmpn, mpn_a, next); - insert_mpn_in_tab(mpn_a); + vm_mngr->memory_pages_array = realloc(vm_mngr->memory_pages_array, + sizeof(struct memory_page_node) * + (vm_mngr->memory_pages_number+1)); + + memmove(&vm_mngr->memory_pages_array[i+1], + &vm_mngr->memory_pages_array[i], + sizeof(struct memory_page_node) * (vm_mngr->memory_pages_number - i) + ); + + vm_mngr->memory_pages_array[i] = *mpn_a; + vm_mngr->memory_pages_number ++; } -/* - Return a char* representing the repr of vm_mngr_t object -*/ +/* Return a char* representing the repr of vm_mngr_t object */ char* dump(vm_mngr_t* vm_mngr) { char buf[100]; int length; int total_len = 0; char *buf_final; + int i; struct memory_page_node * mpn; buf_final = malloc(1); @@ -1600,8 +1452,9 @@ char* dump(vm_mngr_t* vm_mngr) exit(0); } buf_final[0] = '\x00'; - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + for (i=0; i< vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; length = snprintf(buf, sizeof(buf), "ad 0x%"PRIX64" size 0x%"PRIX64" %c%c%c\n", (uint64_t)mpn->ad, @@ -1664,59 +1517,6 @@ void remove_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, unsigned int acce } - - - - - -unsigned int get_memory_page_next(vm_mngr_t* vm_mngr, unsigned int n_ad) -{ - struct memory_page_node * mpn; - uint64_t ad = 0; - - LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ - if (mpn->ad < n_ad) - continue; - - if (ad == 0 || mpn->ad <ad) - ad = mpn->ad; - } - return ad; -} - - -#if 0 -unsigned int get_memory_page_from_min_ad(unsigned int size) -{ - struct memory_page_node * mpn; - unsigned int c_ad ; - unsigned int min_ad = min_page_ad; - int end = 0; - /* first, find free min ad */ - while (!end){ - end = 1; - LIST_FOREACH(mpn, &memory_page_pool, next){ - c_ad = (mpn->ad + mpn->size+0x1000)&0xfffff000; - if (c_ad <= min_ad) - continue; - if (mpn->ad <= min_ad){ - min_ad = c_ad; - end = 0; - break; - } - if (mpn->ad - min_ad < size){ - min_ad = c_ad; - end = 0; - break; - } - } - } - return min_ad; - } -#endif - - - /********************************************/ void hexdump(char* m, unsigned int l) @@ -1759,40 +1559,6 @@ void hexdump(char* m, unsigned int l) } - - -unsigned int access_segment(unsigned int d) -{ - // XXX TODO - printf("access segment %X\n", d); - return 0; -} -unsigned int access_segment_ok(unsigned int d) -{ - // XXX TODO - printf("access segment ok %X\n", d); - return 0; -} - -unsigned int load_segment_limit(unsigned int d) -{ - // XXX TODO - printf("load segment limit %X\n", d); - return 0; -} -unsigned int load_segment_limit_ok(unsigned int d) -{ - // XXX TODO - printf("load segment limit ok %X\n", d); - return 0; -} - -unsigned int load_tr_segment_selector(unsigned int d) -{ - // XXX TODO - return 0; -} - // Return vm_mngr's exception flag value uint64_t get_exception_flag(vm_mngr_t* vm_mngr) { diff --git a/miasm2/jitter/vm_mngr.h b/miasm2/jitter/vm_mngr.h index f5895e12..02b5de73 100644 --- a/miasm2/jitter/vm_mngr.h +++ b/miasm2/jitter/vm_mngr.h @@ -49,7 +49,6 @@ -LIST_HEAD(memory_page_list_head, memory_page_node); LIST_HEAD(code_bloc_list_head, code_bloc_node); LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); @@ -65,13 +64,23 @@ LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); #define VM_BIG_ENDIAN 1 #define VM_LITTLE_ENDIAN 2 + +struct memory_page_node { + uint64_t ad; + uint64_t size; + uint64_t access; + void* ad_hp; +}; + + + typedef struct { int sex; - struct memory_page_list_head memory_page_pool; struct code_bloc_list_head code_bloc_pool; struct memory_breakpoint_info_head memory_breakpoint_pool; - struct memory_page_node *memory_page_pool_tab[MAX_MEMORY_PAGE_POOL_TAB]; + int memory_pages_number; + struct memory_page_node* memory_pages_array; unsigned int *code_addr_tab; unsigned int code_bloc_pool_ad_min; @@ -93,16 +102,6 @@ typedef struct { //extern vm_mngr_t vmmngr; -struct memory_page_node { - uint64_t ad; - uint64_t size; - uint64_t access; - void* ad_hp; - LIST_ENTRY(memory_page_node) next; -}; - - - struct code_bloc_node { uint64_t ad_start; uint64_t ad_stop; @@ -183,11 +182,6 @@ int shift_right_arith(unsigned int size, int a, unsigned int b); uint64_t shift_right_logic(uint64_t size, uint64_t a, uint64_t b); uint64_t shift_left_logic(uint64_t size, uint64_t a, uint64_t b); -/* -int shift_left_logic_08(unsigned int a, unsigned int b); -int shift_left_logic_16(unsigned int a, unsigned int b); -int shift_left_logic_32(unsigned int a, unsigned int b); -*/ unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b); unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b); unsigned int imul_lo_op_08(char a, char b); @@ -202,8 +196,6 @@ unsigned int umul16_lo(unsigned short a, unsigned short b); unsigned int umul16_hi(unsigned short a, unsigned short b); -unsigned int div_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c); -unsigned int rem_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c); uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b); uint64_t rot_right(uint64_t size, uint64_t a, uint64_t b); unsigned int rcl_rez_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf); @@ -261,21 +253,6 @@ unsigned int rcl_rez_op(unsigned int size, unsigned int a, unsigned int b, unsig } -//PyObject* _vm_push_uint32_t(PyObject *item); -//PyObject* _vm_pop_uint32_t(void); -////PyObject* _vm_put_str(PyObject *item); -//PyObject* _vm_set_mem(PyObject *item, PyObject *item_str); -//PyObject* _vm_set_mem_access(PyObject *addr, PyObject *access); -//PyObject* _vm_get_str(PyObject *item, PyObject *item_len); -//PyObject* _vm_add_memory_page(PyObject *item, PyObject *access, PyObject *item_str); -//PyObject* _vm_add_code_bloc(PyObject *item1, PyObject *item2);//, PyObject *item3); -//PyObject* _call_pyfunc_from_globals(char* funcname); -//PyObject* _call_pyfunc_from_eip(void); -// -//PyObject* call_pyfunc_from_globals(char* funcname); -// -//PyObject* _vm_get_gpreg(void); - void hexdump(char* m, unsigned int l); struct code_bloc_node * create_code_bloc_node(uint64_t ad_start, uint64_t ad_stop); @@ -302,20 +279,12 @@ void check_write_code_bloc(vm_mngr_t* vm_mngr, uint64_t my_size, uint64_t addr); char* dump(vm_mngr_t* vm_mngr); void dump_memory_breakpoint_pool(vm_mngr_t* vm_mngr); -//PyObject* _vm_get_all_memory(void); PyObject* addr2BlocObj(vm_mngr_t* vm_mngr, uint64_t addr); /********************************************/ - -//PyObject* _vm_get_cpu_state(void); -//PyObject* _vm_set_cpu_state(PyObject * s_cpustate); - - -//void memory_page_write(unsigned int my_size, uint64_t ad, unsigned int src); -//unsigned int memory_page_read(unsigned int my_size, uint64_t ad); unsigned int get_memory_page_max_address(void); unsigned int get_memory_page_max_user_address(void); @@ -351,11 +320,8 @@ void func_loadlib_fake(void); void func_getproc_fake(void); -//PyObject* _vm_exec_bloc(PyObject* my_eip, PyObject* known_blocs); - unsigned int cpuid(unsigned int a, unsigned int reg_num); double int2double(unsigned int m); -//PyObject* _vm_exec_blocs(PyObject* my_eip); double fadd(double a, double b); double fsub(double a, double b); diff --git a/miasm2/jitter/vm_mngr_py.c b/miasm2/jitter/vm_mngr_py.c index bb0a6207..0a22c397 100644 --- a/miasm2/jitter/vm_mngr_py.c +++ b/miasm2/jitter/vm_mngr_py.c @@ -66,44 +66,6 @@ PyObject* _vm_get_exception(unsigned int xcpt) RAISE(PyExc_TypeError,"arg must be int"); \ } \ - -PyObject* vm_is_mem_mapped(VmMngr* self, PyObject* item) -{ - PyObject *addr; - uint64_t page_addr; - uint32_t ret; - if (!PyArg_ParseTuple(item, "O", &addr)) - return NULL; - - PyGetInt(addr, page_addr); - - ret = is_mem_mapped(&self->vm_mngr, page_addr); - return PyInt_FromLong((long)ret); -} - - - -PyObject* vm_get_mem_base_addr(VmMngr* self, PyObject* item) -{ - PyObject *addr; - - uint64_t page_addr; - uint64_t addr_base; - unsigned int ret; - - if (!PyArg_ParseTuple(item, "O", &addr)) - return NULL; - - PyGetInt(addr, page_addr); - - ret = get_mem_base_addr(&self->vm_mngr, page_addr, &addr_base); - if (ret == 0){ - Py_INCREF(Py_None); - return Py_None; - } - return PyLong_FromUnsignedLongLong((uint64_t)addr_base); -} - static void sig_alarm(int signo) { global_vmmngr->vm_mngr.exception_flags |= BREAK_SIGALARM; @@ -145,10 +107,6 @@ PyObject* vm_add_memory_page(VmMngr* self, PyObject* args) buf_size = PyString_Size(item_str); PyString_AsStringAndSize(item_str, &buf_data, &length); - /* - fprintf(stderr, "add page %"PRIX64" %"PRIX64" %"PRIX64"\n", - page_addr, buf_size, page_access); - */ mpn = create_memory_page_node(page_addr, buf_size, page_access); if (mpn == NULL) RAISE(PyExc_TypeError,"cannot create page"); @@ -378,11 +336,13 @@ PyObject* vm_get_all_memory(VmMngr* self, PyObject* args) struct memory_page_node * mpn; PyObject *dict; PyObject *dict2; + int i; dict = PyDict_New(); - LIST_FOREACH(mpn, &self->vm_mngr.memory_page_pool, next){ + for (i=0;i<self->vm_mngr.memory_pages_number; i++) { + mpn = &self->vm_mngr.memory_pages_array[i]; dict2 = PyDict_New(); @@ -491,24 +451,6 @@ vm_set_little_endian(VmMngr *self, PyObject *value, void *closure) } - -/* -PyObject* add_jitbloc(VmMngr* self, PyObject* args) -{ - PyObject* jitobj; - - if (!PyArg_ParseTuple(args, "O", &addr2obj)) - return NULL; - - Py_INCREF(Py_None); - return Py_None; - -} -*/ - - - - static void VmMngr_dealloc(VmMngr* self) { diff --git a/miasm2/os_dep/win_api_x86_32_seh.py b/miasm2/os_dep/win_api_x86_32_seh.py index d86bd46a..58cc48af 100644 --- a/miasm2/os_dep/win_api_x86_32_seh.py +++ b/miasm2/os_dep/win_api_x86_32_seh.py @@ -80,7 +80,7 @@ FAKE_SEH_B_AD = context_address + 0x2000 cur_seh_ad = FAKE_SEH_B_AD -loaded_modules = ["ntdll.dll", "kernel32.dll"] +name2module = [] main_pe = None main_pe_name = "c:\\xxx\\toto.exe" @@ -206,11 +206,10 @@ class LoadedModules(object): self.module2name[module] = name def __repr__(self): - out = self.name2module.iteritems() - return "\n".join(out) + return "\n".join([str(x) for x in self.name2module.iteritems()]) -def create_modules_chain(jitter, modules_name): +def create_modules_chain(jitter, name2module): """ Create the modules entries. Those modules are not linked in this function. @@ -235,7 +234,7 @@ def create_modules_chain(jitter, modules_name): +0x04c PatchInformation : Ptr32 Void @jitter: jitter instance - @modules_name: a list of modules names + @name2module: dict containing association between name and its pe instance """ modules_info = LoadedModules() @@ -249,31 +248,18 @@ def create_modules_chain(jitter, modules_name): dummy_e.NThdr.sizeofimage = 0 out = "" - for i, m in enumerate([(main_pe_name, main_pe), - ("", dummy_e)] + modules_name): + for i, (fname, pe_obj) in enumerate([("", dummy_e)] + name2module.items()): + if pe_obj is None: + log.warning("Unknown module: ommited from link list (%r)", + fname) + continue addr = base_addr + i * 0x1000 - if isinstance(m, tuple): - fname, e = m - else: - fname, e = m, None bpath = fname.replace('/', '\\') bname_str = os.path.split(fname)[1].lower() bname = "\x00".join(bname_str) + "\x00" - if e is None: - if i == 0: - full_name = fname - else: - full_name = os.path.join("win_dll", fname) - try: - e = pe_init.PE(open(full_name, 'rb').read()) - except IOError: - log.error('No main pe, ldr data will be unconsistant!') - e = None - if e is None: - continue - log.info("Add module %x %r", e.NThdr.ImageBase, bname_str) + log.info("Add module %x %r", pe_obj.NThdr.ImageBase, bname_str) - modules_info.add(bname_str, e, addr) + modules_info.add(bname_str, pe_obj, addr) m_o = "" m_o += pck32(0) @@ -282,9 +268,9 @@ def create_modules_chain(jitter, modules_name): m_o += pck32(0) m_o += pck32(0) m_o += pck32(0) - m_o += pck32(e.NThdr.ImageBase) - m_o += pck32(e.rva2virt(e.Opthdr.AddressOfEntryPoint)) - m_o += pck32(e.NThdr.sizeofimage) + m_o += pck32(pe_obj.NThdr.ImageBase) + m_o += pck32(pe_obj.rva2virt(pe_obj.Opthdr.AddressOfEntryPoint)) + m_o += pck32(pe_obj.NThdr.sizeofimage) m_o += struct.pack('HH', len(bname), len(bname) + 2) m_o += pck32(addr + offset_path) m_o += struct.pack('HH', len(bname), len(bname) + 2) @@ -321,7 +307,7 @@ def fix_InLoadOrderModuleList(jitter, modules_info): dummy_pe = modules_info.name2module.get("", None) special_modules = [main_pe, kernel32_pe, ntdll_pe, dummy_pe] if not all(special_modules): - log.warn('No main pe, ldr data will be unconsistant') + log.warn('No main pe, ldr data will be unconsistant %r', special_modules) loaded_modules = modules_info.modules else: loaded_modules = [module for module in modules_info.modules @@ -461,7 +447,7 @@ def init_seh(jitter): build_teb(jitter, FS_0_AD) build_peb(jitter, peb_address) - modules_info = create_modules_chain(jitter, loaded_modules) + modules_info = create_modules_chain(jitter, name2module) fix_InLoadOrderModuleList(jitter, modules_info) fix_InMemoryOrderModuleList(jitter, modules_info) fix_InInitializationOrderModuleList(jitter, modules_info) |