about summary refs log tree commit diff stats
path: root/miasm2/jitter/loader
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2/jitter/loader')
-rw-r--r-- miasm2/jitter/loader/elf.py 20
-rw-r--r-- miasm2/jitter/loader/pe.py 137
-rw-r--r-- miasm2/jitter/loader/utils.py 8
3 files changed, 133 insertions, 32 deletions
diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py
index b3946000..08df632a 100644
--- a/miasm2/jitter/loader/elf.py
+++ b/miasm2/jitter/loader/elf.py
@@ -17,6 +17,7 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
 log.addHandler(hnd)
 log.setLevel(logging.CRITICAL)
 
+
 def get_import_address_elf(e):
     import2addr = defaultdict(set)
     for sh in e.sh:
@@ -46,21 +47,21 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True):
     return runtime_lib, dyn_funcs
 
 
-
 def vm_load_elf(vm, fdata, **kargs):
     """
     Very dirty elf loader
     TODO XXX: implement real loader
     """
-    #log.setLevel(logging.DEBUG)
+    # log.setLevel(logging.DEBUG)
     e = elf_init.ELF(fdata, **kargs)
     i = interval()
     all_data = {}
     for p in e.ph.phlist:
-        if p.ph.type != 1:
+        if p.ph.type != elf_csts.PT_LOAD:
             continue
-        log.debug('0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset,
-                  p.ph.filesz)
+        log.debug(
+            '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset,
+                  p.ph.filesz, p.ph.type)
         data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz]
         addr_o = p.ph.vaddr
         a_addr = addr_o & ~0xFFF
@@ -68,16 +69,16 @@ def vm_load_elf(vm, fdata, **kargs):
         b_addr = (b_addr + 0xFFF) & ~0xFFF
         all_data[addr_o] = data_o
         # -2: Trick to avoid merging 2 consecutive pages
-        i += [(a_addr, b_addr-2)]
+        i += [(a_addr, b_addr - 2)]
     for a, b in i.intervals:
-        #print hex(a), hex(b)
-        vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00"*(b+2-a))
-
+        # print hex(a), hex(b)
+        vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00" * (b + 2 - a))
 
     for r_vaddr, data in all_data.items():
         vm.set_mem(r_vaddr, data)
     return e
 
+
 class libimp_elf(libimp):
     pass
 
@@ -94,6 +95,7 @@ ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml",
                (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4",
                }
 
+
 def guess_arch(elf):
     """Return the architecture specified by the ELF container @elf.
     If unknown, return None"""
diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py
index 1e876b4e..1c811101 100644
--- a/miasm2/jitter/loader/pe.py
+++ b/miasm2/jitter/loader/pe.py
@@ -10,12 +10,24 @@ from elfesteem import *
 from miasm2.jitter.csts import *
 from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp
 
-
 log = logging.getLogger('loader_pe')
 hnd = logging.StreamHandler()
 hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
 log.addHandler(hnd)
-log.setLevel(logging.CRITICAL)
+log.setLevel(logging.INFO)
+
+
+def get_pe_dependencies(pe_obj):
+    """Return the set of lowercased library names imported by @pe_obj
+    @pe_obj: PE object (an empty set is returned if it has no import table)"""
+
+    if pe_obj.DirImport.impdesc is None:
+        return set()
+    out = set()
+    for dependency in pe_obj.DirImport.impdesc:
+        libname = dependency.dlldescname.name.lower()
+        out.add(libname)
+    return out
 
 
 def get_import_address_pe(e):
@@ -58,7 +70,7 @@ def is_redirected_export(e, ad):
     # test is ad points to code or dll name
     out = ''
     for i in xrange(0x200):
-        c = e.virt(ad + i)
+        c = e.virt.get(ad + i)
         if c == "\x00":
             break
         out += c
@@ -101,6 +113,7 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, **kargs):
     If all sections are aligned, they will be mapped on several different pages
     Otherwise, a big page is created, containing all sections
     """
+
     # Parse and build a PE instance
     pe = pe_init.PE(fdata, **kargs)
 
@@ -199,6 +212,9 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
     Return the corresponding PE instance
     Extra arguments are passed to vm_load_pe
     """
+
+    log.info('Loading module %r', fname_in)
+
     fname = os.path.join(lib_path_base, fname_in)
     with open(fname) as fstream:
         pe = vm_load_pe(vm, fstream.read(), **kargs)
@@ -206,7 +222,7 @@ def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
     return pe
 
 
-def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs):
+def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs):
     """Call vm_load_pe_lib on each @libs_name filename
     @vm: VmMngr instance
     @libs_name: list of str
@@ -219,7 +235,7 @@ def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs):
             for fname in libs_name}
 
 
-def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll",
+def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base,
                            patch_vm_imp=True, **kargs):
     for e in lib_imgs.values():
         preload_pe(e, libs, patch_vm_imp)
@@ -228,7 +244,7 @@ def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll",
 def vm2pe(myjit, fname, libs=None, e_orig=None,
           min_addr=None, max_addr=None,
           min_section_offset=0x1000, img_base=None,
-          added_funcs=None):
+          added_funcs=None, **kwargs):
     if e_orig:
         size = e_orig._wsize
     else:
@@ -274,7 +290,9 @@ def vm2pe(myjit, fname, libs=None, e_orig=None,
                 libbase, dllname = libs.fad2info[funcaddr]
                 libs.lib_get_add_func(libbase, dllname, addr)
 
-        new_dll = libs.gen_new_lib(mye, mye.virt.is_addr_in)
+        filter_import = kwargs.get(
+            'filter_import', lambda _, ad: mye.virt.is_addr_in(ad))
+        new_dll = libs.gen_new_lib(mye, filter_import)
     else:
         new_dll = {}
 
@@ -305,11 +323,26 @@ def vm2pe(myjit, fname, libs=None, e_orig=None,
 
 class libimp_pe(libimp):
 
+    def __init__(self, *args, **kwargs):
+        super(libimp_pe, self).__init__(*args, **kwargs)
+        # dependency -> redirector
+        self.created_redirected_imports = {}
+
     def add_export_lib(self, e, name):
+        if name in self.created_redirected_imports:
+            log.error("%r has previously been created due to redirect\
+            imports due to %r. Change the loading order.",
+                      name, self.created_redirected_imports[name])
+            raise RuntimeError('Bad import: loading previously created import')
+
         self.all_exported_lib.append(e)
         # will add real lib addresses to database
         if name in self.name2off:
             ad = self.name2off[name]
+            if e is not None and name in self.fake_libs:
+                log.error(
+                    "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
+                raise RuntimeError("Bad import")
         else:
             log.debug('new lib %s', name)
             ad = e.NThdr.ImageBase
@@ -332,8 +365,6 @@ class libimp_pe(libimp):
                 ret = is_redirected_export(e, ad)
                 if ret:
                     exp_dname, exp_fname = ret
-                    # log.debug('export redirection %s' % imp_ord_or_name)
-                    # log.debug('source %s %s' % (exp_dname, exp_fname))
                     exp_dname = exp_dname + '.dll'
                     exp_dname = exp_dname.lower()
                     # if dll auto refes in redirection
@@ -343,17 +374,21 @@ class libimp_pe(libimp):
                             # schedule func
                             todo = [(imp_ord_or_name, ad)] + todo
                             continue
-                    elif not exp_dname in self.name2off:
-                        raise ValueError('load %r first' % exp_dname)
+                    else:
+                        # import redirected lib from non loaded dll
+                        if not exp_dname in self.name2off:
+                            self.created_redirected_imports.setdefault(
+                                exp_dname, set()).add(name)
+
+                        # Ensure import entry is created
+                        new_lib_base = self.lib_get_add_base(exp_dname)
+                        # Ensure function entry is created
+                        _ = self.lib_get_add_func(new_lib_base, exp_fname)
+
                     c_name = canon_libname_libfunc(exp_dname, exp_fname)
                     libad_tmp = self.name2off[exp_dname]
                     ad = self.lib_imp2ad[libad_tmp][exp_fname]
-                    # log.debug('%s' % hex(ad))
-                # if not imp_ord_or_name in self.lib_imp2dstad[libad]:
-                #    self.lib_imp2dstad[libad][imp_ord_or_name] = set()
-                # self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad)
 
-                # log.debug('new imp %s %s' % (imp_ord_or_name, hex(ad)))
                 self.lib_imp2ad[libad][imp_ord_or_name] = ad
 
                 name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
@@ -362,10 +397,10 @@ class libimp_pe(libimp):
                 self.fad2cname[ad] = c_name
                 self.fad2info[ad] = libad, imp_ord_or_name
 
-    def gen_new_lib(self, target_pe, flt=lambda _: True):
+    def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs):
         """Gen a new DirImport description
         @target_pe: PE instance
-        @flt: (boolean f(address)) restrict addresses to keep
+        @filter_import: (boolean f(pe, address)) restrict addresses to keep
         """
 
         new_lib = []
@@ -377,8 +412,9 @@ class libimp_pe(libimp):
             for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
                 out_ads.update({addr: func_name for addr in dst_addresses})
 
-            # Filter available addresses according to @flt
-            all_ads = [addr for addr in out_ads.keys() if flt(addr)]
+            # Filter available addresses according to @filter_import
+            all_ads = [
+                addr for addr in out_ads.keys() if filter_import(target_pe, addr)]
             log.debug('ads: %s', map(hex, all_ads))
             if not all_ads:
                 continue
@@ -416,6 +452,67 @@ class libimp_pe(libimp):
 
         return new_lib
 
+
+def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib,
+                                lib_path_base, **kwargs):
+    """Load a binary and all its dependencies. Returns a dictionary mapping
+    each binary name to its PE object (None if the file could not be opened)
+
+    @vm: virtual memory manager instance
+    @fname: full path of the binary
+    @name2module: dict containing association between name and pe
+    object. Updated.
+    @runtime_lib: libimp instance
+    @lib_path_base: directory of the libraries containing dependencies
+
+    """
+
+    todo = [(fname, fname, 0)]
+    dependencies = []
+    weight2name = {}
+    done = set()
+
+    # Walk the dependency graph iteratively, using todo as a worklist
+    while todo:
+        name, fname, weight = todo.pop()
+        if name in done:
+            continue
+        done.add(name)
+        weight2name.setdefault(weight, set()).add(name)
+        if name in name2module:
+            pe_obj = name2module[name]
+        else:
+            try:
+                with open(fname) as fstream:
+                    log.info('Loading module name %r', fname)
+                    pe_obj = vm_load_pe(vm, fstream.read(), **kwargs)
+            except IOError:
+                log.error('Cannot open %s' % fname)
+                name2module[name] = None
+                continue
+            name2module[name] = pe_obj
+
+        new_dependencies = get_pe_dependencies(pe_obj)
+        todo += [(name, os.path.join(lib_path_base, name), weight - 1)
+                 for name in new_dependencies]
+
+    ordered_modules = sorted(weight2name.items())
+    for _, modules in ordered_modules:
+        for name in modules:
+            pe_obj = name2module[name]
+            if pe_obj is None:
+                continue
+            # Register the exports of modules that have an export directory
+            if pe_obj.DirExport:
+                runtime_lib.add_export_lib(pe_obj, name)
+
+    for pe_obj in name2module.itervalues():
+        if pe_obj is None:
+            continue
+        preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True)
+
+    return name2module
+
 # machine -> arch
 PE_machine = {0x14c: "x86_32",
               0x8664: "x86_64",
diff --git a/miasm2/jitter/loader/utils.py b/miasm2/jitter/loader/utils.py
index a6a19cb3..83d1a796 100644
--- a/miasm2/jitter/loader/utils.py
+++ b/miasm2/jitter/loader/utils.py
@@ -4,7 +4,7 @@ log = logging.getLogger('loader_common')
 hnd = logging.StreamHandler()
 hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
 log.addHandler(hnd)
-log.setLevel(logging.CRITICAL)
+log.setLevel(logging.INFO)
 
 
 def canon_libname_libfunc(libname, libfunc):
@@ -15,7 +15,7 @@ def canon_libname_libfunc(libname, libfunc):
         return str(dn), libfunc
 
 
-class libimp:
+class libimp(object):
 
     def __init__(self, lib_base_ad=0x71111000, **kargs):
         self.name2off = {}
@@ -26,6 +26,7 @@ class libimp:
         self.fad2cname = {}
         self.fad2info = {}
         self.all_exported_lib = []
+        self.fake_libs = set()
 
     def lib_get_add_base(self, name):
         name = name.lower().strip(' ')
@@ -38,7 +39,8 @@ class libimp:
             ad = self.name2off[name]
         else:
             ad = self.libbase_ad
-            log.debug('new lib %s 0x%x', name, ad)
+            log.warning("Create dummy entry for %r", name)
+            self.fake_libs.add(name)
             self.name2off[name] = ad
             self.libbase2lastad[ad] = ad + 0x1
             self.lib_imp2ad[ad] = {}