diff options
| -rw-r--r-- | miasm2/jitter/jitload.py | 419 | ||||
| -rw-r--r-- | miasm2/jitter/loader/__init__.py | 0 | ||||
| -rw-r--r-- | miasm2/jitter/loader/elf.py | 80 | ||||
| -rw-r--r-- | miasm2/jitter/loader/pe.py | 183 | ||||
| -rw-r--r-- | miasm2/jitter/loader/utils.py | 197 | ||||
| -rwxr-xr-x | setup.py | 1 |
6 files changed, 461 insertions, 419 deletions
diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index 72e7eae7..f12e326b 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -2,11 +2,6 @@ import os from miasm2.core import asmbloc -from collections import defaultdict -import struct -from elfesteem import pe -from elfesteem import cstruct -from elfesteem import * from csts import * from miasm2.core.utils import * @@ -76,420 +71,6 @@ class bin_stream_vm(bin_stream): self.offset = val -def get_import_address(e): - import2addr = defaultdict(set) - if e.DirImport.impdesc is None: - return import2addr - for s in e.DirImport.impdesc: - # fthunk = e.rva2virt(s.firstthunk) - # l = "%2d %-25s %s" % (i, repr(s.dlldescname), repr(s)) - libname = s.dlldescname.name.lower() - for ii, imp in enumerate(s.impbynames): - if isinstance(imp, pe.ImportByName): - funcname = imp.name - else: - funcname = imp - # l = " %2d %-16s" % (ii, repr(funcname)) - import2addr[(libname, funcname)].add( - e.rva2virt(s.firstthunk + e._wsize * ii / 8)) - return import2addr - - -def preload_pe(vm, e, runtime_lib, patch_vm_imp=True): - fa = get_import_address(e) - dyn_funcs = {} - # log.debug('imported funcs: %s' % fa) - for (libname, libfunc), ads in fa.items(): - for ad in ads: - ad_base_lib = runtime_lib.lib_get_add_base(libname) - ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) - - libname_s = canon_libname_libfunc(libname, libfunc) - dyn_funcs[libname_s] = ad_libfunc - if patch_vm_imp: - vm.set_mem( - ad, struct.pack(cstruct.size2type[e._wsize], ad_libfunc)) - return dyn_funcs - - -def get_import_address_elf(e): - import2addr = defaultdict(set) - for sh in e.sh: - if not hasattr(sh, 'rel'): - continue - for k, v in sh.rel.items(): - import2addr[('xxx', k)].add(v.offset) - return import2addr - - -def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): - # XXX quick hack - fa = get_import_address_elf(e) - dyn_funcs = {} - # log.debug('imported funcs: %s' % fa) - for (libname, libfunc), ads in fa.items(): - for ad in ads: - ad_base_lib = runtime_lib.lib_get_add_base(libname) - ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) - - libname_s = canon_libname_libfunc(libname, libfunc) - dyn_funcs[libname_s] = ad_libfunc - if patch_vm_imp: - log.debug('patch %s %s %s' % - (hex(ad), hex(ad_libfunc), libfunc)) - vm.set_mem( - ad, struct.pack(cstruct.size2type[e.size], ad_libfunc)) - return runtime_lib, dyn_funcs - - -def is_redirected_export(e, ad): - # test is ad points to code or dll name - out = '' - for i in xrange(0x200): - c = e.virt(ad + i) - if c == "\x00": - break - out += c - if not (c.isalnum() or c in "_.-+*$@&#()[]={}"): - return False - if not "." in out: - return False - i = out.find('.') - return out[:i], out[i + 1:] - - -def get_export_name_addr_list(e): - out = [] - # add func name - for i, n in enumerate(e.DirExport.f_names): - addr = e.DirExport.f_address[e.DirExport.f_nameordinals[i].ordinal] - f_name = n.name.name - # log.debug('%s %s' % (f_name, hex(e.rva2virt(addr.rva)))) - out.append((f_name, e.rva2virt(addr.rva))) - - # add func ordinal - for i, o in enumerate(e.DirExport.f_nameordinals): - addr = e.DirExport.f_address[o.ordinal] - # log.debug('%s %s %s' % (o.ordinal, e.DirExport.expdesc.base, - # hex(e.rva2virt(addr.rva)))) - out.append( - (o.ordinal + e.DirExport.expdesc.base, e.rva2virt(addr.rva))) - return out - - -def canon_libname_libfunc(libname, libfunc): - dn = libname.split('.')[0] - if type(libfunc) == str: - return "%s_%s" % (dn, libfunc) - else: - return str(dn), libfunc - - -class libimp: - - def __init__(self, lib_base_ad=0x71111000, **kargs): - self.name2off = {} - self.libbase2lastad = {} - self.libbase_ad = lib_base_ad - self.lib_imp2ad = {} - self.lib_imp2dstad = {} - self.fad2cname = {} - self.fad2info = {} - self.all_exported_lib = [] - - def lib_get_add_base(self, name): - name = name.lower().strip(' ') - if not "." in name: - log.debug('warning adding .dll to modulename') - name += '.dll' - log.debug('%s' % name) - - if name in self.name2off: - ad = self.name2off[name] - else: - ad = self.libbase_ad - log.debug('new lib %s %s' % (name, hex(ad))) - self.name2off[name] = ad - self.libbase2lastad[ad] = ad + 0x1 - self.lib_imp2ad[ad] = {} - self.lib_imp2dstad[ad] = {} - self.libbase_ad += 0x1000 - return ad - - def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None): - if not libad in self.name2off.values(): - raise ValueError('unknown lib base!', hex(libad)) - - # test if not ordinatl - # if imp_ord_or_name >0x10000: - # imp_ord_or_name = vm_get_str(imp_ord_or_name, 0x100) - # imp_ord_or_name = imp_ord_or_name[:imp_ord_or_name.find('\x00')] - - #/!\ can have multiple dst ad - if not imp_ord_or_name in self.lib_imp2dstad[libad]: - self.lib_imp2dstad[libad][imp_ord_or_name] = set() - self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) - - if imp_ord_or_name in self.lib_imp2ad[libad]: - return self.lib_imp2ad[libad][imp_ord_or_name] - # log.debug('new imp %s %s' % (imp_ord_or_name, dst_ad)) - ad = self.libbase2lastad[libad] - self.libbase2lastad[libad] += 0x11 # arbitrary - self.lib_imp2ad[libad][imp_ord_or_name] = ad - - name_inv = dict([(x[1], x[0]) for x in self.name2off.items()]) - c_name = canon_libname_libfunc(name_inv[libad], imp_ord_or_name) - self.fad2cname[ad] = c_name - self.fad2info[ad] = libad, imp_ord_or_name - return ad - - def check_dst_ad(self): - for ad in self.lib_imp2dstad: - all_ads = self.lib_imp2dstad[ad].values() - all_ads.sort() - for i, x in enumerate(all_ads[:-1]): - if x is None or all_ads[i + 1] is None: - return False - if x + 4 != all_ads[i + 1]: - return False - return True - - def add_export_lib(self, e, name): - self.all_exported_lib.append(e) - # will add real lib addresses to database - if name in self.name2off: - ad = self.name2off[name] - else: - log.debug('new lib %s' % name) - ad = e.NThdr.ImageBase - libad = ad - self.name2off[name] = ad - self.libbase2lastad[ad] = ad + 0x1 - self.lib_imp2ad[ad] = {} - self.lib_imp2dstad[ad] = {} - self.libbase_ad += 0x1000 - - ads = get_export_name_addr_list(e) - todo = ads - # done = [] - while todo: - # for imp_ord_or_name, ad in ads: - imp_ord_or_name, ad = todo.pop() - - # if export is a redirection, search redirected dll - # and get function real addr - ret = is_redirected_export(e, ad) - if ret: - exp_dname, exp_fname = ret - # log.debug('export redirection %s' % imp_ord_or_name) - # log.debug('source %s %s' % (exp_dname, exp_fname)) - exp_dname = exp_dname + '.dll' - exp_dname = exp_dname.lower() - # if dll auto refes in redirection - if exp_dname == name: - libad_tmp = self.name2off[exp_dname] - if not exp_fname in self.lib_imp2ad[libad_tmp]: - # schedule func - todo = [(imp_ord_or_name, ad)] + todo - continue - elif not exp_dname in self.name2off: - raise ValueError('load %r first' % exp_dname) - c_name = canon_libname_libfunc(exp_dname, exp_fname) - libad_tmp = self.name2off[exp_dname] - ad = self.lib_imp2ad[libad_tmp][exp_fname] - # log.debug('%s' % hex(ad)) - # if not imp_ord_or_name in self.lib_imp2dstad[libad]: - # self.lib_imp2dstad[libad][imp_ord_or_name] = set() - # self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) - - # log.debug('new imp %s %s' % (imp_ord_or_name, hex(ad))) - self.lib_imp2ad[libad][imp_ord_or_name] = ad - - name_inv = dict([(x[1], x[0]) for x in self.name2off.items()]) - c_name = canon_libname_libfunc( - name_inv[libad], imp_ord_or_name) - self.fad2cname[ad] = c_name - self.fad2info[ad] = libad, imp_ord_or_name - - def gen_new_lib(self, target_pe, filter=lambda _: True): - """Gen a new DirImport description - @target_pe: PE instance - @filter: (boolean f(address)) restrict addresses to keep - """ - - new_lib = [] - for lib_name, ad in self.name2off.items(): - # Build an IMAGE_IMPORT_DESCRIPTOR - - # Get fixed addresses - out_ads = dict() # addr -> func_name - for func_name, dst_addresses in self.lib_imp2dstad[ad].items(): - out_ads.update({addr:func_name for addr in dst_addresses}) - - # Filter available addresses according to @filter - all_ads = [addr for addr in out_ads.keys() if filter(addr)] - log.debug('ads: %s' % map(hex, all_ads)) - if not all_ads: - continue - - # Keep non-NULL elements - all_ads.sort() - for i, x in enumerate(all_ads): - if x not in [0, None]: - break - all_ads = all_ads[i:] - - while all_ads: - # Find libname's Import Address Table - othunk = all_ads[0] - i = 0 - while i + 1 < len(all_ads) and all_ads[i] + 4 == all_ads[i + 1]: - i += 1 - # 'i + 1' is IAT's length - - # Effectively build an IMAGE_IMPORT_DESCRIPTOR - funcs = [out_ads[addr] for addr in all_ads[:i + 1]] - try: - rva = target_pe.virt2rva(othunk) - except pe.InvalidOffset: - pass - else: - new_lib.append(({"name": lib_name, - "firstthunk": rva}, - funcs) - ) - - # Update elements to handle - all_ads = all_ads[i + 1:] - - return new_lib - - -def vm_load_pe(vm, fname, align_s=True, load_hdr=True, - **kargs): - e = pe_init.PE(open(fname, 'rb').read(), **kargs) - - aligned = True - for s in e.SHList: - if s.addr & 0xFFF: - aligned = False - break - - if aligned: - if load_hdr: - hdr_len = max(0x200, e.NThdr.sizeofheaders) - min_len = min(e.SHList[0].addr, 0x1000)#e.NThdr.sizeofheaders) - pe_hdr = e.content[:hdr_len] - pe_hdr = pe_hdr + min_len * "\x00" - pe_hdr = pe_hdr[:min_len] - vm.add_memory_page( - e.NThdr.ImageBase, PAGE_READ | PAGE_WRITE, pe_hdr) - if align_s: - for i, s in enumerate(e.SHList[:-1]): - s.size = e.SHList[i + 1].addr - s.addr - s.rawsize = s.size - s.data = strpatchwork.StrPatchwork(s.data[:s.size]) - s.offset = s.addr - s = e.SHList[-1] - s.size = (s.size + 0xfff) & 0xfffff000 - for s in e.SHList: - data = str(s.data) - data += "\x00" * (s.size - len(data)) - # log.debug('SECTION %s %s' % (hex(s.addr), - # hex(e.rva2virt(s.addr)))) - vm.add_memory_page( - e.rva2virt(s.addr), PAGE_READ | PAGE_WRITE, data) - # s.offset = s.addr - return e - - # not aligned - log.warning('pe is not aligned, creating big section') - min_addr = None - max_addr = None - data = "" - - if load_hdr: - data = e.content[:0x400] - data += (e.SHList[0].addr - len(data)) * "\x00" - min_addr = 0 - - for i, s in enumerate(e.SHList): - if i < len(e.SHList) - 1: - s.size = e.SHList[i + 1].addr - s.addr - s.rawsize = s.size - s.offset = s.addr - - if min_addr is None or s.addr < min_addr: - min_addr = s.addr - if max_addr is None or s.addr + s.size > max_addr: - max_addr = s.addr + max(s.size, len(s.data)) - min_addr = e.rva2virt(min_addr) - max_addr = e.rva2virt(max_addr) - log.debug('%s %s %s' % - (hex(min_addr), hex(max_addr), hex(max_addr - min_addr))) - - vm.add_memory_page(min_addr, - PAGE_READ | PAGE_WRITE, - (max_addr - min_addr) * "\x00") - for s in e.SHList: - log.debug('%s %s' % (hex(e.rva2virt(s.addr)), len(s.data))) - vm.set_mem(e.rva2virt(s.addr), str(s.data)) - return e - - -def vm_load_elf(vm, fname, **kargs): - """ - Very dirty elf loader - TODO XXX: implement real loader - """ - #log.setLevel(logging.DEBUG) - e = elf_init.ELF(open(fname, 'rb').read(), **kargs) - i = interval() - all_data = {} - for p in e.ph.phlist: - if p.ph.type != 1: - continue - log.debug('%s %s %s %s' % - (hex(p.ph.vaddr), hex(p.ph.memsz), hex(p.ph.offset), hex(p.ph.filesz))) - data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz] - addr_o = p.ph.vaddr - a_addr = addr_o & ~0xFFF - b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) - b_addr = (b_addr + 0xFFF) & ~0xFFF - all_data[addr_o] = data_o - # -2: Trick to avoid merging 2 consecutive pages - i += [(a_addr, b_addr-2)] - for a, b in i.intervals: - #print hex(a), hex(b) - vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00"*(b+2-a)) - - #vm.dump_memory_page_pool() - - for r_vaddr, data in all_data.items(): - vm.set_mem(r_vaddr, data) - return e - -def vm_load_pe_lib(fname_in, libs, lib_path_base, patch_vm_imp, **kargs): - fname = os.path.join(lib_path_base, fname_in) - e = vm_load_pe(fname, **kargs) - libs.add_export_lib(e, fname_in) - # preload_pe(e, libs, patch_vm_imp) - return e - - -def vm_load_pe_libs(libs_name, libs, lib_path_base="win_dll", - patch_vm_imp=True, **kargs): - lib_imgs = {} - for fname in libs_name: - e = vm_load_pe_lib(fname, libs, lib_path_base, patch_vm_imp) - lib_imgs[fname] = e - return lib_imgs - - -def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll", - patch_vm_imp=True, **kargs): - for e in lib_imgs.values(): - preload_pe(e, libs, patch_vm_imp) diff --git a/miasm2/jitter/loader/__init__.py b/miasm2/jitter/loader/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/jitter/loader/__init__.py diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py new file mode 100644 index 00000000..9a81c4f7 --- /dev/null +++ b/miasm2/jitter/loader/elf.py @@ -0,0 +1,80 @@ +import struct +from collections import defaultdict + +from elfesteem import pe +from elfesteem import cstruct +from elfesteem import * +from miasm2.jitter.csts import * +from utils import canon_libname_libfunc +from miasm2.core.interval import interval + +import logging + +log = logging.getLogger('loader_elf') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + +def get_import_address_elf(e): + import2addr = defaultdict(set) + for sh in e.sh: + if not hasattr(sh, 'rel'): + continue + for k, v in sh.rel.items(): + import2addr[('xxx', k)].add(v.offset) + return import2addr + + +def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): + # XXX quick hack + fa = get_import_address_elf(e) + dyn_funcs = {} + # log.debug('imported funcs: %s' % fa) + for (libname, libfunc), ads in fa.items(): + for ad in ads: + ad_base_lib = runtime_lib.lib_get_add_base(libname) + ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) + + libname_s = canon_libname_libfunc(libname, libfunc) + dyn_funcs[libname_s] = ad_libfunc + if patch_vm_imp: + log.debug('patch %s %s %s' % + (hex(ad), hex(ad_libfunc), libfunc)) + vm.set_mem( + ad, struct.pack(cstruct.size2type[e.size], ad_libfunc)) + return runtime_lib, dyn_funcs + + + +def vm_load_elf(vm, fname, **kargs): + """ + Very dirty elf loader + TODO XXX: implement real loader + """ + #log.setLevel(logging.DEBUG) + e = elf_init.ELF(open(fname, 'rb').read(), **kargs) + i = interval() + all_data = {} + for p in e.ph.phlist: + if p.ph.type != 1: + continue + log.debug('%s %s %s %s' % + (hex(p.ph.vaddr), hex(p.ph.memsz), hex(p.ph.offset), hex(p.ph.filesz))) + data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz] + addr_o = p.ph.vaddr + a_addr = addr_o & ~0xFFF + b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) + b_addr = (b_addr + 0xFFF) & ~0xFFF + all_data[addr_o] = data_o + # -2: Trick to avoid merging 2 consecutive pages + i += [(a_addr, b_addr-2)] + for a, b in i.intervals: + #print hex(a), hex(b) + vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00"*(b+2-a)) + + #vm.dump_memory_page_pool() + + for r_vaddr, data in all_data.items(): + vm.set_mem(r_vaddr, data) + return e diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py new file mode 100644 index 00000000..880f8f61 --- /dev/null +++ b/miasm2/jitter/loader/pe.py @@ -0,0 +1,183 @@ +import struct +from collections import defaultdict + +from elfesteem import pe +from elfesteem import cstruct +from elfesteem import * +from miasm2.jitter.csts import * +from utils import canon_libname_libfunc + +import logging + +log = logging.getLogger('loader_pe') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + +def get_import_address_pe(e): + import2addr = defaultdict(set) + if e.DirImport.impdesc is None: + return import2addr + for s in e.DirImport.impdesc: + # fthunk = e.rva2virt(s.firstthunk) + # l = "%2d %-25s %s" % (i, repr(s.dlldescname), repr(s)) + libname = s.dlldescname.name.lower() + for ii, imp in enumerate(s.impbynames): + if isinstance(imp, pe.ImportByName): + funcname = imp.name + else: + funcname = imp + # l = " %2d %-16s" % (ii, repr(funcname)) + import2addr[(libname, funcname)].add( + e.rva2virt(s.firstthunk + e._wsize * ii / 8)) + return import2addr + + +def preload_pe(vm, e, runtime_lib, patch_vm_imp=True): + fa = get_import_address_pe(e) + dyn_funcs = {} + # log.debug('imported funcs: %s' % fa) + for (libname, libfunc), ads in fa.items(): + for ad in ads: + ad_base_lib = runtime_lib.lib_get_add_base(libname) + ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) + + libname_s = canon_libname_libfunc(libname, libfunc) + dyn_funcs[libname_s] = ad_libfunc + if patch_vm_imp: + vm.set_mem( + ad, struct.pack(cstruct.size2type[e._wsize], ad_libfunc)) + return dyn_funcs + + + +def is_redirected_export(e, ad): + # test is ad points to code or dll name + out = '' + for i in xrange(0x200): + c = e.virt(ad + i) + if c == "\x00": + break + out += c + if not (c.isalnum() or c in "_.-+*$@&#()[]={}"): + return False + if not "." in out: + return False + i = out.find('.') + return out[:i], out[i + 1:] + + +def get_export_name_addr_list(e): + out = [] + # add func name + for i, n in enumerate(e.DirExport.f_names): + addr = e.DirExport.f_address[e.DirExport.f_nameordinals[i].ordinal] + f_name = n.name.name + # log.debug('%s %s' % (f_name, hex(e.rva2virt(addr.rva)))) + out.append((f_name, e.rva2virt(addr.rva))) + + # add func ordinal + for i, o in enumerate(e.DirExport.f_nameordinals): + addr = e.DirExport.f_address[o.ordinal] + # log.debug('%s %s %s' % (o.ordinal, e.DirExport.expdesc.base, + # hex(e.rva2virt(addr.rva)))) + out.append( + (o.ordinal + e.DirExport.expdesc.base, e.rva2virt(addr.rva))) + return out + + + +def vm_load_pe(vm, fname, align_s=True, load_hdr=True, + **kargs): + e = pe_init.PE(open(fname, 'rb').read(), **kargs) + + aligned = True + for s in e.SHList: + if s.addr & 0xFFF: + aligned = False + break + + if aligned: + if load_hdr: + hdr_len = max(0x200, e.NThdr.sizeofheaders) + min_len = min(e.SHList[0].addr, 0x1000)#e.NThdr.sizeofheaders) + pe_hdr = e.content[:hdr_len] + pe_hdr = pe_hdr + min_len * "\x00" + pe_hdr = pe_hdr[:min_len] + vm.add_memory_page( + e.NThdr.ImageBase, PAGE_READ | PAGE_WRITE, pe_hdr) + if align_s: + for i, s in enumerate(e.SHList[:-1]): + s.size = e.SHList[i + 1].addr - s.addr + s.rawsize = s.size + s.data = strpatchwork.StrPatchwork(s.data[:s.size]) + s.offset = s.addr + s = e.SHList[-1] + s.size = (s.size + 0xfff) & 0xfffff000 + for s in e.SHList: + data = str(s.data) + data += "\x00" * (s.size - len(data)) + # log.debug('SECTION %s %s' % (hex(s.addr), + # hex(e.rva2virt(s.addr)))) + vm.add_memory_page( + e.rva2virt(s.addr), PAGE_READ | PAGE_WRITE, data) + # s.offset = s.addr + return e + + # not aligned + log.warning('pe is not aligned, creating big section') + min_addr = None + max_addr = None + data = "" + + if load_hdr: + data = e.content[:0x400] + data += (e.SHList[0].addr - len(data)) * "\x00" + min_addr = 0 + + for i, s in enumerate(e.SHList): + if i < len(e.SHList) - 1: + s.size = e.SHList[i + 1].addr - s.addr + s.rawsize = s.size + s.offset = s.addr + + if min_addr is None or s.addr < min_addr: + min_addr = s.addr + if max_addr is None or s.addr + s.size > max_addr: + max_addr = s.addr + max(s.size, len(s.data)) + min_addr = e.rva2virt(min_addr) + max_addr = e.rva2virt(max_addr) + log.debug('%s %s %s' % + (hex(min_addr), hex(max_addr), hex(max_addr - min_addr))) + + vm.add_memory_page(min_addr, + PAGE_READ | PAGE_WRITE, + (max_addr - min_addr) * "\x00") + for s in e.SHList: + log.debug('%s %s' % (hex(e.rva2virt(s.addr)), len(s.data))) + vm.set_mem(e.rva2virt(s.addr), str(s.data)) + return e + + +def vm_load_pe_lib(fname_in, libs, lib_path_base, patch_vm_imp, **kargs): + fname = os.path.join(lib_path_base, fname_in) + e = vm_load_pe(fname, **kargs) + libs.add_export_lib(e, fname_in) + # preload_pe(e, libs, patch_vm_imp) + return e + + +def vm_load_pe_libs(libs_name, libs, lib_path_base="win_dll", + patch_vm_imp=True, **kargs): + lib_imgs = {} + for fname in libs_name: + e = vm_load_pe_lib(fname, libs, lib_path_base, patch_vm_imp) + lib_imgs[fname] = e + return lib_imgs + + +def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll", + patch_vm_imp=True, **kargs): + for e in lib_imgs.values(): + preload_pe(e, libs, patch_vm_imp) diff --git a/miasm2/jitter/loader/utils.py b/miasm2/jitter/loader/utils.py new file mode 100644 index 00000000..de269f83 --- /dev/null +++ b/miasm2/jitter/loader/utils.py @@ -0,0 +1,197 @@ +import logging + +log = logging.getLogger('loader_common') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + + +def canon_libname_libfunc(libname, libfunc): + dn = libname.split('.')[0] + if type(libfunc) == str: + return "%s_%s" % (dn, libfunc) + else: + return str(dn), libfunc + + +class libimp: + + def __init__(self, lib_base_ad=0x71111000, **kargs): + self.name2off = {} + self.libbase2lastad = {} + self.libbase_ad = lib_base_ad + self.lib_imp2ad = {} + self.lib_imp2dstad = {} + self.fad2cname = {} + self.fad2info = {} + self.all_exported_lib = [] + + def lib_get_add_base(self, name): + name = name.lower().strip(' ') + if not "." in name: + log.debug('warning adding .dll to modulename') + name += '.dll' + log.debug('%s' % name) + + if name in self.name2off: + ad = self.name2off[name] + else: + ad = self.libbase_ad + log.debug('new lib %s %s' % (name, hex(ad))) + self.name2off[name] = ad + self.libbase2lastad[ad] = ad + 0x1 + self.lib_imp2ad[ad] = {} + self.lib_imp2dstad[ad] = {} + self.libbase_ad += 0x1000 + return ad + + def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None): + if not libad in self.name2off.values(): + raise ValueError('unknown lib base!', hex(libad)) + + # test if not ordinatl + # if imp_ord_or_name >0x10000: + # imp_ord_or_name = vm_get_str(imp_ord_or_name, 0x100) + # imp_ord_or_name = imp_ord_or_name[:imp_ord_or_name.find('\x00')] + + #/!\ can have multiple dst ad + if not imp_ord_or_name in self.lib_imp2dstad[libad]: + self.lib_imp2dstad[libad][imp_ord_or_name] = set() + self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) + + if imp_ord_or_name in self.lib_imp2ad[libad]: + return self.lib_imp2ad[libad][imp_ord_or_name] + # log.debug('new imp %s %s' % (imp_ord_or_name, dst_ad)) + ad = self.libbase2lastad[libad] + self.libbase2lastad[libad] += 0x11 # arbitrary + self.lib_imp2ad[libad][imp_ord_or_name] = ad + + name_inv = dict([(x[1], x[0]) for x in self.name2off.items()]) + c_name = canon_libname_libfunc(name_inv[libad], imp_ord_or_name) + self.fad2cname[ad] = c_name + self.fad2info[ad] = libad, imp_ord_or_name + return ad + + def check_dst_ad(self): + for ad in self.lib_imp2dstad: + all_ads = self.lib_imp2dstad[ad].values() + all_ads.sort() + for i, x in enumerate(all_ads[:-1]): + if x is None or all_ads[i + 1] is None: + return False + if x + 4 != all_ads[i + 1]: + return False + return True + + def add_export_lib(self, e, name): + self.all_exported_lib.append(e) + # will add real lib addresses to database + if name in self.name2off: + ad = self.name2off[name] + else: + log.debug('new lib %s' % name) + ad = e.NThdr.ImageBase + libad = ad + self.name2off[name] = ad + self.libbase2lastad[ad] = ad + 0x1 + self.lib_imp2ad[ad] = {} + self.lib_imp2dstad[ad] = {} + self.libbase_ad += 0x1000 + + ads = get_export_name_addr_list(e) + todo = ads + # done = [] + while todo: + # for imp_ord_or_name, ad in ads: + imp_ord_or_name, ad = todo.pop() + + # if export is a redirection, search redirected dll + # and get function real addr + ret = is_redirected_export(e, ad) + if ret: + exp_dname, exp_fname = ret + # log.debug('export redirection %s' % imp_ord_or_name) + # log.debug('source %s %s' % (exp_dname, exp_fname)) + exp_dname = exp_dname + '.dll' + exp_dname = exp_dname.lower() + # if dll auto refes in redirection + if exp_dname == name: + libad_tmp = self.name2off[exp_dname] + if not exp_fname in self.lib_imp2ad[libad_tmp]: + # schedule func + todo = [(imp_ord_or_name, ad)] + todo + continue + elif not exp_dname in self.name2off: + raise ValueError('load %r first' % exp_dname) + c_name = canon_libname_libfunc(exp_dname, exp_fname) + libad_tmp = self.name2off[exp_dname] + ad = self.lib_imp2ad[libad_tmp][exp_fname] + # log.debug('%s' % hex(ad)) + # if not imp_ord_or_name in self.lib_imp2dstad[libad]: + # self.lib_imp2dstad[libad][imp_ord_or_name] = set() + # self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) + + # log.debug('new imp %s %s' % (imp_ord_or_name, hex(ad))) + self.lib_imp2ad[libad][imp_ord_or_name] = ad + + name_inv = dict([(x[1], x[0]) for x in self.name2off.items()]) + c_name = canon_libname_libfunc( + name_inv[libad], imp_ord_or_name) + self.fad2cname[ad] = c_name + self.fad2info[ad] = libad, imp_ord_or_name + + def gen_new_lib(self, target_pe, filter=lambda _: True): + """Gen a new DirImport description + @target_pe: PE instance + @filter: (boolean f(address)) restrict addresses to keep + """ + + new_lib = [] + for lib_name, ad in self.name2off.items(): + # Build an IMAGE_IMPORT_DESCRIPTOR + + # Get fixed addresses + out_ads = dict() # addr -> func_name + for func_name, dst_addresses in self.lib_imp2dstad[ad].items(): + out_ads.update({addr:func_name for addr in dst_addresses}) + + # Filter available addresses according to @filter + all_ads = [addr for addr in out_ads.keys() if filter(addr)] + log.debug('ads: %s' % map(hex, all_ads)) + if not all_ads: + continue + + # Keep non-NULL elements + all_ads.sort() + for i, x in enumerate(all_ads): + if x not in [0, None]: + break + all_ads = all_ads[i:] + + while all_ads: + # Find libname's Import Address Table + othunk = all_ads[0] + i = 0 + while i + 1 < len(all_ads) and all_ads[i] + 4 == all_ads[i + 1]: + i += 1 + # 'i + 1' is IAT's length + + # Effectively build an IMAGE_IMPORT_DESCRIPTOR + funcs = [out_ads[addr] for addr in all_ads[:i + 1]] + try: + rva = target_pe.virt2rva(othunk) + except pe.InvalidOffset: + pass + else: + new_lib.append(({"name": lib_name, + "firstthunk": rva}, + funcs) + ) + + # Update elements to handle + all_ads = all_ads[i + 1:] + + return new_lib + + diff --git a/setup.py b/setup.py index 73f98858..de1667e9 100755 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ def buil_all(): 'miasm2/os_dep', 'miasm2/jitter', 'miasm2/jitter/arch', + 'miasm2/jitter/loader', ] ext_modules_no_tcc = [ Extension("miasm2.jitter.arch.JitCore_x86", |