diff options
Diffstat (limited to 'src/miasm/jitter/loader')
| -rw-r--r-- | src/miasm/jitter/loader/__init__.py | 0 | ||||
| -rw-r--r-- | src/miasm/jitter/loader/elf.py | 339 | ||||
| -rw-r--r-- | src/miasm/jitter/loader/pe.py | 834 | ||||
| -rw-r--r-- | src/miasm/jitter/loader/utils.py | 100 |
4 files changed, 1273 insertions, 0 deletions
diff --git a/src/miasm/jitter/loader/__init__.py b/src/miasm/jitter/loader/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/miasm/jitter/loader/__init__.py diff --git a/src/miasm/jitter/loader/elf.py b/src/miasm/jitter/loader/elf.py new file mode 100644 index 00000000..91d1c18b --- /dev/null +++ b/src/miasm/jitter/loader/elf.py @@ -0,0 +1,339 @@ +import struct +from collections import defaultdict + +from future.utils import viewitems + +from miasm.loader import cstruct +from miasm.loader import * +import miasm.loader.elf as elf_csts + +from miasm.jitter.csts import * +from miasm.jitter.loader.utils import canon_libname_libfunc, libimp +from miasm.core.utils import force_str +from miasm.core.interval import interval + +import logging + +log = logging.getLogger('loader_elf') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)-8s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + + +def get_import_address_elf(e): + import2addr = defaultdict(set) + for sh in e.sh: + if not hasattr(sh, 'rel'): + continue + for k, v in viewitems(sh.rel): + k = force_str(k) + import2addr[('xxx', k)].add(v.offset) + return import2addr + + +def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): + # XXX quick hack + fa = get_import_address_elf(e) + dyn_funcs = {} + for (libname, libfunc), ads in viewitems(fa): + # Quick hack - if a symbol is already known, do not stub it + if loc_db and loc_db.get_name_location(libfunc) is not None: + continue + for ad in ads: + ad_base_lib = runtime_lib.lib_get_add_base(libname) + ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) + + libname_s = canon_libname_libfunc(libname, libfunc) + dyn_funcs[libname_s] = ad_libfunc + if patch_vm_imp: + log.debug('patch 0x%x 0x%x %s', ad, ad_libfunc, libfunc) + set_endianness = { elf_csts.ELFDATA2MSB: ">", + elf_csts.ELFDATA2LSB: "<", + elf_csts.ELFDATANONE: "" }[e.sex] + vm.set_mem(ad, + 
struct.pack(set_endianness + + cstruct.size2type[e.size], + ad_libfunc)) + return runtime_lib, dyn_funcs + +def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): + """Parse the miasm.loader's ELF @elf to extract symbols, and fill the LocationDB + instance @loc_db with parsed symbols. + + The ELF is considered mapped at @base_addr + @elf: miasm.loader's ELF instance + @loc_db: LocationDB used to retrieve symbols'offset + @base_addr: addr to reloc to (if any) + """ + # Get symbol sections + symbol_sections = [] + for section_header in elf.sh: + if hasattr(section_header, 'symbols'): + for name, sym in viewitems(section_header.symbols): + if not name or sym.value == 0: + continue + name = loc_db.find_free_name(force_str(name)) + loc_db.add_location(name, sym.value, strict=False) + + if hasattr(section_header, 'reltab'): + for rel in section_header.reltab: + if not rel.sym or rel.offset == 0: + continue + name = loc_db.find_free_name(force_str(rel.sym)) + loc_db.add_location(name, rel.offset, strict=False) + + if hasattr(section_header, 'symtab'): + log.debug("Find %d symbols in %r", len(section_header.symtab), + section_header) + symbol_sections.append(section_header) + elif isinstance(section_header, ( + elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed + )): + log.debug("Find GNU version related section, unsupported for now") + + for section in symbol_sections: + for symbol_entry in section.symtab: + # Here, the computation of vaddr assumes 'elf' is an executable or a + # shared object file + + # For relocatable file, symbol_entry.value is an offset from the section + # base -> not handled here + st_bind = symbol_entry.info >> 4 + st_type = symbol_entry.info & 0xF + + if st_type not in [ + elf_csts.STT_NOTYPE, + elf_csts.STT_OBJECT, + elf_csts.STT_FUNC, + elf_csts.STT_COMMON, + elf_csts.STT_GNU_IFUNC, + ]: + # Ignore symbols useless in linking + continue + + if st_bind == elf_csts.STB_GLOBAL: + # Global symbol + weak = False + elif st_bind == 
elf_csts.STB_WEAK: + # Weak symbol + weak = True + else: + # Ignore local & others symbols + continue + + absolute = False + if symbol_entry.shndx == 0: + # SHN_UNDEF + continue + elif symbol_entry.shndx == 0xfff1: + # SHN_ABS + absolute = True + log.debug("Absolute symbol %r - %x", symbol_entry.name, + symbol_entry.value) + elif 0xff00 <= symbol_entry.shndx <= 0xffff: + # Reserved index (between SHN_LORESERV and SHN_HIRESERVE) + raise RuntimeError("Unsupported reserved index: %r" % symbol_entry) + + name = force_str(symbol_entry.name) + if name == "": + # Ignore empty symbol + log.debug("Empty symbol %r", symbol_entry) + continue + + if absolute: + vaddr = symbol_entry.value + else: + vaddr = symbol_entry.value + base_addr + + # 'weak' information is only used to force global symbols for now + already_existing_loc = loc_db.get_name_location(name) + if already_existing_loc is not None: + if weak: + # Weak symbol, this is ok to already exists, skip it + continue + else: + # Global symbol, force it + loc_db.remove_location_name(already_existing_loc, + name) + already_existing_off = loc_db.get_offset_location(vaddr) + if already_existing_off is not None: + loc_db.add_location_name(already_existing_off, name) + else: + loc_db.add_location(name=name, offset=vaddr) + + +def apply_reloc_x86(elf, vm, section, base_addr, loc_db): + """Apply relocation for x86 ELF contained in the section @section + @elf: miasm.loader's ELF instance + @vm: VmMngr instance + @section: elf's section containing relocation to perform + @base_addr: addr to reloc to + @loc_db: LocationDB used to retrieve symbols'offset + """ + if elf.size == 64: + addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, + struct.pack("<Q", addr)) + elif elf.size == 32: + addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, + struct.pack("<I", addr)) + else: + raise ValueError("Unsupported elf size %d" % elf.size) + + symb_section = section.linksection + for reloc in section.reltab: + + # Parse relocation info + r_info = 
reloc.info + if elf.size == 64: + r_info_sym = (r_info >> 32) & 0xFFFFFFFF + r_info_type = r_info & 0xFFFFFFFF + elif elf.size == 32: + r_info_sym = (r_info >> 8) & 0xFFFFFF + r_info_type = r_info & 0xFF + + is_ifunc = False + symbol_entry = None + if r_info_sym > 0: + symbol_entry = symb_section.symtab[r_info_sym] + + r_offset = reloc.offset + r_addend = reloc.cstr.sym + + if (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_RELATIVE), + (64, elf_csts.R_X86_64_IRELATIVE), + (32, elf_csts.R_386_RELATIVE), + (32, elf_csts.R_386_IRELATIVE), + ]: + # B + A + addr = base_addr + r_addend + where = base_addr + r_offset + elif reloc.type == elf_csts.R_X86_64_64: + # S + A + addr_symb = loc_db.get_name_offset(symbol_entry.name) + if addr_symb is None: + log.warning("Unable to find symbol %r" % symbol_entry.name) + continue + addr = addr_symb + r_addend + where = base_addr + r_offset + elif (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_TPOFF64), + (64, elf_csts.R_X86_64_DTPMOD64), + (32, elf_csts.R_386_TLS_TPOFF), + ]: + # Thread dependent, ignore for now + log.debug("Skip relocation TPOFF64 %r", reloc) + continue + elif (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_GLOB_DAT), + (64, elf_csts.R_X86_64_JUMP_SLOT), + (32, elf_csts.R_386_JMP_SLOT), + (32, elf_csts.R_386_GLOB_DAT), + ]: + # S + addr = loc_db.get_name_offset(symbol_entry.name) + if addr is None: + log.warning("Unable to find symbol %r" % symbol_entry.name) + continue + is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC + where = base_addr + r_offset + else: + raise ValueError( + "Unknown relocation type: %d (%r)" % (reloc.type, + reloc) + ) + if is_ifunc: + # Resolve at runtime - not implemented for now + log.warning("Relocation for %r (at %x, currently pointing on %x) " + "has to be resolved at runtime", + name, where, sym_addr) + continue + + log.debug("Write %x at %x", addr, where) + addr_writer(where, addr) + + +def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, 
apply_reloc=False, + **kargs): + """ + Very dirty elf loader + TODO XXX: implement real loader + """ + elf = elf_init.ELF(fdata, **kargs) + i = interval() + all_data = {} + + for p in elf.ph.phlist: + if p.ph.type != elf_csts.PT_LOAD: + continue + log.debug( + '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, + p.ph.filesz, p.ph.type) + data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz] + addr_o = p.ph.vaddr + base_addr + a_addr = addr_o & ~0xFFF + b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) + b_addr = (b_addr + 0xFFF) & ~0xFFF + all_data[addr_o] = data_o + # -2: Trick to avoid merging 2 consecutive pages + i += [(a_addr, b_addr - 2)] + for a, b in i.intervals: + vm.add_memory_page( + a, + PAGE_READ | PAGE_WRITE, + b"\x00" * (b + 2 - a), + repr(name) + ) + + for r_vaddr, data in viewitems(all_data): + vm.set_mem(r_vaddr, data) + + if loc_db is not None: + fill_loc_db_with_symbols(elf, loc_db, base_addr) + + if apply_reloc: + arch = guess_arch(elf) + sections = [] + for section in elf.sh: + if not hasattr(section, 'reltab'): + continue + if isinstance(section, elf_init.RelATable): + pass + elif isinstance(section, elf_init.RelTable): + if arch == "x86_64": + log.warning("REL section should not happen in x86_64") + else: + raise RuntimeError("Unknown relocation section type: %r" % section) + sections.append(section) + for section in sections: + if arch in ["x86_64", "x86_32"]: + apply_reloc_x86(elf, vm, section, base_addr, loc_db) + else: + log.debug("Unsupported relocation for arch %r" % arch) + + return elf + + +class libimp_elf(libimp): + pass + + +# machine, size, sex -> arch_name +ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml", + (elf_csts.EM_ARM, 32, elf_csts.ELFDATA2MSB): "armb", + (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2LSB): "aarch64l", + (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2MSB): "aarch64b", + (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2MSB): "mips32b", + (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2LSB): 
"mips32l", + (elf_csts.EM_386, 32, elf_csts.ELFDATA2LSB): "x86_32", + (elf_csts.EM_X86_64, 64, elf_csts.ELFDATA2LSB): "x86_64", + (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4", + (elf_csts.EM_PPC, 32, elf_csts.ELFDATA2MSB): "ppc32b", + } + + +def guess_arch(elf): + """Return the architecture specified by the ELF container @elf. + If unknown, return None""" + return ELF_machine.get((elf.Ehdr.machine, elf.size, elf.sex), None) diff --git a/src/miasm/jitter/loader/pe.py b/src/miasm/jitter/loader/pe.py new file mode 100644 index 00000000..9af068e4 --- /dev/null +++ b/src/miasm/jitter/loader/pe.py @@ -0,0 +1,834 @@ +from builtins import map +import os +import struct +import logging +from collections import defaultdict + +from future.utils import viewitems, viewvalues + +from miasm.loader import pe +from miasm.loader import cstruct +from miasm.loader import * + +from miasm.jitter.csts import * +from miasm.jitter.loader.utils import canon_libname_libfunc, libimp +from miasm.core.utils import force_str + +log = logging.getLogger('loader_pe') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)-8s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.INFO) + + +def get_pe_dependencies(pe_obj): + """Collect the shared libraries upon which this PE depends. + + @pe_obj: pe object + Returns a set of strings of DLL names. 
+ + Example: + + pe = miasm.analysis.binary.Container.from_string(buf) + deps = miasm.jitter.loader.pe.get_pe_dependencies(pe.executable) + assert sorted(deps)[0] == 'api-ms-win-core-appcompat-l1-1-0.dll' + """ + + if pe_obj.DirImport.impdesc is None: + return set() + out = set() + for dependency in pe_obj.DirImport.impdesc: + libname = dependency.dlldescname.name.lower() + # transform bytes to str + libname = force_str(libname) + out.add(libname) + + # If binary has redirected export, add dependencies + if pe_obj.DirExport.expdesc != None: + addrs = get_export_name_addr_list(pe_obj) + for imp_ord_or_name, ad in addrs: + # if export is a redirection, search redirected dll + # and get function real addr + ret = is_redirected_export(pe_obj, ad) + if ret is False: + continue + dllname, func_info = ret + dllname = dllname + '.dll' + out.add(dllname) + + return out + + +def get_import_address_pe(e): + """Compute the addresses of imported symbols. + @e: pe object + Returns a dict mapping from tuple (dll name string, symbol name string) to set of virtual addresses. 
+ + Example: + + pe = miasm.analysis.binary.Container.from_string(buf) + imports = miasm.jitter.loader.pe.get_import_address_pe(pe.executable) + assert imports[('api-ms-win-core-rtlsupport-l1-1-0.dll', 'RtlCaptureStackBackTrace')] == {0x6b88a6d0} + """ + import2addr = defaultdict(set) + if e.DirImport.impdesc is None: + return import2addr + for s in e.DirImport.impdesc: + # fthunk = e.rva2virt(s.firstthunk) + # l = "%2d %-25s %s" % (i, repr(s.dlldescname), repr(s)) + libname = force_str(s.dlldescname.name.lower()) + + for ii, imp in enumerate(s.impbynames): + if isinstance(imp, pe.ImportByName): + funcname = force_str(imp.name) + else: + funcname = imp + # l = " %2d %-16s" % (ii, repr(funcname)) + import2addr[(libname, funcname)].add( + e.rva2virt(s.firstthunk + (e._wsize * ii) // 8) + ) + return import2addr + + +def preload_pe(vm, e, runtime_lib, patch_vm_imp=True): + fa = get_import_address_pe(e) + dyn_funcs = {} + # log.debug('imported funcs: %s' % fa) + for (libname, libfunc), ads in viewitems(fa): + for ad in ads: + libname = force_str(libname) + ad_base_lib = runtime_lib.lib_get_add_base(libname) + ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) + + libname_s = canon_libname_libfunc(libname, libfunc) + dyn_funcs[libname_s] = ad_libfunc + if patch_vm_imp: + vm.set_mem( + ad, struct.pack(cstruct.size2type[e._wsize], ad_libfunc)) + return dyn_funcs + + +def is_redirected_export(pe_obj, addr): + """Test if the @addr is a forwarded export address. If so, return + dllname/function name couple. If not, return False. + + An export address is a forwarded export if the rva is in the export + directory of the pe. 
+ + @pe_obj: PE instance + @addr: virtual address of the function to test + """ + + export_dir = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT] + addr_rva = pe_obj.virt2rva(addr) + if not (export_dir.rva <= addr_rva < export_dir.rva + export_dir.size): + return False + addr_end = pe_obj.virt.find(b'\x00', addr) + data = pe_obj.virt.get(addr, addr_end) + + data = force_str(data) + dllname, func_info = data.split('.', 1) + dllname = dllname.lower() + + # Test if function is forwarded using ordinal + if func_info.startswith('#'): + func_info = int(func_info[1:]) + return dllname, func_info + + +def get_export_name_addr_list(e): + """Collect names/ordinals and addresses of symbols exported by the given PE. + @e: PE instance + Returns a list of tuples: + (symbol name string, virtual address) + (ordinal number, virtual address) + + Example: + + pe = miasm.analysis.binary.Container.from_string(buf) + exports = miasm.jitter.loader.pe.get_export_name_addr_list(pe.executable) + assert exports[0] == ('AcquireSRWLockExclusive', 0x6b89b22a) + """ + out = [] + if e.DirExport.expdesc is None: + return out + + # add func name + for i, n in enumerate(e.DirExport.f_names): + addr = e.DirExport.f_address[e.DirExport.f_nameordinals[i].ordinal] + f_name = force_str(n.name.name) + # log.debug('%s %s' % (f_name, hex(e.rva2virt(addr.rva)))) + out.append((f_name, e.rva2virt(addr.rva))) + + # add func ordinal + for i, s in enumerate(e.DirExport.f_address): + if not s.rva: + continue + out.append((i + e.DirExport.expdesc.base, e.rva2virt(s.rva))) + + return out + + +def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", winobjs=None, **kargs): + """Load a PE in memory (@vm) from a data buffer @fdata + @vm: VmMngr instance + @fdata: data buffer to parse + @align_s: (optional) If False, keep gaps between section + @load_hdr: (optional) If False, do not load the NThdr in memory + Return the corresponding PE instance. + + Extra arguments are passed to PE instantiation. 
+ If all sections are aligned, they will be mapped on several different pages + Otherwise, a big page is created, containing all sections + """ + + # Parse and build a PE instance + pe = pe_init.PE(fdata, **kargs) + + # Check if all section are aligned + aligned = True + for section in pe.SHList: + if section.addr & 0xFFF: + aligned = False + break + + if aligned: + # Loader NT header + if load_hdr: + # Header length + hdr_len = max(0x200, pe.NThdr.sizeofheaders) + # Page minimum size + min_len = min(pe.SHList[0].addr, 0x1000) + + # Get and pad the pe_hdr + pe_hdr = ( + pe.content[:hdr_len] + + max(0, (min_len - hdr_len)) * b"\x00" + ) + + if winobjs: + winobjs.allocated_pages[pe.NThdr.ImageBase] = (pe.NThdr.ImageBase, len(pe_hdr)) + vm.add_memory_page( + pe.NThdr.ImageBase, + PAGE_READ | PAGE_WRITE, + pe_hdr, + "%r: PE Header" % name + ) + + # Align sections size + if align_s: + # Use the next section address to compute the new size + for i, section in enumerate(pe.SHList[:-1]): + new_size = pe.SHList[i + 1].addr - section.addr + section.size = new_size + section.rawsize = new_size + section.data = strpatchwork.StrPatchwork( + section.data[:new_size] + ) + section.offset = section.addr + + # Last section alignment + last_section = pe.SHList[-1] + last_section.size = (last_section.size + 0xfff) & 0xfffff000 + + # Pad sections with null bytes and map them + for section in pe.SHList: + data = bytes(section.data) + data += b"\x00" * (section.size - len(data)) + attrib = PAGE_READ + if section.flags & 0x80000000: + attrib |= PAGE_WRITE + + section_addr = pe.rva2virt(section.addr) + if winobjs: + winobjs.allocated_pages[section_addr] = (section_addr, len(data)) + vm.add_memory_page( + section_addr, + attrib, + data, + "%r: %r" % (name, section.name) + ) + + return pe + + # At least one section is not aligned + log.warning('PE is not aligned, creating big section') + min_addr = 0 if load_hdr else None + max_addr = None + data = "" + + for i, section in 
enumerate(pe.SHList): + if i < len(pe.SHList) - 1: + # If it is not the last section, use next section address + section.size = pe.SHList[i + 1].addr - section.addr + section.rawsize = section.size + section.offset = section.addr + + # Update min and max addresses + if min_addr is None or section.addr < min_addr: + min_addr = section.addr + max_section_len = max(section.size, len(section.data)) + if max_addr is None or section.addr + max_section_len > max_addr: + max_addr = section.addr + max_section_len + + min_addr = pe.rva2virt(min_addr) + max_addr = pe.rva2virt(max_addr) + log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr, + (max_addr - min_addr)) + + # Create only one big section containing the whole PE + vm.add_memory_page( + min_addr, + PAGE_READ | PAGE_WRITE, + (max_addr - min_addr) * b"\x00" + ) + + # Copy each sections content in memory + for section in pe.SHList: + log.debug('Map 0x%x bytes to 0x%x', len(section.data), + pe.rva2virt(section.addr)) + vm.set_mem(pe.rva2virt(section.addr), bytes(section.data)) + + return pe + + +def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): + """Call vm_load_pe on @fname_in and update @libs accordingly + @vm: VmMngr instance + @fname_in: library name + @libs: libimp_pe instance + @lib_path_base: DLLs relative path + Return the corresponding PE instance + Extra arguments are passed to vm_load_pe + """ + + log.info('Loading module %r', fname_in) + + fname = os.path.join(lib_path_base, fname_in) + with open(fname, "rb") as fstream: + pe = vm_load_pe(vm, fstream.read(), name=fname_in, **kargs) + libs.add_export_lib(pe, fname_in) + return pe + + +def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs): + """Call vm_load_pe_lib on each @libs_name filename + @vm: VmMngr instance + @libs_name: list of str + @libs: libimp_pe instance + @lib_path_base: (optional) DLLs relative path + Return a dictionary Filename -> PE instances + Extra arguments are passed to vm_load_pe_lib + """ + out = {} + 
for fname in libs_name: + assert isinstance(fname, str) + out[fname] = vm_load_pe_lib(vm, fname, libs, lib_path_base, **kargs) + return out + + +def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base, + patch_vm_imp=True, **kargs): + for e in viewvalues(lib_imgs): + preload_pe(e, libs, patch_vm_imp) + + +def vm2pe(myjit, fname, libs=None, e_orig=None, + min_addr=None, max_addr=None, + min_section_offset=0x1000, img_base=None, + added_funcs=None, **kwargs): + if e_orig: + size = e_orig._wsize + else: + size = 32 + mye = pe_init.PE(wsize=size) + + if min_addr is None and e_orig is not None: + min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList]) + if max_addr is None and e_orig is not None: + max_addr = max([e_orig.rva2virt(s.addr + s.size) + for s in e_orig.SHList]) + + if img_base is None: + img_base = e_orig.NThdr.ImageBase + + mye.NThdr.ImageBase = img_base + all_mem = myjit.vm.get_all_memory() + addrs = list(all_mem) + addrs.sort() + entry_point = mye.virt2rva(myjit.pc) + if entry_point is None or not 0 < entry_point < 0xFFFFFFFF: + raise ValueError( + "Current pc (0x%x) used as entry point seems to be out of the binary" % + myjit.pc + ) + + mye.Opthdr.AddressOfEntryPoint = entry_point + first = True + for ad in addrs: + if not min_addr <= ad < max_addr: + continue + log.debug("0x%x", ad) + if first: + mye.SHList.add_section( + "%.8X" % ad, + addr=ad - mye.NThdr.ImageBase, + data=all_mem[ad]['data'], + offset=min_section_offset) + else: + mye.SHList.add_section( + "%.8X" % ad, + addr=ad - mye.NThdr.ImageBase, + data=all_mem[ad]['data']) + first = False + if libs: + if added_funcs is not None: + for addr, funcaddr in added_funcs: + libbase, dllname = libs.fad2info[funcaddr] + libs.lib_get_add_func(libbase, dllname, addr) + + filter_import = kwargs.get( + 'filter_import', lambda _, ad: mye.virt.is_addr_in(ad)) + new_dll = libs.gen_new_lib(mye, filter_import) + else: + new_dll = {} + + log.debug('%s', new_dll) + + mye.DirImport.add_dlldesc(new_dll) + 
s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport)) + mye.DirImport.set_rva(s_imp.addr) + log.debug('%r', mye.SHList) + if e_orig: + # resource + xx = bytes(mye) + mye.content = xx + ad = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva + size = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size + log.debug('dirres 0x%x', ad) + if ad != 0: + mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad + mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = size + mye.DirRes = pe.DirRes.unpack(mye.img_rva, ad, mye) + log.debug('%r', mye.DirRes) + s_res = mye.SHList.add_section( + name="myres", + rawsize=len(mye.DirRes) + ) + mye.DirRes.set_rva(s_res.addr) + # generation + open(fname, 'wb').write(bytes(mye)) + return mye + + +class libimp_pe(libimp): + + def __init__(self, *args, **kwargs): + super(libimp_pe, self).__init__(*args, **kwargs) + # dependency -> redirector + self.created_redirected_imports = {} + + + def add_function(self, dllname, imp_ord_or_name, addr): + assert isinstance(dllname, str) + assert isinstance(imp_ord_or_name, (int, str)) + libad = self.name2off[dllname] + c_name = canon_libname_libfunc( + dllname, imp_ord_or_name + ) + update_entry = True + if addr in self.fad2info: + known_libad, known_imp_ord_or_name = self.fad2info[addr] + if isinstance(imp_ord_or_name, int): + update_entry = False + self.cname2addr[c_name] = addr + log.debug("Add func %s %s", hex(addr), c_name) + if update_entry: + log.debug("Real Add func %s %s", hex(addr), c_name) + self.fad2cname[addr] = c_name + self.fad2info[addr] = libad, imp_ord_or_name + + + def add_export_lib(self, e, name): + if name in self.created_redirected_imports: + log.error("%r has previously been created due to redirect\ + imports due to %r. 
Change the loading order.", + name, self.created_redirected_imports[name]) + raise RuntimeError('Bad import: loading previously created import') + + self.all_exported_lib.append(e) + # will add real lib addresses to database + if name in self.name2off: + ad = self.name2off[name] + if e is not None and name in self.fake_libs: + log.error( + "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name) + raise RuntimeError("Bad import") + else: + log.debug('new lib %s', name) + ad = e.NThdr.ImageBase + libad = ad + self.name2off[name] = ad + self.libbase2lastad[ad] = ad + 0x1 + self.lib_imp2ad[ad] = {} + self.lib_imp2dstad[ad] = {} + self.libbase_ad += 0x1000 + + ads = get_export_name_addr_list(e) + todo = list(ads) + # done = [] + while todo: + # for imp_ord_or_name, ad in ads: + imp_ord_or_name, ad = todo.pop() + + # if export is a redirection, search redirected dll + # and get function real addr + ret = is_redirected_export(e, ad) + if ret: + exp_dname, exp_fname = ret + exp_dname = exp_dname + '.dll' + exp_dname = exp_dname.lower() + # if dll auto refes in redirection + if exp_dname == name: + libad_tmp = self.name2off[exp_dname] + if isinstance(exp_fname, str): + exp_fname = bytes(ord(c) for c in exp_fname) + found = None + for tmp_func, tmp_addr in ads: + if tmp_func == exp_fname: + found = tmp_addr + assert found is not None + ad = found + else: + # import redirected lib from non loaded dll + if not exp_dname in self.name2off: + self.created_redirected_imports.setdefault( + exp_dname, set()).add(name) + + # Ensure import entry is created + new_lib_base = self.lib_get_add_base(exp_dname) + # Ensure function entry is created + _ = self.lib_get_add_func(new_lib_base, exp_fname) + + libad_tmp = self.name2off[exp_dname] + ad = self.lib_imp2ad[libad_tmp][exp_fname] + + self.lib_imp2ad[libad][imp_ord_or_name] = ad + name_inv = dict( + (value, key) for key, value in viewitems(self.name2off) + ) + c_name = canon_libname_libfunc( 
+ name_inv[libad], imp_ord_or_name) + self.fad2cname[ad] = c_name + self.cname2addr[c_name] = ad + log.debug("Add func %s %s", hex(ad), c_name) + self.fad2info[ad] = libad, imp_ord_or_name + + def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs): + """Gen a new DirImport description + @target_pe: PE instance + @filter_import: (boolean f(pe, address)) restrict addresses to keep + """ + + new_lib = [] + for lib_name, ad in viewitems(self.name2off): + # Build an IMAGE_IMPORT_DESCRIPTOR + + # Get fixed addresses + out_ads = dict() # addr -> func_name + for func_name, dst_addresses in viewitems(self.lib_imp2dstad[ad]): + out_ads.update({addr: func_name for addr in dst_addresses}) + + # Filter available addresses according to @filter_import + all_ads = [ + addr for addr in list(out_ads) if filter_import(target_pe, addr) + ] + + if not all_ads: + continue + + # Keep non-NULL elements + all_ads.sort(key=str) + for i, x in enumerate(all_ads): + if x not in [0, None]: + break + all_ads = all_ads[i:] + log.debug('ads: %s', list(map(hex, all_ads))) + + while all_ads: + # Find libname's Import Address Table + othunk = all_ads[0] + i = 0 + while (i + 1 < len(all_ads) and + all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]): + i += 1 + # 'i + 1' is IAT's length + + # Effectively build an IMAGE_IMPORT_DESCRIPTOR + funcs = [out_ads[addr] for addr in all_ads[:i + 1]] + try: + rva = target_pe.virt2rva(othunk) + except pe.InvalidOffset: + pass + else: + new_lib.append(({"name": lib_name, + "firstthunk": rva}, + funcs) + ) + + # Update elements to handle + all_ads = all_ads[i + 1:] + + return new_lib + + +def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib, + lib_path_base, **kwargs): + """Load a binary and all its dependencies. 
Returns a dictionary containing + the association between binaries names and it's pe object + + @vm: virtual memory manager instance + @fname: full path of the binary + @name2module: dict containing association between name and pe + object. Updated. + @runtime_lib: libimp instance + @lib_path_base: directory of the libraries containing dependencies + + """ + + todo = [(fname, fname, 0)] + weight2name = {} + done = set() + + # Walk dependencies recursively + while todo: + name, fname, weight = todo.pop() + if name in done: + continue + done.add(name) + weight2name.setdefault(weight, set()).add(name) + if name in name2module: + pe_obj = name2module[name] + else: + try: + with open(fname, "rb") as fstream: + log.info('Loading module name %r', fname) + pe_obj = vm_load_pe( + vm, fstream.read(), name=fname, **kwargs) + except IOError: + log.error('Cannot open %s' % fname) + name2module[name] = None + continue + name2module[name] = pe_obj + + new_dependencies = get_pe_dependencies(pe_obj) + todo += [(name, os.path.join(lib_path_base, name), weight - 1) + for name in new_dependencies] + + known_export_addresses = {} + to_resolve = {} + for name, pe_obj in name2module.items(): + print(name) + if pe_obj is None: + continue + if pe_obj.DirExport.expdesc == None: + continue + addrs = get_export_name_addr_list(pe_obj) + for imp_ord_or_name, ad in addrs: + # if export is a redirection, search redirected dll + # and get function real addr + ret = is_redirected_export(pe_obj, ad) + if ret is False: + known_export_addresses[(name, imp_ord_or_name)] = ad + else: + dllname, func_info = ret + dllname = dllname + '.dll' + to_resolve[(name, imp_ord_or_name)] = (dllname, func_info) + + modified = True + while modified: + modified = False + out = {} + for target, dependency in to_resolve.items(): + dllname, funcname = dependency + if dependency in known_export_addresses: + known_export_addresses[target] = known_export_addresses[dependency] + modified = True + else: + log.error("Cannot 
# machine -> arch
PE_machine = {
    0x14c: "x86_32",
    0x8664: "x86_64",
}


def guess_arch(pe):
    """Return the architecture specified by the PE container @pe.
    If unknown, return None"""
    return PE_machine.get(pe.Coffhdr.machine, None)


class ImpRecStateMachine(object):
    """
    Finite State Machine used for internal purpose only.
    See `ImpRecStrategy` for more details.

    The machine is fed pointer-sized chunks of memory (through the `run`
    generator) and collects runs of known function addresses belonging to a
    single library and terminated by a NULL entry. Completed runs are stored
    in `seen`.
    """

    # Looking for a function pointer
    STATE_SEARCH = 0
    # Candidate function list
    STATE_FUNC_FOUND = 1
    # Function list found, terminated by a NULL entry
    STATE_END_FUNC_LIST = 2

    def __init__(self, libs, ptrtype):
        """
        @libs: object exposing `cname2addr`, `name2off` and `fad2info`
        @ptrtype: `struct` format of one pointer (for instance "<I")
        """
        self.ptrtype = ptrtype
        self.libs = libs
        # Packed form of every known function address, so that raw memory
        # chunks can be tested for membership without unpacking them
        self.func_addrs = set(
            struct.pack(self.ptrtype, address)
            for address in self.libs.cname2addr.values()
        )
        self.off2name = {off: name for name, off in self.libs.name2off.items()}
        self.state = self.STATE_SEARCH
        # Memory address of the chunk being processed (set by `run`)
        self.cur_address = None

        # STATE_FUNC_FOUND
        self.cur_list = []
        self.cur_list_lib = None

        # STATE_END_FUNC_LIST
        self.seen = []

    def format_func_info(self, func_info, func_addr):
        """Build the description of a recovered entry.
        @func_info: (library base address, entry name) as found in `fad2info`
        @func_addr: address of the function in the library
        """
        return {
            "lib_addr": func_info[0],
            "lib_name": self.off2name[func_info[0]],
            "entry_name": func_info[1],
            "entry_module_addr": func_addr,
            "entry_memory_addr": self.cur_address,
        }

    def _resolve(self, data):
        """Return (function address, function info) for the packed @data"""
        func_addr = struct.unpack(self.ptrtype, data)[0]
        return func_addr, self.libs.fad2info[func_addr]

    def transition(self, data):
        """Update the machine with @data, one raw chunk of memory"""
        if self.state == self.STATE_SEARCH:
            if data in self.func_addrs:
                func_addr, func_info = self._resolve(data)
                self.state = self.STATE_FUNC_FOUND
                self.cur_list = [self.format_func_info(func_info, func_addr)]
                self.cur_list_lib = func_info[0]
        elif self.state == self.STATE_FUNC_FOUND:
            if data == (b"\x00" * len(data)):
                self.state = self.STATE_END_FUNC_LIST
            elif data in self.func_addrs:
                func_addr, func_info = self._resolve(data)
                if func_info[0] != self.cur_list_lib:
                    # The list must belong to the same library
                    self.state = self.STATE_SEARCH
                    return
                self.cur_list.append(self.format_func_info(func_info, func_addr))
            else:
                # Neither a known function nor the NULL terminator: drop the
                # candidate list (this used to be a no-op `==` comparison)
                self.state = self.STATE_SEARCH
        elif self.state == self.STATE_END_FUNC_LIST:
            self.seen.append(self.cur_list)
            self.state = self.STATE_SEARCH
            self.transition(data)
        else:
            raise ValueError("Unknown state: %r" % self.state)

    def finalize(self):
        """Record a list whose NULL terminator was the last chunk received.

        A terminated list is normally stored when the next chunk arrives;
        call this once the input is exhausted so the last list is not lost.
        """
        if self.state == self.STATE_END_FUNC_LIST:
            self.seen.append(self.cur_list)
            self.state = self.STATE_SEARCH

    def run(self):
        """Generator consuming `(data, address)` tuples through `send`"""
        while True:
            data, address = yield
            self.cur_address = address
            self.transition(data)


class ImpRecStrategy(object):
    """
    Naive import reconstruction, similar to ImpRec

    It looks for a continuation of module export addresses, ended by a NULL entry, ie:
    [...]
    &Kernel32::LoadLibraryA
    &Kernel32::HeapCreate
    00 00 00 00
    [...]

    Usage:
    >>> sb = Sandbox[...]
    >>> sb.run()
    >>> imprec = ImpRecStrategy(sb.jitter, sb.libs, size=32)
    >>> imprec.recover_import()
    List<List<Recovered functions>>

    -> sb.libs has also been updated, ready to be passed to `vm2pe`
    """
    def __init__(self, jitter, libs, size):
        """
        @jitter: object whose `vm.get_all_memory()` describes the memory
        @libs: library description (see `ImpRecStateMachine`)
        @size: pointer size in bits, 32 or 64
        Raise ValueError if @size is not supported
        """
        self._jitter = jitter
        self._libs = libs
        if size == 32:
            self._ptrtype = "<I"
        elif size == 64:
            self._ptrtype = "<Q"
        else:
            raise ValueError("Unsupported size: %d" % size)

    def recover_import(self, update_libs=True, align_hypothesis=False):
        """
        Launch the import recovery routine.
        @update_libs: if set (default), update `libs` object with found addresses
        @align_hypothesis: if not set (default), do not consider import
            addresses are written on aligned addresses

        Return the list of candidates
        """
        candidates = []
        ptr_size = struct.calcsize(self._ptrtype)

        alignments = [0]
        if not align_hypothesis:
            alignments = list(range(0, ptr_size))

        for starting_offset in alignments:
            # Search for several addresses from `func_addrs` ending with a `\x00`
            fsm_obj = ImpRecStateMachine(self._libs, self._ptrtype)
            fsm = fsm_obj.run()
            fsm.send(None)
            for addr_start, page_info in self._jitter.vm.get_all_memory().items():
                data = page_info["data"]
                for i in range(starting_offset, page_info["size"], ptr_size):
                    # Read a whole pointer (8 bytes in 64 bit), not always 4
                    fsm.send((data[i:i + ptr_size], addr_start + i))

            # Keep a list terminated by the very last pointer of the memory
            fsm_obj.finalize()
            candidates.extend(fsm_obj.seen)

        # Apply to libs
        if update_libs:
            for entry_list in candidates:
                for func_info in entry_list:
                    self._libs.lib_imp2dstad[func_info["lib_addr"]][func_info["entry_name"]].add(func_info["entry_memory_addr"])

        return candidates
# src/miasm/jitter/loader/utils.py
from builtins import int as int_types
import logging

try:
    from future.utils import viewitems, viewvalues
    from past.builtins import basestring
except ImportError:
    # Python 3 interpreter without the `future` compatibility package:
    # fall back on the native string types and dictionary views
    basestring = (str, bytes)

    def viewitems(obj):
        return obj.items()

    def viewvalues(obj):
        return obj.values()

log = logging.getLogger('loader_common')
hnd = logging.StreamHandler()
hnd.setFormatter(logging.Formatter("[%(levelname)-8s]: %(message)s"))
log.addHandler(hnd)
log.setLevel(logging.INFO)


def canon_libname_libfunc(libname, libfunc):
    """Return the canonical name of the import @libfunc of @libname.
    @libname: library name; everything after the first '.' is dropped
    @libfunc: function name (str) or ordinal (int)

    For a name, return the string "<lib>_<func>"; for an ordinal, return the
    tuple (<lib>, ordinal).
    """
    assert isinstance(libname, basestring)
    assert isinstance(libfunc, basestring) or isinstance(libfunc, int_types)
    dn = libname.split('.')[0]
    if isinstance(libfunc, int_types):
        return str(dn), libfunc
    else:
        return "%s_%s" % (dn, libfunc)


class libimp(object):
    """Bookkeeping of the libraries and imported functions known to a loader.

    Attributes:
    - name2off: library name -> library base address
    - libbase2lastad: library base address -> next address handed out to a
      newly requested function of this library
    - libbase_ad: base address given to the next dummy library
    - lib_imp2ad: library base address -> {name or ordinal -> address}
    - lib_imp2dstad: library base address -> {name or ordinal -> set of
      destination addresses, ie. where the function address is written}
    - fad2cname / cname2addr: function address <-> canonical name
    - fad2info: function address -> (library base address, name or ordinal)
    - fake_libs: names of the libraries created as dummy entries
    """

    def __init__(self, lib_base_ad=0x71111000, **kargs):
        self.name2off = {}
        self.libbase2lastad = {}
        self.libbase_ad = lib_base_ad
        self.lib_imp2ad = {}
        self.lib_imp2dstad = {}
        self.fad2cname = {}
        self.cname2addr = {}
        self.fad2info = {}
        self.all_exported_lib = []
        self.fake_libs = set()

    def lib_get_add_base(self, name):
        """Return the base address of the library @name.
        The name is lowercased, stripped of spaces and suffixed with '.dll'
        if it has no extension. An unknown library gets a dummy entry.
        """
        assert isinstance(name, basestring)
        name = name.lower().strip(' ')
        if "." not in name:
            log.warning('warning adding .dll to modulename')
            name += '.dll'
            log.warning(name)

        if name in self.name2off:
            ad = self.name2off[name]
        else:
            ad = self.libbase_ad
            log.warning("Create dummy entry for %r", name)
            self.fake_libs.add(name)
            self.name2off[name] = ad
            self.libbase2lastad[ad] = ad + 0x4
            self.lib_imp2ad[ad] = {}
            self.lib_imp2dstad[ad] = {}
            self.libbase_ad += 0x1000
        return ad

    def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None):
        """Return the address of the function @imp_ord_or_name of the library
        based at @libad, allocating a new one if the function is unknown.
        @libad: library base address, as returned by `lib_get_add_base`
        @imp_ord_or_name: function name or ordinal
        @dst_ad: (optional) address where the function address is written
        Raise ValueError if @libad is not a known library base
        """
        if libad not in viewvalues(self.name2off):
            raise ValueError('unknown lib base!', hex(libad))

        #[!] can have multiple dst ad
        dst_ads = self.lib_imp2dstad[libad].setdefault(imp_ord_or_name, set())
        if dst_ad is not None:
            dst_ads.add(dst_ad)

        if imp_ord_or_name in self.lib_imp2ad[libad]:
            return self.lib_imp2ad[libad][imp_ord_or_name]
        log.debug('new imp %s %s', imp_ord_or_name, dst_ad)
        ad = self.libbase2lastad[libad]
        self.libbase2lastad[libad] += 0x10  # arbitrary
        self.lib_imp2ad[libad][imp_ord_or_name] = ad

        name_inv = dict(
            (value, key) for key, value in viewitems(self.name2off)
        )
        c_name = canon_libname_libfunc(name_inv[libad], imp_ord_or_name)
        self.fad2cname[ad] = c_name
        self.cname2addr[c_name] = ad
        self.fad2info[ad] = libad, imp_ord_or_name
        return ad

    def check_dst_ad(self, ptr_size=4):
        """Return True if, for each library, the destination addresses of its
        imports are contiguous, ie. spaced by exactly @ptr_size bytes.
        @ptr_size: size in bytes of one import entry (default: 4)
        """
        for ad in self.lib_imp2dstad:
            # Each import owns a *set* of destination addresses: flatten them
            # before sorting (sorting the sets themselves then adding an
            # integer to a set raised a TypeError)
            all_ads = []
            for dst_ads in viewvalues(self.lib_imp2dstad[ad]):
                if dst_ads is None or None in dst_ads:
                    return False
                all_ads.extend(dst_ads)
            all_ads.sort()
            for cur_ad, next_ad in zip(all_ads, all_ads[1:]):
                if cur_ad + ptr_size != next_ad:
                    return False
        return True