diff options
Diffstat (limited to 'miasm2/jitter/loader/elf.py')
| -rw-r--r-- | miasm2/jitter/loader/elf.py | 337 |
1 files changed, 0 insertions, 337 deletions
diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py deleted file mode 100644 index 1044fe73..00000000 --- a/miasm2/jitter/loader/elf.py +++ /dev/null @@ -1,337 +0,0 @@ -import struct -from collections import defaultdict - -from future.utils import viewitems - -from elfesteem import cstruct -from elfesteem import * -import elfesteem.elf as elf_csts - -from miasm2.jitter.csts import * -from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp -from miasm2.core.interval import interval - -import logging - -log = logging.getLogger('loader_elf') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - - -def get_import_address_elf(e): - import2addr = defaultdict(set) - for sh in e.sh: - if not hasattr(sh, 'rel'): - continue - for k, v in viewitems(sh.rel): - import2addr[('xxx', k)].add(v.offset) - return import2addr - - -def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): - # XXX quick hack - fa = get_import_address_elf(e) - dyn_funcs = {} - for (libname, libfunc), ads in viewitems(fa): - # Quick hack - if a symbol is already known, do not stub it - if loc_db and loc_db.get_name_location(libfunc) is not None: - continue - for ad in ads: - ad_base_lib = runtime_lib.lib_get_add_base(libname) - ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) - - libname_s = canon_libname_libfunc(libname, libfunc) - dyn_funcs[libname_s] = ad_libfunc - if patch_vm_imp: - log.debug('patch 0x%x 0x%x %s', ad, ad_libfunc, libfunc) - set_endianness = { elf_csts.ELFDATA2MSB: ">", - elf_csts.ELFDATA2LSB: "<", - elf_csts.ELFDATANONE: "" }[e.sex] - vm.set_mem(ad, - struct.pack(set_endianness + - cstruct.size2type[e.size], - ad_libfunc)) - return runtime_lib, dyn_funcs - -def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): - """Parse the elfesteem's ELF @elf to extract symbols, and fill the LocationDB - instance @loc_db with parsed symbols. - - The ELF is considered mapped at @base_addr - @elf: elfesteem's ELF instance - @loc_db: LocationDB used to retrieve symbols'offset - @base_addr: addr to reloc to (if any) - """ - # Get symbol sections - symbol_sections = [] - for section_header in elf.sh: - if hasattr(section_header, 'symbols'): - for name, sym in viewitems(section_header.symbols): - if not name or sym.value == 0: - continue - name = loc_db.find_free_name(name) - loc_db.add_location(name, sym.value, strict=False) - - if hasattr(section_header, 'reltab'): - for rel in section_header.reltab: - if not rel.sym or rel.offset == 0: - continue - name = loc_db.find_free_name(rel.sym) - loc_db.add_location(name, rel.offset, strict=False) - - if hasattr(section_header, 'symtab'): - log.debug("Find %d symbols in %r", len(section_header.symtab), - section_header) - symbol_sections.append(section_header) - elif isinstance(section_header, ( - elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed - )): - log.debug("Find GNU version related section, unsupported for now") - - for section in symbol_sections: - for symbol_entry in section.symtab: - # Here, the computation of vaddr assumes 'elf' is an executable or a - # shared object file - - # For relocatable file, symbol_entry.value is an offset from the section - # base -> not handled here - st_bind = symbol_entry.info >> 4 - st_type = symbol_entry.info & 0xF - - if st_type not in [ - elf_csts.STT_NOTYPE, - elf_csts.STT_OBJECT, - elf_csts.STT_FUNC, - elf_csts.STT_COMMON, - elf_csts.STT_GNU_IFUNC, - ]: - # Ignore symbols useless in linking - continue - - if st_bind == elf_csts.STB_GLOBAL: - # Global symbol - weak = False - elif st_bind == elf_csts.STB_WEAK: - # Weak symbol - weak = True - else: - # Ignore local & others symbols - continue - - absolute = False - if symbol_entry.shndx == 0: - # SHN_UNDEF - continue - elif symbol_entry.shndx == 0xfff1: - # SHN_ABS - absolute = True - log.debug("Absolute symbol %r - %x", symbol_entry.name, - symbol_entry.value) - elif 0xff00 <= symbol_entry.shndx <= 0xffff: - # Reserved index (between SHN_LORESERV and SHN_HIRESERVE) - raise RuntimeError("Unsupported reserved index: %r" % symbol_entry) - - name = symbol_entry.name - if name == "": - # Ignore empty symbol - log.debug("Empty symbol %r", symbol_entry) - continue - - if absolute: - vaddr = symbol_entry.value - else: - vaddr = symbol_entry.value + base_addr - - # 'weak' information is only used to force global symbols for now - already_existing_loc = loc_db.get_name_location(name) - if already_existing_loc is not None: - if weak: - # Weak symbol, this is ok to already exists, skip it - continue - else: - # Global symbol, force it - loc_db.remove_location_name(already_existing_loc, - name) - already_existing_off = loc_db.get_offset_location(vaddr) - if already_existing_off is not None: - loc_db.add_location_name(already_existing_off, name) - else: - loc_db.add_location(name=name, offset=vaddr) - - -def apply_reloc_x86(elf, vm, section, base_addr, loc_db): - """Apply relocation for x86 ELF contained in the section @section - @elf: elfesteem's ELF instance - @vm: VmMngr instance - @section: elf's section containing relocation to perform - @base_addr: addr to reloc to - @loc_db: LocationDB used to retrieve symbols'offset - """ - if elf.size == 64: - addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, - struct.pack("<Q", addr)) - elif elf.size == 32: - addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, - struct.pack("<I", addr)) - else: - raise ValueError("Unsupported elf size %d" % elf.size) - - symb_section = section.linksection - for reloc in section.reltab: - - # Parse relocation info - r_info = reloc.info - if elf.size == 64: - r_info_sym = (r_info >> 32) & 0xFFFFFFFF - r_info_type = r_info & 0xFFFFFFFF - elif elf.size == 32: - r_info_sym = (r_info >> 8) & 0xFFFFFF - r_info_type = r_info & 0xFF - - is_ifunc = False - symbol_entry = None - if r_info_sym > 0: - symbol_entry = symb_section.symtab[r_info_sym] - - r_offset = reloc.offset - r_addend = reloc.cstr.sym - - if (elf.size, reloc.type) in [ - (64, elf_csts.R_X86_64_RELATIVE), - (64, elf_csts.R_X86_64_IRELATIVE), - (32, elf_csts.R_386_RELATIVE), - (32, elf_csts.R_386_IRELATIVE), - ]: - # B + A - addr = base_addr + r_addend - where = base_addr + r_offset - elif reloc.type == elf_csts.R_X86_64_64: - # S + A - addr_symb = loc_db.get_name_offset(symbol_entry.name) - if addr_symb is None: - log.warning("Unable to find symbol %r" % symbol_entry.name) - continue - addr = addr_symb + r_addend - where = base_addr + r_offset - elif (elf.size, reloc.type) in [ - (64, elf_csts.R_X86_64_TPOFF64), - (64, elf_csts.R_X86_64_DTPMOD64), - (32, elf_csts.R_386_TLS_TPOFF), - ]: - # Thread dependent, ignore for now - log.debug("Skip relocation TPOFF64 %r", reloc) - continue - elif (elf.size, reloc.type) in [ - (64, elf_csts.R_X86_64_GLOB_DAT), - (64, elf_csts.R_X86_64_JUMP_SLOT), - (32, elf_csts.R_386_JMP_SLOT), - (32, elf_csts.R_386_GLOB_DAT), - ]: - # S - addr = loc_db.get_name_offset(symbol_entry.name) - if addr is None: - log.warning("Unable to find symbol %r" % symbol_entry.name) - continue - is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC - where = base_addr + r_offset - else: - raise ValueError( - "Unknown relocation type: %d (%r)" % (reloc.type, - reloc) - ) - if is_ifunc: - # Resolve at runtime - not implemented for now - log.warning("Relocation for %r (at %x, currently pointing on %x) " - "has to be resolved at runtime", - name, where, sym_addr) - continue - - log.debug("Write %x at %x", addr, where) - addr_writer(where, addr) - - -def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False, - **kargs): - """ - Very dirty elf loader - TODO XXX: implement real loader - """ - elf = elf_init.ELF(fdata, **kargs) - i = interval() - all_data = {} - - for p in elf.ph.phlist: - if p.ph.type != elf_csts.PT_LOAD: - continue - log.debug( - '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, - p.ph.filesz, p.ph.type) - data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz] - addr_o = p.ph.vaddr + base_addr - a_addr = addr_o & ~0xFFF - b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) - b_addr = (b_addr + 0xFFF) & ~0xFFF - all_data[addr_o] = data_o - # -2: Trick to avoid merging 2 consecutive pages - i += [(a_addr, b_addr - 2)] - for a, b in i.intervals: - vm.add_memory_page( - a, - PAGE_READ | PAGE_WRITE, - b"\x00" * (b + 2 - a), - repr(name) - ) - - for r_vaddr, data in viewitems(all_data): - vm.set_mem(r_vaddr, data) - - if loc_db is not None: - fill_loc_db_with_symbols(elf, loc_db, base_addr) - - if apply_reloc: - arch = guess_arch(elf) - sections = [] - for section in elf.sh: - if not hasattr(section, 'reltab'): - continue - if isinstance(section, elf_init.RelATable): - pass - elif isinstance(section, elf_init.RelTable): - if arch == "x86_64": - log.warning("REL section should not happen in x86_64") - else: - raise RuntimeError("Unknown relocation section type: %r" % section) - sections.append(section) - for section in sections: - if arch in ["x86_64", "x86_32"]: - apply_reloc_x86(elf, vm, section, base_addr, loc_db) - else: - log.debug("Unsupported relocation for arch %r" % arch) - - return elf - - -class libimp_elf(libimp): - pass - - -# machine, size, sex -> arch_name -ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml", - (elf_csts.EM_ARM, 32, elf_csts.ELFDATA2MSB): "armb", - (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2LSB): "aarch64l", - (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2MSB): "aarch64b", - (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2MSB): "mips32b", - (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2LSB): "mips32l", - (elf_csts.EM_386, 32, elf_csts.ELFDATA2LSB): "x86_32", - (elf_csts.EM_X86_64, 64, elf_csts.ELFDATA2LSB): "x86_64", - (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4", - (elf_csts.EM_PPC, 32, elf_csts.ELFDATA2MSB): "ppc32b", - } - - -def guess_arch(elf): - """Return the architecture specified by the ELF container @elf. - If unknown, return None""" - return ELF_machine.get((elf.Ehdr.machine, elf.size, elf.sex), None) |