diff options
Diffstat (limited to 'miasm2/jitter/loader/elf.py')
| -rw-r--r-- | miasm2/jitter/loader/elf.py | 226 |
1 files changed, 218 insertions, 8 deletions
diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py
index 01dea647..af078ab5 100644
--- a/miasm2/jitter/loader/elf.py
+++ b/miasm2/jitter/loader/elf.py
@@ -28,11 +28,14 @@ def get_import_address_elf(e):
     return import2addr
 
 
-def preload_elf(vm, e, runtime_lib, patch_vm_imp=True):
+def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None):
     # XXX quick hack
     fa = get_import_address_elf(e)
     dyn_funcs = {}
     for (libname, libfunc), ads in fa.items():
+        # Quick hack - if a symbol is already known, do not stub it
+        if loc_db and loc_db.get_name_location(libfunc) is not None:
+            continue
         for ad in ads:
             ad_base_lib = runtime_lib.lib_get_add_base(libname)
             ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad)
@@ -50,25 +53,208 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True):
                                        ad_libfunc))
     return runtime_lib, dyn_funcs
 
+def fill_loc_db_with_symbols(elf, loc_db, base_addr=0):
+    """Parse the elfesteem's ELF @elf to extract symbols, and fill the LocationDB
+    instance @loc_db with parsed symbols.
+
+    The ELF is considered mapped at @base_addr
+    @elf: elfesteem's ELF instance
+    @loc_db: LocationDB used to retrieve symbols' offset
+    @base_addr: addr to reloc to (if any)
+    """
+    # Get symbol sections
+    symbol_sections = []
+    for section_header in elf.sh:
+        if hasattr(section_header, 'symtab'):
+            log.debug("Find %d symbols in %r", len(section_header.symtab),
+                      section_header)
+            symbol_sections.append(section_header)
+        elif isinstance(section_header, (
+                elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed
+        )):
+            log.debug("Find GNU version related section, unsupported for now")
+
+    for section in symbol_sections:
+        for symbol_entry in section.symtab:
+            # Here, the computation of vaddr assumes 'elf' is an executable or a
+            # shared object file
+
+            # For relocatable file, symbol_entry.value is an offset from the section
+            # base -> not handled here
+            st_bind = symbol_entry.info >> 4
+            st_type = symbol_entry.info & 0xF
+
+            if st_type not in [
+                    elf_csts.STT_NOTYPE,
+                    elf_csts.STT_OBJECT,
+                    elf_csts.STT_FUNC,
+                    elf_csts.STT_COMMON,
+                    elf_csts.STT_GNU_IFUNC,
+            ]:
+                # Ignore symbols useless in linking
+                continue
+
+            if st_bind == elf_csts.STB_GLOBAL:
+                # Global symbol
+                weak = False
+            elif st_bind == elf_csts.STB_WEAK:
+                # Weak symbol
+                weak = True
+            else:
+                # Ignore local & others symbols
+                continue
+
+            absolute = False
+            if symbol_entry.shndx == 0:
+                # SHN_UNDEF
+                continue
+            elif symbol_entry.shndx == 0xfff1:
+                # SHN_ABS
+                absolute = True
+                log.debug("Absolute symbol %r - %x", symbol_entry.name,
+                          symbol_entry.value)
+            elif 0xff00 <= symbol_entry.shndx <= 0xffff:
+                # Reserved index (between SHN_LORESERV and SHN_HIRESERVE)
+                raise RuntimeError("Unsupported reserved index: %r" % symbol_entry)
+
+            name = symbol_entry.name
+            if name == "":
+                # Ignore empty symbol
+                log.debug("Empty symbol %r", symbol_entry)
+                continue
+
+            if absolute:
+                vaddr = symbol_entry.value
+            else:
+                vaddr = symbol_entry.value + base_addr
+
+            # 'weak' information is only used to force global symbols for now
+            already_existing_loc = loc_db.get_name_location(name)
+            if already_existing_loc is not None:
+                if weak:
+                    # Weak symbol, it is ok if it already exists, skip it
+                    continue
+                else:
+                    # Global symbol, force it
+                    loc_db.remove_location_name(already_existing_loc,
+                                                name)
+            already_existing_off = loc_db.get_offset_location(vaddr)
+            if already_existing_off is not None:
+                loc_db.add_location_name(already_existing_off, name)
+            else:
+                loc_db.add_location(name=name, offset=vaddr)
+
+
+def apply_reloc_x86(elf, vm, section, base_addr, loc_db):
+    """Apply relocation for x86 ELF contained in the section @section
+    @elf: elfesteem's ELF instance
+    @vm: VmMngr instance
+    @section: elf's section containing relocation to perform
+    @base_addr: addr to reloc to
+    @loc_db: LocationDB used to retrieve symbols' offset
+    """
+    if elf.size == 64:
+        addr_writer = lambda vaddr, addr: vm.set_mem(vaddr,
+                                                     struct.pack("<Q", addr))
+    elif elf.size == 32:
+        addr_writer = lambda vaddr, addr: vm.set_mem(vaddr,
+                                                     struct.pack("<I", addr))
+    else:
+        raise ValueError("Unsupported elf size %d" % elf.size)
+
+    symb_section = section.linksection
+    for reloc in section.reltab:
+
+        # Parse relocation info
+        r_addend = reloc.addend if hasattr(reloc, "addend") else 0
+        r_info = reloc.info
+        if elf.size == 64:
+            r_info_sym = (r_info >> 32) & 0xFFFFFFFF
+            r_info_type = r_info & 0xFFFFFFFF
+        elif elf.size == 32:
+            r_info_sym = (r_info >> 8) & 0xFFFFFF
+            r_info_type = r_info & 0xFF
+
+        is_ifunc = False
+        symbol_entry = None
+        if r_info_sym > 0:
+            symbol_entry = symb_section.symtab[r_info_sym]
 
-def vm_load_elf(vm, fdata, name="", **kargs):
+        r_offset = reloc.offset
+        r_addend = reloc.cstr.sym
+
+        if (elf.size, reloc.type) in [
+                (64, elf_csts.R_X86_64_RELATIVE),
+                (64, elf_csts.R_X86_64_IRELATIVE),
+                (32, elf_csts.R_386_RELATIVE),
+                (32, elf_csts.R_386_IRELATIVE),
+        ]:
+            # B + A
+            addr = base_addr + r_addend
+            where = base_addr + r_offset
+        elif reloc.type == elf_csts.R_X86_64_64:
+            # S + A
+            addr_symb = loc_db.get_name_offset(symbol_entry.name)
+            if addr_symb is None:
+                log.warning("Unable to find symbol %r" % symbol_entry.name)
+                continue
+            addr = addr_symb + r_addend
+            where = base_addr + r_offset
+        elif (elf.size, reloc.type) in [
+                (64, elf_csts.R_X86_64_TPOFF64),
+                (64, elf_csts.R_X86_64_DTPMOD64),
+                (32, elf_csts.R_386_TLS_TPOFF),
+        ]:
+            # Thread dependent, ignore for now
+            log.debug("Skip relocation TPOFF64 %r", reloc)
+            continue
+        elif (elf.size, reloc.type) in [
+                (64, elf_csts.R_X86_64_GLOB_DAT),
+                (64, elf_csts.R_X86_64_JUMP_SLOT),
+                (32, elf_csts.R_386_JMP_SLOT),
+                (32, elf_csts.R_386_GLOB_DAT),
+        ]:
+            # S
+            addr = loc_db.get_name_offset(symbol_entry.name)
+            if addr is None:
+                log.warning("Unable to find symbol %r" % symbol_entry.name)
+                continue
+            is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC
+            where = base_addr + r_offset
+        else:
+            raise ValueError(
+                "Unknown relocation type: %d (%r)" % (reloc.type,
+                                                      reloc)
+            )
+        if is_ifunc:
+            # Resolve at runtime - not implemented for now
+            log.warning("Relocation for %r (at %x, currently pointing on %x) "
+                        "has to be resolved at runtime",
+                        symbol_entry.name, where, addr)
+            continue
+
+        log.debug("Write %x at %x", addr, where)
+        addr_writer(where, addr)
+
+
+def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False,
+                **kargs):
     """
     Very dirty elf loader
     TODO XXX: implement real loader
     """
-    # log.setLevel(logging.DEBUG)
-    e = elf_init.ELF(fdata, **kargs)
+    elf = elf_init.ELF(fdata, **kargs)
     i = interval()
     all_data = {}
 
-    for p in e.ph.phlist:
+    for p in elf.ph.phlist:
         if p.ph.type != elf_csts.PT_LOAD:
             continue
         log.debug(
             '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset,
                   p.ph.filesz, p.ph.type)
-        data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz]
-        addr_o = p.ph.vaddr
+        data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz]
+        addr_o = p.ph.vaddr + base_addr
         a_addr = addr_o & ~0xFFF
         b_addr = addr_o + max(p.ph.memsz, p.ph.filesz)
         b_addr = (b_addr + 0xFFF) & ~0xFFF
@@ -81,7 +267,31 @@ def vm_load_elf(vm, fdata, name="", **kargs):
 
     for r_vaddr, data in all_data.items():
         vm.set_mem(r_vaddr, data)
-    return e
+
+    if loc_db is not None:
+        fill_loc_db_with_symbols(elf, loc_db, base_addr)
+
+    if apply_reloc:
+        arch = guess_arch(elf)
+        sections = []
+        for section in elf.sh:
+            if not hasattr(section, 'reltab'):
+                continue
+            if isinstance(section, elf_init.RelATable):
+                pass
+            elif isinstance(section, elf_init.RelTable):
+                if arch == "x86_64":
+                    log.warning("REL section should not happen in x86_64")
+            else:
+                raise RuntimeError("Unknown relocation section type: %r" % section)
+            sections.append(section)
+        for section in sections:
+            if arch in ["x86_64", "x86_32"]:
+                apply_reloc_x86(elf, vm, section, base_addr, loc_db)
+            else:
+                log.debug("Unsupported relocation for arch %r" % arch)
+
+    return elf
 
 
 class libimp_elf(libimp):