diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
| commit | 579cf1d03fb932083e6317967d1613d5c2587fb6 (patch) | |
| tree | 629f039935382a2a7391bce9253f6c9968159049 /src/miasm/jitter/loader/elf.py | |
| parent | 51c15d3ea2e16d4fc5f0f01a3b9befc66b1f982e (diff) | |
| download | focaccia-miasm-579cf1d03fb932083e6317967d1613d5c2587fb6.tar.gz focaccia-miasm-579cf1d03fb932083e6317967d1613d5c2587fb6.zip | |
Convert to src-layout ta/nix
Diffstat (limited to 'src/miasm/jitter/loader/elf.py')
| -rw-r--r-- | src/miasm/jitter/loader/elf.py | 339 |
1 files changed, 339 insertions, 0 deletions
diff --git a/src/miasm/jitter/loader/elf.py b/src/miasm/jitter/loader/elf.py new file mode 100644 index 00000000..91d1c18b --- /dev/null +++ b/src/miasm/jitter/loader/elf.py @@ -0,0 +1,339 @@ +import struct +from collections import defaultdict + +from future.utils import viewitems + +from miasm.loader import cstruct +from miasm.loader import * +import miasm.loader.elf as elf_csts + +from miasm.jitter.csts import * +from miasm.jitter.loader.utils import canon_libname_libfunc, libimp +from miasm.core.utils import force_str +from miasm.core.interval import interval + +import logging + +log = logging.getLogger('loader_elf') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)-8s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + + +def get_import_address_elf(e): + import2addr = defaultdict(set) + for sh in e.sh: + if not hasattr(sh, 'rel'): + continue + for k, v in viewitems(sh.rel): + k = force_str(k) + import2addr[('xxx', k)].add(v.offset) + return import2addr + + +def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): + # XXX quick hack + fa = get_import_address_elf(e) + dyn_funcs = {} + for (libname, libfunc), ads in viewitems(fa): + # Quick hack - if a symbol is already known, do not stub it + if loc_db and loc_db.get_name_location(libfunc) is not None: + continue + for ad in ads: + ad_base_lib = runtime_lib.lib_get_add_base(libname) + ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) + + libname_s = canon_libname_libfunc(libname, libfunc) + dyn_funcs[libname_s] = ad_libfunc + if patch_vm_imp: + log.debug('patch 0x%x 0x%x %s', ad, ad_libfunc, libfunc) + set_endianness = { elf_csts.ELFDATA2MSB: ">", + elf_csts.ELFDATA2LSB: "<", + elf_csts.ELFDATANONE: "" }[e.sex] + vm.set_mem(ad, + struct.pack(set_endianness + + cstruct.size2type[e.size], + ad_libfunc)) + return runtime_lib, dyn_funcs + +def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): + """Parse the miasm.loader's ELF @elf to extract symbols, and fill the LocationDB + instance @loc_db with parsed symbols. + + The ELF is considered mapped at @base_addr + @elf: miasm.loader's ELF instance + @loc_db: LocationDB used to retrieve symbols'offset + @base_addr: addr to reloc to (if any) + """ + # Get symbol sections + symbol_sections = [] + for section_header in elf.sh: + if hasattr(section_header, 'symbols'): + for name, sym in viewitems(section_header.symbols): + if not name or sym.value == 0: + continue + name = loc_db.find_free_name(force_str(name)) + loc_db.add_location(name, sym.value, strict=False) + + if hasattr(section_header, 'reltab'): + for rel in section_header.reltab: + if not rel.sym or rel.offset == 0: + continue + name = loc_db.find_free_name(force_str(rel.sym)) + loc_db.add_location(name, rel.offset, strict=False) + + if hasattr(section_header, 'symtab'): + log.debug("Find %d symbols in %r", len(section_header.symtab), + section_header) + symbol_sections.append(section_header) + elif isinstance(section_header, ( + elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed + )): + log.debug("Find GNU version related section, unsupported for now") + + for section in symbol_sections: + for symbol_entry in section.symtab: + # Here, the computation of vaddr assumes 'elf' is an executable or a + # shared object file + + # For relocatable file, symbol_entry.value is an offset from the section + # base -> not handled here + st_bind = symbol_entry.info >> 4 + st_type = symbol_entry.info & 0xF + + if st_type not in [ + elf_csts.STT_NOTYPE, + elf_csts.STT_OBJECT, + elf_csts.STT_FUNC, + elf_csts.STT_COMMON, + elf_csts.STT_GNU_IFUNC, + ]: + # Ignore symbols useless in linking + continue + + if st_bind == elf_csts.STB_GLOBAL: + # Global symbol + weak = False + elif st_bind == elf_csts.STB_WEAK: + # Weak symbol + weak = True + else: + # Ignore local & others symbols + continue + + absolute = False + if symbol_entry.shndx == 0: + # SHN_UNDEF + continue + elif symbol_entry.shndx == 0xfff1: + # SHN_ABS + absolute = True + log.debug("Absolute symbol %r - %x", symbol_entry.name, + symbol_entry.value) + elif 0xff00 <= symbol_entry.shndx <= 0xffff: + # Reserved index (between SHN_LORESERV and SHN_HIRESERVE) + raise RuntimeError("Unsupported reserved index: %r" % symbol_entry) + + name = force_str(symbol_entry.name) + if name == "": + # Ignore empty symbol + log.debug("Empty symbol %r", symbol_entry) + continue + + if absolute: + vaddr = symbol_entry.value + else: + vaddr = symbol_entry.value + base_addr + + # 'weak' information is only used to force global symbols for now + already_existing_loc = loc_db.get_name_location(name) + if already_existing_loc is not None: + if weak: + # Weak symbol, this is ok to already exists, skip it + continue + else: + # Global symbol, force it + loc_db.remove_location_name(already_existing_loc, + name) + already_existing_off = loc_db.get_offset_location(vaddr) + if already_existing_off is not None: + loc_db.add_location_name(already_existing_off, name) + else: + loc_db.add_location(name=name, offset=vaddr) + + +def apply_reloc_x86(elf, vm, section, base_addr, loc_db): + """Apply relocation for x86 ELF contained in the section @section + @elf: miasm.loader's ELF instance + @vm: VmMngr instance + @section: elf's section containing relocation to perform + @base_addr: addr to reloc to + @loc_db: LocationDB used to retrieve symbols'offset + """ + if elf.size == 64: + addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, + struct.pack("<Q", addr)) + elif elf.size == 32: + addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, + struct.pack("<I", addr)) + else: + raise ValueError("Unsupported elf size %d" % elf.size) + + symb_section = section.linksection + for reloc in section.reltab: + + # Parse relocation info + r_info = reloc.info + if elf.size == 64: + r_info_sym = (r_info >> 32) & 0xFFFFFFFF + r_info_type = r_info & 0xFFFFFFFF + elif elf.size == 32: + r_info_sym = (r_info >> 8) & 0xFFFFFF + r_info_type = r_info & 0xFF + + is_ifunc = False + symbol_entry = None + if r_info_sym > 0: + symbol_entry = symb_section.symtab[r_info_sym] + + r_offset = reloc.offset + r_addend = reloc.cstr.sym + + if (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_RELATIVE), + (64, elf_csts.R_X86_64_IRELATIVE), + (32, elf_csts.R_386_RELATIVE), + (32, elf_csts.R_386_IRELATIVE), + ]: + # B + A + addr = base_addr + r_addend + where = base_addr + r_offset + elif reloc.type == elf_csts.R_X86_64_64: + # S + A + addr_symb = loc_db.get_name_offset(symbol_entry.name) + if addr_symb is None: + log.warning("Unable to find symbol %r" % symbol_entry.name) + continue + addr = addr_symb + r_addend + where = base_addr + r_offset + elif (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_TPOFF64), + (64, elf_csts.R_X86_64_DTPMOD64), + (32, elf_csts.R_386_TLS_TPOFF), + ]: + # Thread dependent, ignore for now + log.debug("Skip relocation TPOFF64 %r", reloc) + continue + elif (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_GLOB_DAT), + (64, elf_csts.R_X86_64_JUMP_SLOT), + (32, elf_csts.R_386_JMP_SLOT), + (32, elf_csts.R_386_GLOB_DAT), + ]: + # S + addr = loc_db.get_name_offset(symbol_entry.name) + if addr is None: + log.warning("Unable to find symbol %r" % symbol_entry.name) + continue + is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC + where = base_addr + r_offset + else: + raise ValueError( + "Unknown relocation type: %d (%r)" % (reloc.type, + reloc) + ) + if is_ifunc: + # Resolve at runtime - not implemented for now + log.warning("Relocation for %r (at %x, currently pointing on %x) " + "has to be resolved at runtime", + name, where, sym_addr) + continue + + log.debug("Write %x at %x", addr, where) + addr_writer(where, addr) + + +def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False, + **kargs): + """ + Very dirty elf loader + TODO XXX: implement real loader + """ + elf = elf_init.ELF(fdata, **kargs) + i = interval() + all_data = {} + + for p in elf.ph.phlist: + if p.ph.type != elf_csts.PT_LOAD: + continue + log.debug( + '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, + p.ph.filesz, p.ph.type) + data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz] + addr_o = p.ph.vaddr + base_addr + a_addr = addr_o & ~0xFFF + b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) + b_addr = (b_addr + 0xFFF) & ~0xFFF + all_data[addr_o] = data_o + # -2: Trick to avoid merging 2 consecutive pages + i += [(a_addr, b_addr - 2)] + for a, b in i.intervals: + vm.add_memory_page( + a, + PAGE_READ | PAGE_WRITE, + b"\x00" * (b + 2 - a), + repr(name) + ) + + for r_vaddr, data in viewitems(all_data): + vm.set_mem(r_vaddr, data) + + if loc_db is not None: + fill_loc_db_with_symbols(elf, loc_db, base_addr) + + if apply_reloc: + arch = guess_arch(elf) + sections = [] + for section in elf.sh: + if not hasattr(section, 'reltab'): + continue + if isinstance(section, elf_init.RelATable): + pass + elif isinstance(section, elf_init.RelTable): + if arch == "x86_64": + log.warning("REL section should not happen in x86_64") + else: + raise RuntimeError("Unknown relocation section type: %r" % section) + sections.append(section) + for section in sections: + if arch in ["x86_64", "x86_32"]: + apply_reloc_x86(elf, vm, section, base_addr, loc_db) + else: + log.debug("Unsupported relocation for arch %r" % arch) + + return elf + + +class libimp_elf(libimp): + pass + + +# machine, size, sex -> arch_name +ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml", + (elf_csts.EM_ARM, 32, elf_csts.ELFDATA2MSB): "armb", + (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2LSB): "aarch64l", + (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2MSB): "aarch64b", + (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2MSB): "mips32b", + (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2LSB): "mips32l", + (elf_csts.EM_386, 32, elf_csts.ELFDATA2LSB): "x86_32", + (elf_csts.EM_X86_64, 64, elf_csts.ELFDATA2LSB): "x86_64", + (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4", + (elf_csts.EM_PPC, 32, elf_csts.ELFDATA2MSB): "ppc32b", + } + + +def guess_arch(elf): + """Return the architecture specified by the ELF container @elf. + If unknown, return None""" + return ELF_machine.get((elf.Ehdr.machine, elf.size, elf.sex), None) |