about summary refs log tree commit diff stats
path: root/miasm2/jitter/loader/elf.py
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2/jitter/loader/elf.py')
-rw-r--r--miasm2/jitter/loader/elf.py226
1 files changed, 218 insertions, 8 deletions
diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py
index 01dea647..af078ab5 100644
--- a/miasm2/jitter/loader/elf.py
+++ b/miasm2/jitter/loader/elf.py
@@ -28,11 +28,14 @@ def get_import_address_elf(e):
     return import2addr
 
 
-def preload_elf(vm, e, runtime_lib, patch_vm_imp=True):
+def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None):
     # XXX quick hack
     fa = get_import_address_elf(e)
     dyn_funcs = {}
     for (libname, libfunc), ads in fa.items():
+        # Quick hack - if a symbol is already known, do not stub it
+        if loc_db and loc_db.get_name_location(libfunc) is not None:
+            continue
         for ad in ads:
             ad_base_lib = runtime_lib.lib_get_add_base(libname)
             ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad)
@@ -50,25 +53,208 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True):
                                        ad_libfunc))
     return runtime_lib, dyn_funcs
 
+def fill_loc_db_with_symbols(elf, loc_db, base_addr=0):
+    """Parse the elfesteem's ELF @elf to extract symbols, and fill the LocationDB
+    instance @loc_db with parsed symbols.
+
+    The ELF is considered mapped at @base_addr
+    @elf: elfesteem's ELF instance
+    @loc_db: LocationDB used to retrieve symbols'offset
+    @base_addr: addr to reloc to (if any)
+    """
+    # Get symbol sections
+    symbol_sections = []
+    for section_header in elf.sh:
+        if hasattr(section_header, 'symtab'):
+            log.debug("Find %d symbols in %r", len(section_header.symtab),
+                      section_header)
+            symbol_sections.append(section_header)
+        elif isinstance(section_header, (
+                elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed
+        )):
+            log.debug("Find GNU version related section, unsupported for now")
+
+    for section in symbol_sections:
+        for symbol_entry in section.symtab:
+            # Here, the computation of vaddr assumes 'elf' is an executable or a
+            # shared object file
+
+            # For relocatable file, symbol_entry.value is an offset from the section
+            # base -> not handled here
+            st_bind = symbol_entry.info >> 4
+            st_type = symbol_entry.info & 0xF
+
+            if st_type not in [
+                    elf_csts.STT_NOTYPE,
+                    elf_csts.STT_OBJECT,
+                    elf_csts.STT_FUNC,
+                    elf_csts.STT_COMMON,
+                    elf_csts.STT_GNU_IFUNC,
+            ]:
+                # Ignore symbols useless in linking
+                continue
+
+            if st_bind == elf_csts.STB_GLOBAL:
+                # Global symbol
+                weak = False
+            elif st_bind == elf_csts.STB_WEAK:
+                # Weak symbol
+                weak = True
+            else:
+                # Ignore local & others symbols
+                continue
+
+            absolute = False
+            if symbol_entry.shndx == 0:
+                # SHN_UNDEF
+                continue
+            elif symbol_entry.shndx == 0xfff1:
+                # SHN_ABS
+                absolute = True
+                log.debug("Absolute symbol %r - %x", symbol_entry.name,
+                          symbol_entry.value)
+            elif 0xff00 <= symbol_entry.shndx <= 0xffff:
+                # Reserved index (between SHN_LORESERV and SHN_HIRESERVE)
+                raise RuntimeError("Unsupported reserved index: %r" % symbol_entry)
+
+            name = symbol_entry.name
+            if name == "":
+                # Ignore empty symbol
+                log.debug("Empty symbol %r", symbol_entry)
+                continue
+
+            if absolute:
+                vaddr = symbol_entry.value
+            else:
+                vaddr = symbol_entry.value + base_addr
+
+            # 'weak' information is only used to force global symbols for now
+            already_existing_loc = loc_db.get_name_location(name)
+            if already_existing_loc is not None:
+                if weak:
+                    # Weak symbol, this is ok to already exists, skip it
+                    continue
+                else:
+                    # Global symbol, force it
+                    loc_db.remove_location_name(already_existing_loc,
+                                                name)
+            already_existing_off = loc_db.get_offset_location(vaddr)
+            if already_existing_off is not None:
+                loc_db.add_location_name(already_existing_off, name)
+            else:
+                loc_db.add_location(name=name, offset=vaddr)
+
+
+def apply_reloc_x86(elf, vm, section, base_addr, loc_db):
+    """Apply relocation for x86 ELF contained in the secion @section
+    @elf: elfesteem's ELF instance
+    @vm: VmMngr instance
+    @section: elf's section containing relocation to perform
+    @base_addr: addr to reloc to
+    @loc_db: LocationDB used to retrieve symbols'offset
+    """
+    if elf.size == 64:
+        addr_writer = lambda vaddr, addr: vm.set_mem(vaddr,
+                                                     struct.pack("<Q", addr))
+    elif elf.size == 32:
+        addr_writer = lambda vaddr, addr: vm.set_mem(vaddr,
+                                                     struct.pack("<I", addr))
+    else:
+        raise ValueError("Unsupported elf size %d" % elf.size)
+
+    symb_section = section.linksection
+    for reloc in section.reltab:
+
+        # Parse relocation info
+        r_addend = reloc.addend if hasattr(reloc, "addend") else 0
+        r_info = reloc.info
+        if elf.size == 64:
+            r_info_sym = (r_info >> 32) & 0xFFFFFFFF
+            r_info_type = r_info & 0xFFFFFFFF
+        elif elf.size == 32:
+            r_info_sym = (r_info >> 8) & 0xFFFFFF
+            r_info_type = r_info & 0xFF
+
+        is_ifunc = False
+        symbol_entry = None
+        if r_info_sym > 0:
+            symbol_entry = symb_section.symtab[r_info_sym]
 
-def vm_load_elf(vm, fdata, name="", **kargs):
+        r_offset = reloc.offset
+        r_addend = reloc.cstr.sym
+
+        if (elf.size, reloc.type) in [
+                (64, elf_csts.R_X86_64_RELATIVE),
+                (64, elf_csts.R_X86_64_IRELATIVE),
+                (32, elf_csts.R_386_RELATIVE),
+                (32, elf_csts.R_386_IRELATIVE),
+        ]:
+            # B + A
+            addr = base_addr + r_addend
+            where = base_addr + r_offset
+        elif reloc.type == elf_csts.R_X86_64_64:
+            # S + A
+            addr_symb = loc_db.get_name_offset(symbol_entry.name)
+            if addr_symb is None:
+                log.warning("Unable to find symbol %r" % symbol_entry.name)
+                continue
+            addr = addr_symb + r_addend
+            where = base_addr + r_offset
+        elif (elf.size, reloc.type) in [
+                (64, elf_csts.R_X86_64_TPOFF64),
+                (64, elf_csts.R_X86_64_DTPMOD64),
+                (32, elf_csts.R_386_TLS_TPOFF),
+        ]:
+            # Thread dependent, ignore for now
+            log.debug("Skip relocation TPOFF64 %r", reloc)
+            continue
+        elif (elf.size, reloc.type) in [
+                (64, elf_csts.R_X86_64_GLOB_DAT),
+                (64, elf_csts.R_X86_64_JUMP_SLOT),
+                (32, elf_csts.R_386_JMP_SLOT),
+                (32, elf_csts.R_386_GLOB_DAT),
+        ]:
+            # S
+            addr = loc_db.get_name_offset(symbol_entry.name)
+            if addr is None:
+                log.warning("Unable to find symbol %r" % symbol_entry.name)
+                continue
+            is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC
+            where = base_addr + r_offset
+        else:
+            raise ValueError(
+                "Unknown relocation type: %d (%r)" % (reloc.type,
+                                                      reloc)
+            )
+        if is_ifunc:
+            # Resolve at runtime - not implemented for now
+            log.warning("Relocation for %r (at %x, currently pointing on %x) "
+                        "has to be resolved at runtime",
+                        name, where, sym_addr)
+            continue
+
+        log.debug("Write %x at %x", addr, where)
+        addr_writer(where, addr)
+
+
+def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False,
+                **kargs):
     """
     Very dirty elf loader
     TODO XXX: implement real loader
     """
-    # log.setLevel(logging.DEBUG)
-    e = elf_init.ELF(fdata, **kargs)
+    elf = elf_init.ELF(fdata, **kargs)
     i = interval()
     all_data = {}
 
-    for p in e.ph.phlist:
+    for p in elf.ph.phlist:
         if p.ph.type != elf_csts.PT_LOAD:
             continue
         log.debug(
             '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset,
                   p.ph.filesz, p.ph.type)
-        data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz]
-        addr_o = p.ph.vaddr
+        data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz]
+        addr_o = p.ph.vaddr + base_addr
         a_addr = addr_o & ~0xFFF
         b_addr = addr_o + max(p.ph.memsz, p.ph.filesz)
         b_addr = (b_addr + 0xFFF) & ~0xFFF
@@ -81,7 +267,31 @@ def vm_load_elf(vm, fdata, name="", **kargs):
 
     for r_vaddr, data in all_data.items():
         vm.set_mem(r_vaddr, data)
-    return e
+
+    if loc_db is not None:
+        fill_loc_db_with_symbols(elf, loc_db, base_addr)
+
+    if apply_reloc:
+        arch = guess_arch(elf)
+        sections = []
+        for section in elf.sh:
+            if not hasattr(section, 'reltab'):
+                continue
+            if isinstance(section, elf_init.RelATable):
+                pass
+            elif isinstance(section, elf_init.RelTable):
+                if arch == "x86_64":
+                    log.warning("REL section should not happen in x86_64")
+            else:
+                raise RuntimeError("Unknown relocation section type: %r" % section)
+            sections.append(section)
+        for section in sections:
+            if arch in ["x86_64", "x86_32"]:
+                apply_reloc_x86(elf, vm, section, base_addr, loc_db)
+            else:
+                log.debug("Unsupported relocation for arch %r" % arch)
+
+    return elf
 
 
 class libimp_elf(libimp):