diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
| commit | 579cf1d03fb932083e6317967d1613d5c2587fb6 (patch) | |
| tree | 629f039935382a2a7391bce9253f6c9968159049 /src/miasm/loader/elf_init.py | |
| parent | 51c15d3ea2e16d4fc5f0f01a3b9befc66b1f982e (diff) | |
| download | focaccia-miasm-ta/nix.tar.gz focaccia-miasm-ta/nix.zip | |
Convert to src-layout ta/nix
Diffstat (limited to 'src/miasm/loader/elf_init.py')
| -rw-r--r-- | src/miasm/loader/elf_init.py | 882 |
1 files changed, 882 insertions, 0 deletions
diff --git a/src/miasm/loader/elf_init.py b/src/miasm/loader/elf_init.py new file mode 100644 index 00000000..cffb3575 --- /dev/null +++ b/src/miasm/loader/elf_init.py @@ -0,0 +1,882 @@ +#! /usr/bin/env python + +from __future__ import print_function +from builtins import range +import logging +import struct + +from future.utils import PY3, with_metaclass + +from miasm.core.utils import force_bytes, force_str +from miasm.loader import cstruct +from miasm.loader import elf +from miasm.loader.strpatchwork import StrPatchwork + +log = logging.getLogger("elfparse") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("[%(levelname)-8s]: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +def printable(string): + if isinstance(string, bytes): + return "".join( + c.decode() if b" " <= c < b"~" else "." + for c in (string[i:i+1] for i in range(len(string))) + ) + return string + + +class StructWrapper_metaclass(type): + + def __new__(cls, name, bases, dct): + wrapped = dct["wrapped"] + if wrapped is not None: # XXX: make dct lookup look into base classes + for fname, v in wrapped._fields: + dct[fname] = property(dct.pop("get_" + fname, + lambda self, fname=fname: getattr( + self.cstr, fname)), + dct.pop("set_" + fname, + lambda self, v, fname=fname: setattr( + self.cstr, fname, v)), + dct.pop("del_" + fname, None)) + return type.__new__(cls, name, bases, dct) + + +class StructWrapper(with_metaclass(StructWrapper_metaclass, object)): + + wrapped = None + + def __init__(self, parent, sex, size, *args, **kargs): + self.cstr = self.wrapped(sex, size, *args, **kargs) + self.parent = parent + + def __getitem__(self, item): + return getattr(self, item) + + def __repr__(self): + return "<W-" + repr(self.cstr)[1:] + + def __str__(self): + return str(self.cstr) + + def __bytes__(self): + return bytes(self.cstr) + + +class WEhdr(StructWrapper): + wrapped = elf.Ehdr + + def set_shstrndx(self, val): + self.cstr.shstrndx = val + + +class WSym32(StructWrapper): + wrapped = elf.Sym32 + + def get_name(self): + return self.parent.linksection.get_name(self.cstr.name) + + +class WSym64(StructWrapper): + wrapped = elf.Sym64 + + def get_name(self): + return self.parent.linksection.get_name(self.cstr.name) + + +class WRel32(StructWrapper): + wrapped = elf.Rel32 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u08")) + + def get_sym(self): + if isinstance(self.parent.linksection, NullSection): + return None + return self.parent.linksection.symtab[self.cstr.info >> 8].name + + def get_type(self): + return self.cstr.info & 0xff + + +class WRel64(StructWrapper): + wrapped = elf.Rel64 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u32")) + + def get_sym(self): + if not hasattr(self.parent.linksection, 'symtab'): + return None + return self.parent.linksection.symtab[self.cstr.info >> 32].name + + def get_type(self): + return self.cstr.info & 0xffffffff + + +class WRela32(WRel32): + wrapped = elf.Rela32 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u08")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 8].name + + def get_type(self): + return self.cstr.info & 0xff + + +class WRela64(WRel64): + wrapped = elf.Rela64 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u32")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 32].name + + def get_type(self): + return self.cstr.info & 0xffffffff + + +class WShdr(StructWrapper): + wrapped = elf.Shdr + + def get_name(self): + return self.parent.parent._shstr.get_name(self.cstr.name) + + +class WDynamic(StructWrapper): + wrapped = elf.Dynamic + + def get_name(self): + if self.type == elf.DT_NEEDED: + return self.parent.linksection.get_name(self.cstr.name) + return self.cstr.name + + +class WPhdr(StructWrapper): + wrapped = elf.Phdr + + +class WPhdr64(StructWrapper): + wrapped = elf.Phdr64 + + +class WNhdr(StructWrapper): + wrapped = elf.Nhdr + + +class ContentManager(object): + + def __get__(self, owner, x): + if hasattr(owner, '_content'): + return owner._content + + def __set__(self, owner, new_content): + owner.resize(len(owner._content), len(new_content)) + owner._content = StrPatchwork(new_content) + owner.parse_content(owner.sex, owner.size) + + def __delete__(self, owner): + self.__set__(owner, None) + + +# Sections + +class Section_metaclass(type): + + def __new__(cls, name, bases, dct): + o = type.__new__(cls, name, bases, dct) + if name != "Section": + Section.register(o) + return o + + def register(cls, o): + if o.sht is not None: + cls.sectypes[o.sht] = o + + def __call__(cls, parent, sex, size, shstr=None): + sh = None + if shstr is not None: + sh = WShdr(None, sex, size, shstr) + if sh.type in Section.sectypes: + cls = Section.sectypes[sh.type] + i = cls.__new__(cls, cls.__name__, cls.__bases__, cls.__dict__) + if sh is not None: + sh.parent = i + i.__init__(parent, sh) + return i + + +class Section(with_metaclass(Section_metaclass, object)): + + sectypes = {} + content = ContentManager() + + def resize(self, old, new): + self.sh.size += new - old + self.parent.resize(self, new - old) + if self.phparent: + self.phparent.resize(self, new - old) + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + pass + + def get_linksection(self): + return self.parent[self.sh.link] + + def set_linksection(self, val): + if isinstance(val, Section): + val = self.parent.shlist.find(val) + if type(val) is int: + self.sh.link = val + linksection = property(get_linksection, set_linksection) + + def get_infosection(self): + # XXX info may not be in sh list ?!? + if not self.sh.info in self.parent: + return None + return self.parent[self.sh.info] + + def set_infosection(self, val): + if isinstance(val, Section): + val = self.parent.shlist.find(val) + if type(val) is int: + self.sh.info = val + infosection = property(get_infosection, set_infosection) + + def __init__(self, parent, sh=None): + self.parent = parent + self.phparent = None + self.sh = sh + self._content = b"" + + def __repr__(self): + r = "{%(name)s ofs=%(offset)#x sz=%(size)#x addr=%(addr)#010x}" % self.sh + return r + + +class NullSection(Section): + sht = elf.SHT_NULL + + def get_name(self, ofs): + # XXX check this + return b"" + + +class ProgBits(Section): + sht = elf.SHT_PROGBITS + + +class HashSection(Section): + sht = elf.SHT_HASH + + +class NoBitsSection(Section): + sht = elf.SHT_NOBITS + + +class ShLibSection(Section): + sht = elf.SHT_SHLIB + + +class InitArray(Section): + sht = elf.SHT_INIT_ARRAY + + +class FiniArray(Section): + sht = elf.SHT_FINI_ARRAY + + +class GroupSection(Section): + sht = elf.SHT_GROUP + + +class SymTabSHIndeces(Section): + sht = elf.SHT_SYMTAB_SHNDX + + +class GNUVerSym(Section): + sht = elf.SHT_GNU_versym + + +class GNUVerNeed(Section): + sht = elf.SHT_GNU_verneed + + +class GNUVerDef(Section): + sht = elf.SHT_GNU_verdef + + +class GNULibLIst(Section): + sht = elf.SHT_GNU_LIBLIST + + +class CheckSumSection(Section): + sht = elf.SHT_CHECKSUM + + +class NoteSection(Section): + sht = elf.SHT_NOTE + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + hsz = 12 + self.notes = [] + # XXX: c may not be aligned? + while len(c) > hsz: + note = WNhdr(self, sex, size, c) + namesz, descsz = note.namesz, note.descsz + name = c[hsz:hsz + namesz] + desc = c[hsz + namesz:hsz + namesz + descsz] + c = c[hsz + namesz + descsz:] + self.notes.append((note.type, name, desc)) + + +class Dynamic(Section): + sht = elf.SHT_DYNAMIC + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + self.dyntab = [] + self.dynamic = {} + sz = self.sh.entsize + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + dyn = WDynamic(self, sex, size, s) + self.dyntab.append(dyn) + if isinstance(dyn.name, str): + self[dyn.name] = dyn + + def __setitem__(self, item, value): + if isinstance(item, bytes): + self.dynamic[item] = value + return + if isinstance(item, str): + self.symbols[item.encode()] = value + return + self.dyntab[item] = value + + def __getitem__(self, item): + if isinstance(item, bytes): + return self.dynamic[item] + if isinstance(item, str): + return self.dynamic[item.encode()] + return self.dyntab[item] + + +class StrTable(Section): + sht = elf.SHT_STRTAB + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + self.res = {} + c = self.content + q = 0 + index = 0 + l = len(c) + while index < l: + p = c.find(b"\x00", index) + if p < 0: + log.warning("Missing trailing 0 for string [%s]" % c) # XXX + p = len(c) - index + self.res[index] = c[index:p] + # print q, c[:p] + index = p + 1 + # q += p+1 + # c = c[p+1:] + + def get_name(self, ofs): + return self.content[ofs:self.content.find(b'\x00', start=ofs)] + + def add_name(self, name): + name = force_bytes(name) + name = name + b"\x00" + if name in self.content: + return self.content.find(name) + n = len(self.content) + self.content = bytes(self.content) + name + return n + + def mod_name(self, name, new_name): + s = bytes(self.content) + name_b = b'\x00%s\x00' % name.encode() + if not name_b in s: + raise ValueError('Unknown name %r' % name) + self.content = s.replace( + name_b, + b'\x00%s\x00' % new_name.encode() + ) + return len(self.content) + + +class SymTable(Section): + sht = elf.SHT_SYMTAB + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + self.symtab = [] + self.symbols = {} + sz = self.sh.entsize + index = 0 + l = len(c) + if size == 32: + WSym = WSym32 + elif size == 64: + WSym = WSym64 + else: + ValueError('unknown size') + while index < l: + s = c[index:index + sz] + index += sz + sym = WSym(self, sex, size, s) + self.symtab.append(sym) + self[sym.name] = sym + + def __getitem__(self, item): + if isinstance(item, bytes): + return self.symbols[item] + if isinstance(item, str): + return self.symbols[item.encode()] + return self.symtab[item] + + def __setitem__(self, item, value): + if isinstance(item, bytes): + self.symbols[item] = value + return + if isinstance(item, str): + self.symbols[item.encode()] = value + return + self.symtab[item] = value + + +class DynSymTable(SymTable): + sht = elf.SHT_DYNSYM + + +class RelTable(Section): + sht = elf.SHT_REL + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + if size == 32: + WRel = WRel32 + elif size == 64: + WRel = WRel64 + else: + ValueError('unknown size') + c = self.content + self.reltab = [] + self.rel = {} + sz = self.sh.entsize + + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + rel = WRel(self, sex, size, s) + self.reltab.append(rel) + if rel.parent.linksection != self.parent.shlist[0]: + self.rel[rel.sym] = rel + + +class RelATable(RelTable): + sht = elf.SHT_RELA + +# Section List + + +class SHList(object): + + def __init__(self, parent, sex, size): + self.parent = parent + self.shlist = [] + ehdr = self.parent.Ehdr + of1 = ehdr.shoff + if not of1: # No SH table + return + for i in range(ehdr.shnum): + of2 = of1 + ehdr.shentsize + shstr = parent[of1:of2] + self.shlist.append(Section(self, sex, size, shstr=shstr)) + of1 = of2 + self._shstr = self.shlist[ehdr.shstrndx] + + for s in self.shlist: + if not isinstance(s, NoBitsSection): + s._content = StrPatchwork( + parent[s.sh.offset: s.sh.offset + s.sh.size] + ) + # Follow dependencies when initializing sections + zero = self.shlist[0] + todo = self.shlist[1:] + done = [] + while todo: + s = todo.pop(0) + if ((s.linksection == zero or s.linksection in done) and + (s.infosection in [zero, None] or s.infosection in done)): + done.append(s) + s.parse_content(sex, size) + else: + todo.append(s) + for s in self.shlist: + self.do_add_section(s) + + def do_add_section(self, section): + n = section.sh.name + if n.startswith(b"."): + n = n[1:] + n = printable(n).replace(".", "_").replace("-", "_") + setattr(self, n, section) # xxx + + def append(self, item): + self.do_add_section(item) + self.shlist.append(item) + + def __getitem__(self, item): + return self.shlist[item] + + def __repr__(self): + rep = ["# section offset size addr flags"] + for i, s in enumerate(self.shlist): + l = "%(name)-15s %(offset)08x %(size)06x %(addr)08x %(flags)x " % s.sh + l = ("%2i " % i) + l + s.__class__.__name__ + rep.append(l) + return "\n".join(rep) + + def __bytes__(self): + return b"".join( + bytes(s.sh) for s in self.shlist + ) + + def __str__(self): + if PY3: + return repr(self) + return bytes(self) + + def resize(self, sec, diff): + for s in self.shlist: + if s.sh.offset > sec.sh.offset: + s.sh.offset += diff + if self.parent.Ehdr.shoff > sec.sh.offset: + self.parent.Ehdr.shoff += diff + if self.parent.Ehdr.phoff > sec.sh.offset: + self.parent.Ehdr.phoff += diff + +# Program Header List + + +class ProgramHeader(object): + + def __init__(self, parent, sex, size, phstr): + self.parent = parent + self.ph = WPhdr(self, sex, size, phstr) + self.shlist = [] + for s in self.parent.parent.sh: + if isinstance(s, NullSection): + continue + if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) + or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): + s.phparent = self + self.shlist.append(s) + + def resize(self, sec, diff): + self.ph.filesz += diff + self.ph.memsz += diff + self.parent.resize(sec, diff) + + +class ProgramHeader64(object): + + def __init__(self, parent, sex, size, phstr): + self.parent = parent + self.ph = WPhdr64(self, sex, size, phstr) + self.shlist = [] + for s in self.parent.parent.sh: + if isinstance(s, NullSection): + continue + if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) + or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): + s.phparent = self + self.shlist.append(s) + + def resize(self, sec, diff): + self.ph.filesz += diff + self.ph.memsz += diff + self.parent.resize(sec, diff) + + +class PHList(object): + + def __init__(self, parent, sex, size): + self.parent = parent + self.phlist = [] + ehdr = self.parent.Ehdr + of1 = ehdr.phoff + for i in range(ehdr.phnum): + of2 = of1 + ehdr.phentsize + phstr = parent[of1:of2] + if size == 32: + self.phlist.append(ProgramHeader(self, sex, size, phstr)) + else: + self.phlist.append(ProgramHeader64(self, sex, size, phstr)) + of1 = of2 + + def __getitem__(self, item): + return self.phlist[item] + + def __repr__(self): + r = [" offset filesz vaddr memsz"] + for i, p in enumerate(self.phlist): + l = "%(offset)07x %(filesz)06x %(vaddr)08x %(memsz)07x %(type)02x %(flags)01x" % p.ph + l = ("%2i " % i) + l + r.append(l) + r.append(" " + " ".join(printable(s.sh.name) for s in p.shlist)) + return "\n".join(r) + + def __bytes__(self): + return b"".join( + bytes(p.ph) for p in self.phlist + ) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__(self) + + def resize(self, sec, diff): + for p in self.phlist: + if p.ph.offset > sec.sh.offset: + p.ph.offset += diff + if p.ph.vaddr > sec.phparent.ph.vaddr + sec.sh.offset: + p.ph.vaddr += diff + if p.ph.paddr > sec.phparent.ph.paddr + sec.sh.offset: + p.ph.paddr += diff + + +class virt(object): + + def __init__(self, x): + self.parent = x + + def get_rvaitem(self, start, stop=None): + if stop == None: + s = self.parent.getsectionbyvad(start) + if s: + start = start - s.sh.addr + else: + s = self.parent.getphbyvad(start) + if s: + start = start - s.ph.vaddr + if not s: + return [(None, start)] + return [(s, start)] + total_len = stop - start + + virt_item = [] + while total_len: + s = self.parent.getsectionbyvad(start) + if not s: + s = self.parent.getphbyvad(start) + if not s: + raise ValueError('unknown rva address! %x' % start) + if isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64): + s_max = s.ph.filesz + s_start = start - s.ph.vaddr + s_stop = stop - s.ph.vaddr + else: + s_max = s.sh.size + s_start = start - s.sh.addr + s_stop = stop - s.sh.addr + if s_stop > s_max: + s_stop = s_max + + s_len = s_stop - s_start + if s_len == 0: + raise ValueError('empty section! %x' % start) + total_len -= s_len + start += s_len + n_item = slice(s_start, s_stop) + virt_item.append((s, n_item)) + return virt_item + + def item2virtitem(self, item): + if not type(item) is slice: # integer + return self.get_rvaitem(item) + start = item.start + stop = item.stop + assert(item.step is None) + return self.get_rvaitem(start, stop) + + def get(self, ad_start, ad_stop=None): + rva_items = self.get_rvaitem(ad_start, ad_stop) + data_out = b"" + for s, n_item in rva_items: + if not (isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64)): + data_out += s.content.__getitem__(n_item) + continue + if not type(n_item) is slice: + n_item = slice(n_item, n_item + 1, 1) + start = n_item.start + s.ph.offset + stop = n_item.stop + s.ph.offset + if n_item.step != None: + step = n_item.step + s.ph.offset + else: + step = None + n_item = slice(start, stop, step) + # data_out += self.parent.content.__s.content.__getitem__(n_item) + data_out += self.parent.content.__getitem__(n_item) + + return data_out + + def set(self, item, data): + if not type(item) is slice: + item = slice(item, item + len(data), None) + virt_item = self.item2virtitem(item) + if not virt_item: + return + off = 0 + for s, n_item in virt_item: + if isinstance(s, ProgBits): + i = slice(off, n_item.stop + off - n_item.start, n_item.step) + + data_slice = data.__getitem__(i) + s.content.__setitem__(n_item, data_slice) + off = i.stop + else: + raise ValueError('TODO XXX') + + return + + def __getitem__(self, item): + if isinstance(item, slice): + assert(item.step is None) + return self.get(item.start, item.stop) + else: + return self.get(item) + + def __setitem__(self, item, data): + if isinstance(item, slice): + rva = item.start + else: + rva = item + self.set(rva, data) + + def max_addr(self): + # the maximum virtual address is found by retrieving the maximum + # possible virtual address, either from the program entries, and + # section entries. if there is no such object, raise an error. + l = 0 + if self.parent.ph.phlist: + for phdr in self.parent.ph.phlist: + l = max(l, phdr.ph.vaddr + phdr.ph.memsz) + if self.parent.sh.shlist: + for shdr in self.parent.sh.shlist: + l = max(l, shdr.sh.addr + shdr.sh.size) + if not l: + raise ValueError('maximum virtual address not found !') + return l + + def is_addr_in(self, ad): + return self.parent.is_in_virt_address(ad) + + def find(self, pattern, start=0): + sections = [] + offset = start + for s in self.parent.ph: + s_max = s.ph.memsz # max(s.ph.filesz, s.ph.memsz) + if offset < s.ph.vaddr + s_max: + sections.append(s) + + if not sections: + return -1 + offset -= sections[0].ph.vaddr + if offset < 0: + offset = 0 + for s in sections: + data = self.parent.content[s.ph.offset:s.ph.offset + s.ph.filesz] + ret = data.find(pattern, offset) + if ret != -1: + return ret + s.ph.vaddr # self.parent.rva2virt(s.addr + ret) + offset = 0 + return -1 + +# ELF object + + +class ELF(object): + + def __init__(self, elfstr): + self._content = elfstr + self.parse_content() + + self._virt = virt(self) + + def get_virt(self): + return self._virt + virt = property(get_virt) + + content = ContentManager() + + def parse_content(self): + h = self.content[:8] + self.size = struct.unpack('B', h[4:5])[0] * 32 + self.sex = struct.unpack('B', h[5:6])[0] + self.Ehdr = WEhdr(self, self.sex, self.size, self.content) + self.sh = SHList(self, self.sex, self.size) + self.ph = PHList(self, self.sex, self.size) + + def resize(self, old, new): + pass + + def __getitem__(self, item): + return self.content[item] + + def build_content(self): + c = StrPatchwork() + c[0] = bytes(self.Ehdr) + c[self.Ehdr.phoff] = bytes(self.ph) + for s in self.sh: + c[s.sh.offset] = bytes(s.content) + c[self.Ehdr.shoff] = bytes(self.sh) + return bytes(c) + + def __bytes__(self): + return self.build_content() + + def __str__(self): + if PY3: + return repr(self) + return bytes(self) + + def getphbyvad(self, ad): + for s in self.ph: + if s.ph.vaddr <= ad < s.ph.vaddr + s.ph.memsz: + return s + + def getsectionbyvad(self, ad): + for s in self.sh: + if s.sh.addr <= ad < s.sh.addr + s.sh.size: + return s + + def getsectionbyname(self, name): + name = force_bytes(name) + for s in self.sh: + try: + if s.sh.name.strip(b'\x00') == name: + return s + except UnicodeDecodeError: + pass + return None + + def is_in_virt_address(self, ad): + for s in self.sh: + if s.sh.addr <= ad < s.sh.addr + s.sh.size: + return True + return False |