diff options
| -rw-r--r-- | example/disasm/full.py | 5 | ||||
| -rw-r--r-- | example/jitter/run_with_linuxenv.py | 85 | ||||
| -rw-r--r-- | miasm2/analysis/binary.py | 79 | ||||
| -rw-r--r-- | miasm2/arch/arm/jit.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/x86/jit.py | 8 | ||||
| -rw-r--r-- | miasm2/core/asmblock.py | 14 | ||||
| -rw-r--r-- | miasm2/core/locationdb.py | 12 | ||||
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 5 | ||||
| -rw-r--r-- | miasm2/jitter/loader/elf.py | 226 | ||||
| -rw-r--r-- | miasm2/jitter/op_semantics.c | 60 | ||||
| -rw-r--r-- | miasm2/os_dep/linux/__init__.py | 1 | ||||
| -rw-r--r-- | miasm2/os_dep/linux/environment.py | 894 | ||||
| -rw-r--r-- | miasm2/os_dep/linux/syscall.py | 1040 | ||||
| -rwxr-xr-x | setup.py | 1 | ||||
| -rw-r--r-- | test/core/locationdb.py | 5 | ||||
| -rwxr-xr-x | test/test_all.py | 13 |
16 files changed, 2393 insertions, 63 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index 5658c2c2..746d3ee4 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -42,7 +42,7 @@ parser.add_argument('-l', "--dontdis-retcall", action="store_true", help="If set, disassemble only call destinations") parser.add_argument('-s', "--simplify", action="count", help="Apply simplifications rules (liveness, graph simplification, ...)") -parser.add_argument('-o', "--shiftoffset", default=None, +parser.add_argument('-o', "--shiftoffset", default=0, type=lambda x: int(x, 0), help="Shift input binary by an offset") parser.add_argument('-a', "--try-disasm-all", action="store_true", @@ -64,9 +64,8 @@ if args.verbose: log.info('Load binary') if args.rawbinary: - shift = args.shiftoffset if args.shiftoffset is not None else 0 cont = Container.fallback_container(open(args.filename, "rb").read(), - None, addr=shift) + vm=None, addr=args.shiftoffset) else: with open(args.filename, "rb") as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) diff --git a/example/jitter/run_with_linuxenv.py b/example/jitter/run_with_linuxenv.py new file mode 100644 index 00000000..933459f4 --- /dev/null +++ b/example/jitter/run_with_linuxenv.py @@ -0,0 +1,85 @@ +from argparse import ArgumentParser +import logging +import os +import re + +from elfesteem import elf as elf_csts + +from miasm2.os_dep.linux import environment, syscall +from miasm2.analysis.machine import Machine +from miasm2.analysis.binary import Container + +parser = ArgumentParser("Run an ELF in a Linux-like environment") +parser.add_argument("target", help="Target ELF") +parser.add_argument("extra_args", help="Arguments for the target ELF", + nargs="*", default=[]) +parser.add_argument("-j", "--jitter", help="Jitter engine", default="llvm") +parser.add_argument("-p", "--passthrough", help="Reg-exp for passthrough files", + default="^$") +parser.add_argument("-f", "--flags", help="Flags") +parser.add_argument("-v", "--verbose", action="store_true", + help="Activate verbose syscalls") +args = parser.parse_args() + +if args.verbose: + syscall.log.setLevel(logging.DEBUG) + +# Get corresponding interpreter and reloc address +cont_target_tmp = Container.from_stream(open(args.target)) +ld_path = str(cont_target_tmp.executable.getsectionbyname(".interp").content).strip("\x00") +if cont_target_tmp.executable.Ehdr.type in [elf_csts.ET_REL, elf_csts.ET_DYN]: + elf_base_addr = 0x40000000 +elif cont_target_tmp.executable.Ehdr.type == elf_csts.ET_EXEC: + elf_base_addr = 0 # Not relocatable +else: + raise ValueError("Unsuported type %d" % cont_target_tmp.executable.Ehdr.type) + +# Instanciate a jitter +machine = Machine(cont_target_tmp.arch) +jitter = machine.jitter(args.jitter) +jitter.init_stack() + +# Get elements for the target architecture +if cont_target_tmp.arch == "arml": + LinuxEnvironment = environment.LinuxEnvironment_arml + syscall_callbacks = syscall.syscall_callbacks_arml + prepare_loader = environment.prepare_loader_arml +elif cont_target_tmp.arch == "x86_64": + LinuxEnvironment = environment.LinuxEnvironment_x86_64 + syscall_callbacks = syscall.syscall_callbacks_x86_64 + prepare_loader = environment.prepare_loader_x86_64 +else: + raise ValueError("Unsupported architecture: %r", cont_target_tmp.arch) + +# Load the interpreter in memory, applying relocation +linux_env = LinuxEnvironment() +linux_env.filesystem.passthrough.append(re.compile(args.passthrough)) +ld_path = linux_env.filesystem.resolve_path(ld_path) +cont_ld = Container.from_stream(open(ld_path), + vm=jitter.vm, + addr=0x80000000, + apply_reloc=True) +# Load the target ELF in memory, without applying reloc +loc_db = cont_ld.loc_db +cont_target = Container.from_stream(open(args.target), vm=jitter.vm, + loc_db=loc_db, + addr=elf_base_addr, + apply_reloc=False) +# PHDR containing the PH header +elf_phdr_header = [ph32.ph for ph32 in cont_target.executable.ph + if ph32.ph.type == elf_csts.PT_PHDR][0] + +# Prepare the desired environment +argv = [args.target] + args.extra_args +if args.flags: + argv += ["-%s" % args.flags] +envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} +auxv = environment.AuxVec(elf_base_addr + elf_phdr_header.vaddr, + cont_target.entry_point, linux_env) +prepare_loader(jitter, argv, envp, auxv, linux_env) +syscall.enable_syscall_handling(jitter, linux_env, syscall_callbacks) + + +# Run +jitter.init_run(cont_ld.entry_point) +jitter.continue_run() diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index 16e573bb..21e5c356 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -35,34 +35,23 @@ class Container(object): fallback_container = None # Fallback container format @classmethod - def from_string(cls, data, vm=None, addr=None): + def from_string(cls, data, *args, **kwargs): """Instanciate a container and parse the binary @data: str containing the binary - @vm: (optional) VmMngr instance to link with the executable - @addr: (optional) Base address for the binary. If set, - force the unknown format """ log.info('Load binary') - - if not addr: - addr = 0 - else: - # Force fallback mode - log.warning('Fallback to string input (offset=%s)', hex(addr)) - return cls.fallback_container(data, vm, addr) - # Try each available format for container_type in cls.available_container: try: - return container_type(data, vm) + return container_type(data, *args, **kwargs) except ContainerSignatureException: continue except ContainerParsingException, error: log.error(error) # Fallback mode - log.warning('Fallback to string input (offset=%s)', hex(addr)) - return cls.fallback_container(data, vm, addr) + log.warning('Fallback to string input') + return cls.fallback_container(data, *args, **kwargs) @classmethod def register_container(cls, container): @@ -85,20 +74,25 @@ class Container(object): return Container.from_string(stream.read(), *args, **kwargs) def parse(self, data, *args, **kwargs): - "Launch parsing of @data" + """Launch parsing of @data + @data: str containing the binary + """ raise NotImplementedError("Abstract method") - def __init__(self, *args, **kwargs): + def __init__(self, data, loc_db=None, **kwargs): "Alias for 'parse'" # Init attributes self._executable = None self._bin_stream = None self._entry_point = None self._arch = None - self._loc_db = LocationDB() + if loc_db is None: + self._loc_db = LocationDB() + else: + self._loc_db = loc_db # Launch parsing - self.parse(*args, **kwargs) + self.parse(data, **kwargs) @property def bin_stream(self): @@ -135,7 +129,7 @@ class Container(object): class ContainerPE(Container): "Container abstraction for PE" - def parse(self, data, vm=None): + def parse(self, data, vm=None, **kwargs): from miasm2.jitter.loader.pe import vm_load_pe, guess_arch from elfesteem import pe_init @@ -172,8 +166,18 @@ class ContainerPE(Container): class ContainerELF(Container): "Container abstraction for ELF" - def parse(self, data, vm=None): - from miasm2.jitter.loader.elf import vm_load_elf, guess_arch + def parse(self, data, vm=None, addr=0, apply_reloc=False, **kwargs): + """Load an ELF from @data + @data: str containing the ELF bytes + @vm (optional): VmMngr instance. If set, load the ELF in virtual memory + @addr (optional): base address the ELF in virtual memory + @apply_reloc (optional): if set, apply relocation during ELF loading + + @addr and @apply_reloc are only meaningful in the context of a + non-empty @vm + """ + from miasm2.jitter.loader.elf import vm_load_elf, guess_arch, \ + fill_loc_db_with_symbols from elfesteem import elf_init # Parse signature @@ -183,7 +187,9 @@ class ContainerELF(Container): # Build executable instance try: if vm is not None: - self._executable = vm_load_elf(vm, data) + self._executable = vm_load_elf(vm, data, loc_db=self.loc_db, + base_addr=addr, + apply_reloc=apply_reloc) else: self._executable = elf_init.ELF(data) except Exception, error: @@ -195,37 +201,20 @@ class ContainerELF(Container): # Build the bin_stream instance and set the entry point try: self._bin_stream = bin_stream_elf(self._executable.virt) - self._entry_point = self._executable.Ehdr.entry + self._entry_point = self._executable.Ehdr.entry + addr except Exception, error: raise ContainerParsingException('Cannot read ELF: %s' % error) - # Add known symbols - for symb_source_name in [".symtab", ".dynsym"]: - symb_source = self._executable.getsectionbyname(symb_source_name) - if symb_source is None: - continue - for name, symb in symb_source.symbols.iteritems(): - offset = symb.value - if offset == 0: - continue - if not name: - continue - try: - self._loc_db.add_location(name, offset) - except ValueError: - # Two symbols points on the same offset - log.warning("Same offset (%s) for %s and %s", - (hex(offset), - name, - self._loc_db.get_offset_location(offset))) - continue + if vm is None: + # Add known symbols (vm_load_elf already does it) + fill_loc_db_with_symbols(self._executable, self.loc_db, addr) class ContainerUnknown(Container): "Container abstraction for unknown format" - def parse(self, data, vm, addr): + def parse(self, data, vm=None, addr=0, **kwargs): self._bin_stream = bin_stream_str(data, shift=addr) if vm is not None: vm.add_memory_page(addr, diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index 2b5dc4cf..267bcea6 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -106,6 +106,14 @@ class jitter_arml(Jitter): func_prepare_systemv = func_prepare_stdcall get_arg_n_systemv = get_arg_n_stdcall + def syscall_args_systemv(self, n_args): + args = [self.cpu.R0, self.cpu.R1, self.cpu.R2, self.cpu.R3, + self.cpu.R4, self.cpu.R5][:n_args] + return args + + def syscall_ret_systemv(self, value): + self.cpu.R0 = value + def init_run(self, *args, **kwargs): Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index bf74051d..3322e722 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -272,3 +272,11 @@ class jitter_x86_64(Jitter): remaining_args = args[len(args_regs):] for arg in reversed(remaining_args): self.push_uint64_t(arg) + + def syscall_args_systemv(self, n_args): + args = [self.cpu.RDI, self.cpu.RSI, self.cpu.RDX, self.cpu.R10, + self.cpu.R8, self.cpu.R9][:n_args] + return args + + def syscall_ret_systemv(self, value): + self.cpu.RAX = value diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 2f16543f..c00b4c5c 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1596,3 +1596,17 @@ class disasmEngine(object): warnings.warn('DEPRECATION WARNING: use "dis_multiblock" instead of "dis_multibloc"') return self.dis_multiblock(offset, blocs) + def dis_instr(self, offset): + """Disassemble one instruction at offset @offset and return the + corresponding instruction instance + @offset: targeted offset to disassemble + """ + old_lineswd = self.lines_wd + self.lines_wd = 1 + try: + block = self.dis_block(offset) + finally: + self.lines_wd = old_lineswd + + instr = block.lines[0] + return instr diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py index 39c1c99a..21e2d713 100644 --- a/miasm2/core/locationdb.py +++ b/miasm2/core/locationdb.py @@ -39,6 +39,8 @@ class LocationDB(object): None >>> loc_db.get_location_offset(loc_key2) 0x1234 + >>> loc_db.get_location_offset("first_name") + 0x5678 # Display a location >>> loc_db.pretty_str(loc_key1) @@ -112,6 +114,16 @@ class LocationDB(object): return loc_key return self.add_location(offset=offset) + def get_name_offset(self, name): + """ + Return the offset of @name if any, None otherwise. + @name: target name + """ + loc_key = self.get_name_location(name) + if loc_key is None: + return None + return self.get_location_offset(loc_key) + def add_location_name(self, loc_key, name): """Associate a name @name to a given @loc_key @name: str instance diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 49c696e9..4a0eae93 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -821,7 +821,10 @@ class LLVMFunction(): "ret": LLVMType.IntType(expr.size), "args": [LLVMType.IntType(expr.args[0].size)] }} - self.llvm_context.add_fc(func_sig, readonly=True) + try: + self.mod.get_global(func_llvm_name) + except KeyError: + self.llvm_context.add_fc(func_sig, readonly=True) ret = builder.call(self.mod.get_global(func_llvm_name), [arg]) self.update_cache(expr, ret) diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py index 01dea647..af078ab5 100644 --- a/miasm2/jitter/loader/elf.py +++ b/miasm2/jitter/loader/elf.py @@ -28,11 +28,14 @@ def get_import_address_elf(e): return import2addr -def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): +def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): # XXX quick hack fa = get_import_address_elf(e) dyn_funcs = {} for (libname, libfunc), ads in fa.items(): + # Quick hack - if a symbol is already known, do not stub it + if loc_db and loc_db.get_name_location(libfunc) is not None: + continue for ad in ads: ad_base_lib = runtime_lib.lib_get_add_base(libname) ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) @@ -50,25 +53,208 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): ad_libfunc)) return runtime_lib, dyn_funcs +def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): + """Parse the elfesteem's ELF @elf to extract symbols, and fill the LocationDB + instance @loc_db with parsed symbols. + + The ELF is considered mapped at @base_addr + @elf: elfesteem's ELF instance + @loc_db: LocationDB used to retrieve symbols'offset + @base_addr: addr to reloc to (if any) + """ + # Get symbol sections + symbol_sections = [] + for section_header in elf.sh: + if hasattr(section_header, 'symtab'): + log.debug("Find %d symbols in %r", len(section_header.symtab), + section_header) + symbol_sections.append(section_header) + elif isinstance(section_header, ( + elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed + )): + log.debug("Find GNU version related section, unsupported for now") + + for section in symbol_sections: + for symbol_entry in section.symtab: + # Here, the computation of vaddr assumes 'elf' is an executable or a + # shared object file + + # For relocatable file, symbol_entry.value is an offset from the section + # base -> not handled here + st_bind = symbol_entry.info >> 4 + st_type = symbol_entry.info & 0xF + + if st_type not in [ + elf_csts.STT_NOTYPE, + elf_csts.STT_OBJECT, + elf_csts.STT_FUNC, + elf_csts.STT_COMMON, + elf_csts.STT_GNU_IFUNC, + ]: + # Ignore symbols useless in linking + continue + + if st_bind == elf_csts.STB_GLOBAL: + # Global symbol + weak = False + elif st_bind == elf_csts.STB_WEAK: + # Weak symbol + weak = True + else: + # Ignore local & others symbols + continue + + absolute = False + if symbol_entry.shndx == 0: + # SHN_UNDEF + continue + elif symbol_entry.shndx == 0xfff1: + # SHN_ABS + absolute = True + log.debug("Absolute symbol %r - %x", symbol_entry.name, + symbol_entry.value) + elif 0xff00 <= symbol_entry.shndx <= 0xffff: + # Reserved index (between SHN_LORESERV and SHN_HIRESERVE) + raise RuntimeError("Unsupported reserved index: %r" % symbol_entry) + + name = symbol_entry.name + if name == "": + # Ignore empty symbol + log.debug("Empty symbol %r", symbol_entry) + continue + + if absolute: + vaddr = symbol_entry.value + else: + vaddr = symbol_entry.value + base_addr + + # 'weak' information is only used to force global symbols for now + already_existing_loc = loc_db.get_name_location(name) + if already_existing_loc is not None: + if weak: + # Weak symbol, this is ok to already exists, skip it + continue + else: + # Global symbol, force it + loc_db.remove_location_name(already_existing_loc, + name) + already_existing_off = loc_db.get_offset_location(vaddr) + if already_existing_off is not None: + loc_db.add_location_name(already_existing_off, name) + else: + loc_db.add_location(name=name, offset=vaddr) + + +def apply_reloc_x86(elf, vm, section, base_addr, loc_db): + """Apply relocation for x86 ELF contained in the secion @section + @elf: elfesteem's ELF instance + @vm: VmMngr instance + @section: elf's section containing relocation to perform + @base_addr: addr to reloc to + @loc_db: LocationDB used to retrieve symbols'offset + """ + if elf.size == 64: + addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, + struct.pack("<Q", addr)) + elif elf.size == 32: + addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, + struct.pack("<I", addr)) + else: + raise ValueError("Unsupported elf size %d" % elf.size) + + symb_section = section.linksection + for reloc in section.reltab: + + # Parse relocation info + r_addend = reloc.addend if hasattr(reloc, "addend") else 0 + r_info = reloc.info + if elf.size == 64: + r_info_sym = (r_info >> 32) & 0xFFFFFFFF + r_info_type = r_info & 0xFFFFFFFF + elif elf.size == 32: + r_info_sym = (r_info >> 8) & 0xFFFFFF + r_info_type = r_info & 0xFF + + is_ifunc = False + symbol_entry = None + if r_info_sym > 0: + symbol_entry = symb_section.symtab[r_info_sym] -def vm_load_elf(vm, fdata, name="", **kargs): + r_offset = reloc.offset + r_addend = reloc.cstr.sym + + if (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_RELATIVE), + (64, elf_csts.R_X86_64_IRELATIVE), + (32, elf_csts.R_386_RELATIVE), + (32, elf_csts.R_386_IRELATIVE), + ]: + # B + A + addr = base_addr + r_addend + where = base_addr + r_offset + elif reloc.type == elf_csts.R_X86_64_64: + # S + A + addr_symb = loc_db.get_name_offset(symbol_entry.name) + if addr_symb is None: + log.warning("Unable to find symbol %r" % symbol_entry.name) + continue + addr = addr_symb + r_addend + where = base_addr + r_offset + elif (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_TPOFF64), + (64, elf_csts.R_X86_64_DTPMOD64), + (32, elf_csts.R_386_TLS_TPOFF), + ]: + # Thread dependent, ignore for now + log.debug("Skip relocation TPOFF64 %r", reloc) + continue + elif (elf.size, reloc.type) in [ + (64, elf_csts.R_X86_64_GLOB_DAT), + (64, elf_csts.R_X86_64_JUMP_SLOT), + (32, elf_csts.R_386_JMP_SLOT), + (32, elf_csts.R_386_GLOB_DAT), + ]: + # S + addr = loc_db.get_name_offset(symbol_entry.name) + if addr is None: + log.warning("Unable to find symbol %r" % symbol_entry.name) + continue + is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC + where = base_addr + r_offset + else: + raise ValueError( + "Unknown relocation type: %d (%r)" % (reloc.type, + reloc) + ) + if is_ifunc: + # Resolve at runtime - not implemented for now + log.warning("Relocation for %r (at %x, currently pointing on %x) " + "has to be resolved at runtime", + name, where, sym_addr) + continue + + log.debug("Write %x at %x", addr, where) + addr_writer(where, addr) + + +def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False, + **kargs): """ Very dirty elf loader TODO XXX: implement real loader """ - # log.setLevel(logging.DEBUG) - e = elf_init.ELF(fdata, **kargs) + elf = elf_init.ELF(fdata, **kargs) i = interval() all_data = {} - for p in e.ph.phlist: + for p in elf.ph.phlist: if p.ph.type != elf_csts.PT_LOAD: continue log.debug( '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, p.ph.filesz, p.ph.type) - data_o = e._content[p.ph.offset:p.ph.offset + p.ph.filesz] - addr_o = p.ph.vaddr + data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz] + addr_o = p.ph.vaddr + base_addr a_addr = addr_o & ~0xFFF b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) b_addr = (b_addr + 0xFFF) & ~0xFFF @@ -81,7 +267,31 @@ def vm_load_elf(vm, fdata, name="", **kargs): for r_vaddr, data in all_data.items(): vm.set_mem(r_vaddr, data) - return e + + if loc_db is not None: + fill_loc_db_with_symbols(elf, loc_db, base_addr) + + if apply_reloc: + arch = guess_arch(elf) + sections = [] + for section in elf.sh: + if not hasattr(section, 'reltab'): + continue + if isinstance(section, elf_init.RelATable): + pass + elif isinstance(section, elf_init.RelTable): + if arch == "x86_64": + log.warning("REL section should not happen in x86_64") + else: + raise RuntimeError("Unknown relocation section type: %r" % section) + sections.append(section) + for section in sections: + if arch in ["x86_64", "x86_32"]: + apply_reloc_x86(elf, vm, section, base_addr, loc_db) + else: + log.debug("Unsupported relocation for arch %r" % arch) + + return elf class libimp_elf(libimp): diff --git a/miasm2/jitter/op_semantics.c b/miasm2/jitter/op_semantics.c index 061e7736..e997226a 100644 --- a/miasm2/jitter/op_semantics.c +++ b/miasm2/jitter/op_semantics.c @@ -302,11 +302,12 @@ unsigned int x86_cpuid(unsigned int a, unsigned int reg_num) fprintf(stderr, "not implemented x86_cpuid reg %x\n", reg_num); exit(EXIT_FAILURE); } - + // cases are output: EAX: 0; EBX: 1; ECX: 2; EDX: 3 if (a == 0){ switch(reg_num){ case 0: return 0xa; + // "GenuineIntel" case 1: return 0x756E6547; case 2: @@ -319,8 +320,10 @@ unsigned int x86_cpuid(unsigned int a, unsigned int reg_num) else if (a == 1){ switch(reg_num){ case 0: - //return 0x000006FB; - return 0x00020652; + // Using a version too high will enable recent + // instruction set + return 0x000006FB; + //return 0x00020652; case 1: //return 0x02040800; return 0x00000800; @@ -328,13 +331,58 @@ unsigned int x86_cpuid(unsigned int a, unsigned int reg_num) //return 0x0004E3BD; return 0x00000209; case 3: - //return 0xBFEBFBFF; - return 0x078bf9ff; + return (/* fpu */ 1 << 0) | + (/* tsc */ 1 << 4) | + (/* cx8 */ 1 << 8) | + (/* cmov */ 1 << 15) | + (/* mmx */ 1 << 23) | + (/* sse */ 1 << 25) | + (/* sse2 */ 1 << 26) | + (/* ia64 */ 1 << 30); + } + } + // Cache and TLB + else if (a == 2){ + switch(reg_num){ + case 0: + return 0x00000000; + case 1: + return 0x00000000; + case 2: + return 0x00000000; + case 3: + return 0x00000000; + } + } + // Intel thread/core and cache topology + else if (a == 4){ + switch(reg_num){ + case 0: + return 0x00000000; + case 1: + return 0x00000000; + case 2: + return 0x00000000; + case 3: + return 0x00000000; + } + } + // Extended features + else if (a == 7){ + switch(reg_num){ + case 0: + return 0x00000000; + case 1: + return (/* fsgsbase */ 1 << 0) | (/* bmi1 */ 1 << 3); + case 2: + return 0x00000000; + case 3: + return 0x00000000; } } else{ fprintf(stderr, "WARNING not implemented x86_cpuid index %X!\n", a); - //exit(EXIT_FAILURE); + exit(EXIT_FAILURE); } return 0; } diff --git a/miasm2/os_dep/linux/__init__.py b/miasm2/os_dep/linux/__init__.py new file mode 100644 index 00000000..4434ce50 --- /dev/null +++ b/miasm2/os_dep/linux/__init__.py @@ -0,0 +1 @@ +# Linux emulation diff --git a/miasm2/os_dep/linux/environment.py b/miasm2/os_dep/linux/environment.py new file mode 100644 index 00000000..c4d0536e --- /dev/null +++ b/miasm2/os_dep/linux/environment.py @@ -0,0 +1,894 @@ +from collections import namedtuple +import functools +import os +import struct +import termios + +from miasm2.core.interval import interval +from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE + + +StatInfo = namedtuple("StatInfo", [ + "st_dev", "st_ino", "st_nlink", "st_mode", "st_uid", "st_gid", "st_rdev", + "st_size", "st_blksize", "st_blocks", "st_atime", "st_atimensec", + "st_mtime", "st_mtimensec", "st_ctime", "st_ctimensec" +]) +StatFSInfo = namedtuple("StatFSInfo", [ + "f_type", "f_bsize", "f_blocks", "f_bfree", "f_bavail", "f_files", + "f_ffree", "f_fsid", "f_namelen", "f_frsize", "f_flags", "f_spare", +]) + + +class FileDescriptor(object): + """Stand for a file descriptor on a system + + According to inode(7), following types are possibles: + - socket + - symbolic link + - regular file + - block device + - directory + - character device + - FIFO + """ + + # st_mode's file type + file_type = None + # st_mode's file mode (9 least bits are file permission bits) + file_mode = 0o0777 + # st_dev / st_rdev + cont_device_id = None + device_id = 0 + # inode number (st_ino) + inode = None + # Number of hardlink (st_nlink) + nlink = 0 + # Owner / group + uid = None + gid = None + # Size (st_size / st_blksize / st_blocks) + size = 0 + blksize = 0 + blocks = 0 + # Times + atime = 0 + atimensec = 0 + mtime = 0 + mtimensec = 0 + ctime = 0 + ctimensec = 0 + + def __init__(self, number): + self.number = number + self.is_closed = False + + def stat(self): + mode = self.file_type | self.file_mode + return StatInfo( + st_dev=self.cont_device_id, st_ino=self.inode, + st_nlink=self.nlink, st_mode=mode, + st_uid=self.uid, st_gid=self.gid, + st_rdev=self.device_id, st_size=self.size, + st_blksize=self.blksize, st_blocks=self.blocks, + st_atime=self.atime, st_atimensec=self.atimensec, + st_mtime=self.mtime, st_mtimensec=self.mtimensec, + st_ctime=self.ctime, st_ctimensec=self.ctimensec + ) + + def close(self): + self.is_closed = True + + +class FileDescriptorCharDevice(FileDescriptor): + file_type = 0o0020000 # S_IFCHR + file_mode = 0o0620 + cont_device_id = 1 + device_id = 1 + + +class FileDescriptorSTDIN(FileDescriptorCharDevice): + """Special file descriptor standinf for STDIN""" + inode = 0 + + def read(self, count): + raise RuntimeError("Not implemented") + + +class FileDescriptorSTDOUT(FileDescriptorCharDevice): + """Special file descriptor standinf for STDOUT""" + inode = 1 + + def write(self, data): + print "[STDOUT] %s" % data.rstrip() + + +class FileDescriptorSTDERR(FileDescriptorCharDevice): + """Special file descriptor standinf for STDERR""" + inode = 2 + + def write(self, data): + print "[STDERR] %s" % data.rstrip() + + +class FileDescriptorDirectory(FileDescriptor): + """FileDescription designing a directory""" + + file_type = 0o0040000 # S_IFDIR + + def __init__(self, number, flags, filesystem, real_path): + super(FileDescriptorDirectory, self).__init__(number) + self.filesystem = filesystem + self.real_path = real_path + self.cur_listdir = None + self.flags = flags + + def listdir(self): + if self.cur_listdir is None: + self.cur_listdir = os.listdir(self.real_path) + while self.cur_listdir: + yield self.cur_listdir.pop() + + +class FileDescriptorRegularFile(FileDescriptor): + """FileDescriptor designing a regular file""" + + file_type = 0o0100000 # S_IFREG + + def __init__(self, number, flags, filesystem, real_fd): + super(FileDescriptorRegularFile, self).__init__(number) + self.flags = flags + self.filesystem = filesystem + self.real_fd = real_fd + + def write(self, data): + raise RuntimeError("Not implemented") + + def read(self, count): + return os.read(self.real_fd, count) + + def close(self): + super(FileDescriptorRegularFile, self).close() + return os.close(self.real_fd) + + def lseek(self, offset, whence): + return os.lseek(self.real_fd, offset, whence) # SEEK_SET + + def tell(self): + return self.lseek(0, 1) # SEEK_CUR + + def seek(self, offset): + return self.lseek(offset, 0) # SEEK_SET + + +class FileDescriptorSocket(FileDescriptor): + """FileDescription standing for a socket""" + + file_type = 0o0140000 # S_IFSOCK + + def __init__(self, number, family, type_, protocol): + super(FileDescriptorSocket, self).__init__(number) + self.family = family + self.type_ = type_ + self.protocol = protocol + + +class FileSystem(object): + """File system abstraction + Provides standard operations on the filesystem, (a bit like FUSE) + + API using FileSystem only used sandbox-side path. FileSystem should be the + only object able to interact with real path, outside the sandbox. + + Thus, if `resolve_path` is correctly implemented and used, it should not be + possible to modify files outside the sandboxed path + """ + + device_id = 0x1234 # ID of device containing file (stat.st_dev) + blocksize = 0x1000 # Size of block on this filesystem + f_type = 0xef53 # (Type of filesystem) EXT4_SUPER_MAGIC + nb_total_block = 0x1000 + nb_free_block = 0x100 + nb_avail_block = nb_free_block # Available to unprivileged user + nb_total_fnode = 100 # Total file nodes in filesystem + nb_free_fnode = 50 + max_filename_len = 256 + fragment_size = 0 + mount_flags = 0 + + def __init__(self, base_path, linux_env): + self.base_path = base_path + self.linux_env = linux_env + self.passthrough = [] + self.path_to_inode = {} # Real path (post-resolution) -> inode number + + def resolve_path(self, path, follow_link=True): + """Resolve @path to the corresponding sandboxed path""" + # Remove '../', etc. + path = os.path.normpath(path) + + # Passthrough + for passthrough in self.passthrough: + if hasattr(passthrough, "match"): + if passthrough.match(path): + return path + elif passthrough == path: + return path + + # Remove leading '/' if any (multiple '//' are handled by 'abspath' + if path.startswith(os.path.sep): + path = path[1:] + + base_path = os.path.abspath(self.base_path) + out_path = os.path.join(base_path, path) + assert out_path.startswith(base_path + os.path.sep) + if os.path.islink(out_path): + link_target = os.readlink(out_path) + # Link can be absolute or relative -> absolute + link = os.path.normpath(os.path.join(os.path.dirname(path), link_target)) + if follow_link: + out_path = self.resolve_path(link) + else: + out_path = link + return out_path + + def get_path_inode(self, real_path): + inode = self.path_to_inode.setdefault(real_path, len(self.path_to_inode)) + return inode + + def exists(self, path): + sb_path = self.resolve_path(path) + return os.path.exists(sb_path) + + def readlink(self, path): + sb_path = self.resolve_path(path, follow_link=False) + if not os.path.islink(sb_path): + return None + return os.path.readlink(sb_path) + + def statfs(self): + return StatFSInfo( + f_type=self.f_type, f_bsize=self.blocksize, + f_blocks=self.nb_total_block, f_bfree=self.nb_free_block, + f_bavail=self.nb_avail_block, f_files=self.nb_total_fnode, + f_ffree=self.nb_free_fnode, f_fsid=self.device_id, + f_namelen=self.max_filename_len, + f_frsize=self.fragment_size, f_flags=self.mount_flags, f_spare=0) + + def getattr_(self, path, follow_link=True): + sb_path = self.resolve_path(path, follow_link=follow_link) + flags = self.linux_env.O_RDONLY + if os.path.isdir(sb_path): + flags |= self.linux_env.O_DIRECTORY + + fd = self.open_(path, flags, follow_link=follow_link) + info = self.linux_env.fstat(fd) + self.linux_env.close(fd) + return info + + def open_(self, path, flags, follow_link=True): + path = self.resolve_path(path, follow_link=follow_link) + if not os.path.exists(path): + # ENOENT (No such file or directory) + return -1 + fd = self.linux_env.next_fd() + acc_mode = flags & self.linux_env.O_ACCMODE + + if os.path.isdir(path): + assert flags & self.linux_env.O_DIRECTORY == self.linux_env.O_DIRECTORY + if acc_mode == self.linux_env.O_RDONLY: + fdesc = FileDescriptorDirectory(fd, flags, self, path) + else: + raise RuntimeError("Not implemented") + elif os.path.isfile(path): + if acc_mode == os.O_RDONLY: + # Read only + real_fd = os.open(path, os.O_RDONLY) + else: + raise RuntimeError("Not implemented") + fdesc = FileDescriptorRegularFile(fd, flags, self, real_fd) + + elif os.path.islink(path): + raise RuntimeError("Not implemented") + else: + raise RuntimeError("Unknown file type for %r" % path) + + self.linux_env.file_descriptors[fd] = fdesc + # Set stat info + fdesc.cont_device_id = self.device_id + fdesc.inode = self.get_path_inode(path) + fdesc.uid = self.linux_env.user_uid + fdesc.gid = self.linux_env.user_gid + size = os.path.getsize(path) + fdesc.size = size + fdesc.blksize = self.blocksize + fdesc.blocks = (size + ((512 - (size % 512)) % 512)) / 512 + return fd + + +class Networking(object): + """Network abstraction""" + + def __init__(self, linux_env): + self.linux_env = linux_env + + def socket(self, family, type_, protocol): + fd = self.linux_env.next_fd() + fdesc = FileDescriptorSocket(fd, family, type_, protocol) + self.linux_env.file_descriptors[fd] = fdesc + return fd + + +class LinuxEnvironment(object): + """A LinuxEnvironment regroups information to simulate a Linux-like + environment""" + + # To be overrided + platform_arch = None + + # User information + user_uid = 1000 + user_euid = 1000 + user_gid = 1000 + user_egid = 1000 + user_name = "user" + + # Memory mapping information + brk_current = 0x74000000 + mmap_current = 0x75000000 + + # System information + sys_sysname = "Linux" + sys_nodename = "user-pc" + sys_release = "4.13.0-19-generic" + sys_version = "#22-Ubuntu" + sys_machine = None + + # Filesystem + filesystem_base = "file_sb" + file_descriptors = None + + # Current process + process_tid = 1000 + process_pid = 1000 + + # Syscall restrictions + ioctl_allowed = None # list of (fd, cmd), None value for wildcard + ioctl_disallowed = None # list of (fd, cmd), None value for wildcard + + # Time + base_time = 1531900000 + + # Arch specific constant + O_ACCMODE = None + O_CLOEXEC = None + O_DIRECTORY = None + O_LARGEFILE = None + O_NONBLOCK = None + O_RDONLY = None + + def __init__(self): + stdin = FileDescriptorSTDIN(0) + stdout = FileDescriptorSTDOUT(1) + stderr = FileDescriptorSTDERR(2) + for std in [stdin, stdout, stderr]: + std.uid = self.user_uid + std.gid = self.user_gid + self.file_descriptors = { + 0: stdin, + 1: stdout, + 2: stderr, + } + self.ioctl_allowed = [ + (0, termios.TCGETS), + (0, termios.TIOCGWINSZ), + (0, termios.TIOCSWINSZ), + (1, termios.TCGETS), + (1, termios.TIOCGWINSZ), + (1, termios.TIOCSWINSZ), + ] + self.ioctl_disallowed = [ + (2, termios.TCGETS), + (0, termios.TCSETSW), + ] + self.filesystem = FileSystem(self.filesystem_base, self) + self.network = Networking(self) + + def next_fd(self): + return len(self.file_descriptors) + + def clock_gettime(self): + out = self.base_time + self.base_time += 1 + return out + + def open_(self, path, flags, follow_link=True): + """Stub for 'open' syscall""" + return self.filesystem.open_(path, flags, follow_link=follow_link) + + def socket(self, family, type_, protocol): + """Stub for 'socket' syscall""" + return self.network.socket(family, type_, protocol) + + def fstat(self, fd): + """Get file status through fd""" + fdesc = self.file_descriptors.get(fd) + if fdesc is None: + return None + return fdesc.stat() + + def stat(self, path): + """Get file status through path""" + return self.filesystem.getattr_(path) + + def lstat(self, path): + """Get file status through path (not following links)""" + return self.filesystem.getattr_(path, follow_link=False) + + def close(self, fd): + """Stub for 'close' syscall""" + fdesc = self.file_descriptors.get(fd) + if fdesc is None: + return None + return fdesc.close() + + def write(self, fd, data): + """Stub for 'write' syscall""" + fdesc = self.file_descriptors.get(fd) + if fdesc is None: + return None + fdesc.write(data) + return len(data) + + def read(self, fd, count): + """Stub for 'read' syscall""" + fdesc = self.file_descriptors.get(fd) + if fdesc is None: + return None + return fdesc.read(count) + + def getdents(self, fd, count, packing_callback): + """Stub for 'getdents' syscall + + 'getdents64' must be handled by caller (only the structure layout is + modified) + + @fd: getdents' fd argument + @count: getdents' count argument + @packing_callback(cur_len, d_ino, d_type, name) -> entry + """ + fdesc = self.file_descriptors[fd] + if not isinstance(fdesc, FileDescriptorDirectory): + raise RuntimeError("Not implemented") + + out = "" + # fdesc.listdir continues from where it stopped + for name in fdesc.listdir(): + d_ino = 1 # Not the real one + d_type = 0 # DT_UNKNOWN (getdents(2) "All applications must properly + # handle a return of DT_UNKNOWN.") + entry = packing_callback(len(out), d_ino, d_type, name) + + if len(out) + len(entry) > count: + # Report to a further call + fdesc.cur_listdir.append(name) + break + out = out + entry + return out + + def ioctl(self, fd, cmd, arg): + """Stub for 'ioctl' syscall + Return the list of element to pack back depending on target ioctl + If the ioctl is disallowed, return False + """ + allowed = False + disallowed = False + for test in [(fd, cmd), (None, cmd), (fd, None)]: + if test in self.ioctl_allowed: + allowed = True + if test in self.ioctl_disallowed: + disallowed = True + + if allowed and disallowed: + raise ValueError("fd: %x, cmd: %x is allowed and disallowed" % (fd, cmd)) + + if allowed: + if cmd == termios.TCGETS: + return 0, 0, 0, 0 + elif cmd == termios.TIOCGWINSZ: + # struct winsize + # { + # unsigned short ws_row; /* rows, in characters */ + # unsigned short ws_col; /* columns, in characters */ + # unsigned short ws_xpixel; /* horizontal size, pixels */ + # unsigned short ws_ypixel; /* vertical size, pixels */ + # }; + return 1000, 360, 1000, 1000 + elif cmd == termios.TIOCSWINSZ: + # Ignore it + return + else: + raise RuntimeError("Not implemented") + + elif disallowed: + return False + + else: + raise KeyError("Unknown ioctl fd:%x cmd:%x" % (fd, cmd)) + + def mmap(self, addr, len_, prot, flags, fd, off, vmmngr): + """Stub for 'mmap' syscall + + 'mmap2' must be implemented by calling this function with off * 4096 + """ + if addr == 0: + addr = self.mmap_current + self.mmap_current += (len_ + 0x1000) & ~0xfff + + all_mem = vmmngr.get_all_memory() + mapped = interval([(start, start + info["size"] - 1) + for start, info in all_mem.iteritems()]) + + MAP_FIXED = 0x10 + if flags & MAP_FIXED: + # Alloc missing and override + missing = interval([(addr, addr + len_ - 1)]) - mapped + for start, stop in missing: + vmmngr.add_memory_page(start, PAGE_READ|PAGE_WRITE, + "\x00" * (stop - start + 1), + "mmap allocated") + else: + # Find first candidate segment nearby addr + for start, stop in mapped: + if stop < addr: + continue + rounded = (stop + 1 + 0x1000) & ~0xfff + if (interval([(rounded, rounded + len_)]) & mapped).empty: + addr = rounded + break + else: + assert (interval([(addr, addr + len_)]) & mapped).empty + + vmmngr.add_memory_page(addr, PAGE_READ|PAGE_WRITE, "\x00" * len_, + "mmap allocated") + + + if fd == 0xffffffff: + if off != 0: + raise RuntimeError("Not implemented") + data = "\x00" * len_ + else: + fdesc = self.file_descriptors[fd] + cur_pos = fdesc.tell() + fdesc.seek(off) + data = fdesc.read(len_) + fdesc.seek(cur_pos) + + vmmngr.set_mem(addr, data) + return addr + + def brk(self, addr, vmmngr): + """Stub for 'brk' syscall""" + if addr == 0: + addr = self.brk_current + else: + all_mem = vmmngr.get_all_memory() + mapped = interval([(start, start + info["size"] - 1) + for start, info in all_mem.iteritems()]) + + # Alloc missing and override + missing = interval([(self.brk_current, addr)]) - mapped + for start, stop in missing: + vmmngr.add_memory_page(start, PAGE_READ|PAGE_WRITE, + "\x00" * (stop - start + 1), + "BRK") + + self.brk_current = addr + return addr + + +class LinuxEnvironment_x86_64(LinuxEnvironment): + platform_arch = "x86_64" + sys_machine = "x86_64" + + O_ACCMODE = 0x3 + O_CLOEXEC = 0x80000 + O_DIRECTORY = 0x10000 + O_LARGEFILE = 0x8000 + O_NONBLOCK = 0x800 + O_RDONLY = 0 + + +class LinuxEnvironment_arml(LinuxEnvironment): + platform_arch = "arml" + sys_machine = "arml" + + O_ACCMODE = 0x3 + O_CLOEXEC = 0x80000 + O_DIRECTORY = 0x4000 + O_LARGEFILE = 0x20000 + O_NONBLOCK = 0x800 + O_RDONLY = 0 + + # ARM specific + tls = 0 + # get_tls: __kuser_helper_version >= 1 + # cmpxchg: __kuser_helper_version >= 2 + # memory_barrier: __kuser_helper_version >= 3 + kuser_helper_version = 3 + + +class AuxVec(object): + """Auxiliary vector abstraction, filled with default values + (mainly based on https://lwn.net/Articles/519085) + + # Standard usage + >>> auxv = AuxVec(elf_base_addr, cont_target.entry_point, linux_env) + + # Enable AT_SECURE + >>> auxv = AuxVec(..., AuxVec.AT_SECURE=1) + # Modify AT_RANDOM + >>> auxv = AuxVec(..., AuxVec.AT_RANDOM="\x00"*0x10) + + # Using AuxVec instance for stack preparation + # First, fill memory with vectors data + >>> for AT_number, data in auxv.data_to_map(): + dest_ptr = ... + copy_to_dest(data, dest_ptr) + auxv.ptrs[AT_number] = dest_ptr + # Then, get the key: value (with value being sometime a pointer) + >>> for auxid, auxval in auxv.iteritems(): + ... + """ + + AT_PHDR = 3 + AT_PHNUM = 5 + AT_PAGESZ = 6 + AT_ENTRY = 9 + AT_UID = 11 + AT_EUID = 12 + AT_GID = 13 + AT_EGID = 14 + AT_PLATFORM = 15 + AT_HWCAP = 16 + AT_SECURE = 23 + AT_RANDOM = 25 + AT_SYSINFO_EHDR = 33 + + def __init__(self, elf_phdr_vaddr, entry_point, linux_env, **kwargs): + """Instanciate an AuxVec, with required elements: + - elf_phdr_vaddr: virtual address of the ELF's PHDR in memory + - entry_point: virtual address of the ELF entry point + - linux_env: LinuxEnvironment instance, used to provides some of the + option values + + Others options can be overrided by named arguments + + """ + self.info = { + self.AT_PHDR: elf_phdr_vaddr, + self.AT_PHNUM: 9, + self.AT_PAGESZ: 0x1000, + self.AT_ENTRY: entry_point, + self.AT_UID: linux_env.user_uid, + self.AT_EUID: linux_env.user_euid, + self.AT_GID: linux_env.user_gid, + self.AT_EGID: linux_env.user_egid, + self.AT_PLATFORM: linux_env.platform_arch, + self.AT_HWCAP: 0, + self.AT_SECURE: 0, + self.AT_RANDOM: "\x00" * 0x10, + # vDSO is not mandatory + self.AT_SYSINFO_EHDR: None, + } + self.info.update(kwargs) + self.ptrs = {} # info key -> corresponding virtual address + + def data_to_map(self): + """Iterator on (AT_number, data) + Once the data has been mapped, the corresponding ptr must be set in + 'self.ptrs[AT_number]' + """ + for AT_number in [self.AT_PLATFORM, self.AT_RANDOM]: + yield (AT_number, self.info[AT_number]) + + def iteritems(self): + """Iterator on auxiliary vector id and values""" + for AT_number, value in self.info.iteritems(): + if AT_number in self.ptrs: + value = self.ptrs[AT_number] + if value is None: + # AT to ignore + continue + yield (AT_number, value) + + +def prepare_loader_x86_64(jitter, argv, envp, auxv, linux_env, + hlt_address=0x13371acc): + """Fill the environment with enough information to run a linux loader + + @jitter: Jitter instance + @argv: list of strings + @envp: dict of environment variables names to their values + @auxv: AuxVec instance + @hlt_address (default to 0x13371acc): stopping address + + Example of use: + >>> jitter = machine.jitter() + >>> jitter.init_stack() + >>> linux_env = LinuxEnvironment_x86_64() + >>> argv = ["/bin/ls", "-lah"] + >>> envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} + >>> auxv = AuxVec(elf_base_addr, entry_point, linux_env) + >>> prepare_loader_x86_64(jitter, argv, envp, auxv, linux_env) + # One may want to enable syscall handling here + # The program can now run from the loader + >>> jitter.init_run(ld_entry_point) + >>> jitter.continue_run() + """ + # Stack layout looks like + # [data] + # - auxv values + # - envp name=value + # - argv arguments + # [auxiliary vector] + # [environment pointer] + # [argument vector] + + for AT_number, data in auxv.data_to_map(): + data += "\x00" + jitter.cpu.RSP -= len(data) + ptr = jitter.cpu.RSP + jitter.vm.set_mem(ptr, data) + auxv.ptrs[AT_number] = ptr + + env_ptrs = [] + for name, value in envp.iteritems(): + env = "%s=%s\x00" % (name, value) + jitter.cpu.RSP -= len(env) + ptr = jitter.cpu.RSP + jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + + argv_ptrs = [] + for arg in argv: + arg += "\x00" + jitter.cpu.RSP -= len(arg) + ptr = jitter.cpu.RSP + jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + jitter.push_uint64_t(hlt_address) + jitter.push_uint64_t(0) + jitter.push_uint64_t(0) + for auxid, auxval in auxv.iteritems(): + jitter.push_uint64_t(auxval) + jitter.push_uint64_t(auxid) + jitter.push_uint64_t(0) + for ptr in reversed(env_ptrs): + jitter.push_uint64_t(ptr) + jitter.push_uint64_t(0) + for ptr in reversed(argv_ptrs): + jitter.push_uint64_t(ptr) + jitter.push_uint64_t(len(argv)) + + + +def _arml__kuser_get_tls(linux_env, jitter): + # __kuser_get_tls + jitter.pc = jitter.cpu.LR + jitter.cpu.R0 = linux_env.tls + return True + +def _arml__kuser_cmpxchg(jitter): + oldval = jitter.cpu.R0 + newval = jitter.cpu.R1 + ptr = jitter.cpu.R2 + + value = struct.unpack("<I", jitter.vm.get_mem(ptr, 4))[0] + if value == oldval: + jitter.vm.set_mem(ptr, struct.pack("<I", newval)) + jitter.cpu.R0 = 0 + jitter.cpu.cf = 1 + else: + jitter.cpu.R0 = -1 + jitter.cpu.cf = 0 + + jitter.pc = jitter.cpu.LR + return True + +def _arml__kuser_memory_barrier(jitter): + # __kuser_memory_barrier + jitter.pc = jitter.cpu.LR + return True + +def _arml__kuser_helper_version(linux_env, jitter): + jitter.pc = jitter.cpu.LR + jitter.cpu.R0 = linux_env.kuser_helper_version + return True + + +def prepare_loader_arml(jitter, argv, envp, auxv, linux_env, + hlt_address=0x13371acc): + """Fill the environment with enough information to run a linux loader + + @jitter: Jitter instance + @argv: list of strings + @envp: dict of environment variables names to their values + @auxv: AuxVec instance + @hlt_address (default to 0x13371acc): stopping address + + Example of use: + >>> jitter = machine.jitter() + >>> jitter.init_stack() + >>> linux_env = LinuxEnvironment_arml() + >>> argv = ["/bin/ls", "-lah"] + >>> envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} + >>> auxv = AuxVec(elf_base_addr, entry_point, linux_env) + >>> prepare_loader_arml(jitter, argv, envp, auxv, linux_env) + # One may want to enable syscall handling here + # The program can now run from the loader + >>> jitter.init_run(ld_entry_point) + >>> jitter.continue_run() + """ + # Stack layout looks like + # [data] + # - auxv values + # - envp name=value + # - argv arguments + # [auxiliary vector] + # [environment pointer] + # [argument vector] + + for AT_number, data in auxv.data_to_map(): + data += "\x00" + jitter.cpu.SP -= len(data) + ptr = jitter.cpu.SP + jitter.vm.set_mem(ptr, data) + auxv.ptrs[AT_number] = ptr + + env_ptrs = [] + for name, value in envp.iteritems(): + env = "%s=%s\x00" % (name, value) + jitter.cpu.SP -= len(env) + ptr = jitter.cpu.SP + jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + + argv_ptrs = [] + for arg in argv: + arg += "\x00" + jitter.cpu.SP -= len(arg) + ptr = jitter.cpu.SP + jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + jitter.push_uint32_t(hlt_address) + jitter.push_uint32_t(0) + jitter.push_uint32_t(0) + for auxid, auxval in auxv.iteritems(): + jitter.push_uint32_t(auxval) + jitter.push_uint32_t(auxid) + jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + jitter.push_uint32_t(ptr) + jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + jitter.push_uint32_t(ptr) + jitter.push_uint32_t(len(argv)) + + # Add kernel user helpers + # from Documentation/arm/kernel_user_helpers.txt + + if linux_env.kuser_helper_version >= 1: + jitter.add_breakpoint( + 0xFFFF0FE0, + functools.partial(_arml__kuser_get_tls, linux_env) + ) + + if linux_env.kuser_helper_version >= 2: + jitter.add_breakpoint(0XFFFF0FC0, _arml__kuser_cmpxchg) + + if linux_env.kuser_helper_version >= 3: + jitter.add_breakpoint(0xFFFF0FA0, _arml__kuser_memory_barrier) + + jitter.add_breakpoint(0xffff0ffc, _arml__kuser_helper_version) diff --git a/miasm2/os_dep/linux/syscall.py b/miasm2/os_dep/linux/syscall.py new file mode 100644 index 00000000..87839dc4 --- /dev/null +++ b/miasm2/os_dep/linux/syscall.py @@ -0,0 +1,1040 @@ +import fcntl +import functools +import logging +import os +import struct +import termios + +from miasm2.jitter.csts import EXCEPT_PRIV_INSN, EXCEPT_INT_XX + +log = logging.getLogger('syscalls') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.WARNING) + + +def _dump_struct_stat_x86_64(info): + data = struct.pack( + "QQQIIIIQQQQQQQQQQQQQ", + info.st_dev, + info.st_ino, + info.st_nlink, + info.st_mode, + info.st_uid, + info.st_gid, + 0, # 32 bit padding + info.st_rdev, + info.st_size, + info.st_blksize, + info.st_blocks, + info.st_atime, + info.st_atimensec, + info.st_mtime, + info.st_mtimensec, + info.st_ctime, + info.st_ctimensec, + 0, # unused + 0, # unused + 0, # unused + ) + return data + + +def _dump_struct_stat_arml(info): + data = struct.pack( + "QIIIIIIIIIIIIIIIIII", + info.st_dev, + 0, # pad + info.st_ino, + info.st_mode, + info.st_nlink, + info.st_uid, + info.st_gid, + info.st_rdev, + info.st_size, + info.st_blksize, + info.st_blocks, + info.st_atime, + info.st_atimensec, + info.st_mtime, + info.st_mtimensec, + info.st_ctime, + info.st_ctimensec, + 0, # unused + 0, # unused + ) + return data + + +def sys_x86_64_rt_sigaction(jitter, linux_env): + # Parse arguments + sig, act, oact, sigsetsize = jitter.syscall_args_systemv(4) + log.debug("sys_rt_sigaction(%x, %x, %x, %x)", sig, act, oact, sigsetsize) + + # Stub + if oact != 0: + # Return an empty old action + jitter.vm.set_mem(oact, "\x00" * sigsetsize) + jitter.syscall_ret_systemv(0) + + +def sys_generic_brk(jitter, linux_env): + # Parse arguments + addr, = jitter.syscall_args_systemv(1) + log.debug("sys_brk(%d)", addr) + + # Stub + jitter.syscall_ret_systemv(linux_env.brk(addr, jitter.vm)) + + +def sys_x86_64_newuname(jitter, linux_env): + # struct utsname { + # char sysname[]; /* Operating system name (e.g., "Linux") */ + # char nodename[]; /* Name within "some implementation-defined + # network" */ + # char release[]; /* Operating system release (e.g., "2.6.28") */ + # char version[]; /* Operating system version */ + # char machine[]; /* Hardware identifier */ + # } + + # Parse arguments + nameptr, = jitter.syscall_args_systemv(1) + log.debug("sys_newuname(%x)", nameptr) + + # Stub + info = [ + linux_env.sys_sysname, + linux_env.sys_nodename, + linux_env.sys_release, + linux_env.sys_version, + linux_env.sys_machine + ] + # TODO: Elements start at 0x41 multiples on my tests... + output = "" + for elem in info: + output += elem + output += "\x00" * (0x41 - len(elem)) + jitter.vm.set_mem(nameptr, output) + jitter.syscall_ret_systemv(0) + + +def sys_arml_newuname(jitter, linux_env): + # struct utsname { + # char sysname[]; /* Operating system name (e.g., "Linux") */ + # char nodename[]; /* Name within "some implementation-defined + # network" */ + # char release[]; /* Operating system release (e.g., "2.6.28") */ + # char version[]; /* Operating system version */ + # char machine[]; /* Hardware identifier */ + # } + + # Parse arguments + nameptr, = jitter.syscall_args_systemv(1) + log.debug("sys_newuname(%x)", nameptr) + + # Stub + info = [ + linux_env.sys_sysname, + linux_env.sys_nodename, + linux_env.sys_release, + linux_env.sys_version, + linux_env.sys_machine + ] + # TODO: Elements start at 0x41 multiples on my tests... + output = "" + for elem in info: + output += elem + output += "\x00" * (0x41 - len(elem)) + jitter.vm.set_mem(nameptr, output) + jitter.syscall_ret_systemv(0) + + +def sys_generic_access(jitter, linux_env): + # Parse arguments + pathname, mode = jitter.syscall_args_systemv(2) + rpathname = jitter.get_str_ansi(pathname) + rmode = mode + if mode == 1: + rmode = "F_OK" + elif mode == 2: + rmode = "R_OK" + log.debug("sys_access(%s, %s)", rpathname, rmode) + + # Stub + # Do not check the mode + if linux_env.filesystem.exists(rpathname): + jitter.syscall_ret_systemv(0) + else: + jitter.syscall_ret_systemv(-1) + + +def sys_x86_64_openat(jitter, linux_env): + # Parse arguments + dfd, filename, flags, mode = jitter.syscall_args_systemv(4) + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_openat(%x, %r, %x, %x)", dfd, rpathname, flags, mode) + + # Stub + # flags, openat particularity over 'open' are ignored + jitter.syscall_ret_systemv(linux_env.open_(rpathname, flags)) + + +def sys_x86_64_newstat(jitter, linux_env): + # Parse arguments + filename, statbuf = jitter.syscall_args_systemv(2) + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_newstat(%r, %x)", rpathname, statbuf) + + # Stub + if linux_env.filesystem.exists(rpathname): + info = linux_env.stat(rpathname) + data = _dump_struct_stat_x86_64(info) + jitter.vm.set_mem(statbuf, data) + jitter.syscall_ret_systemv(0) + else: + # ENOENT (No such file or directory) + jitter.syscall_ret_systemv(-1) + + +def sys_arml_stat64(jitter, linux_env): + # Parse arguments + filename, statbuf = jitter.syscall_args_systemv(2) + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_newstat(%r, %x)", rpathname, statbuf) + + # Stub + if linux_env.filesystem.exists(rpathname): + info = linux_env.stat(rpathname) + data = _dump_struct_stat_arml(info) + jitter.vm.set_mem(statbuf, data) + jitter.syscall_ret_systemv(0) + else: + # ENOENT (No such file or directory) + jitter.syscall_ret_systemv(-1) + + +def sys_x86_64_writev(jitter, linux_env): + # Parse arguments + fd, vec, vlen = jitter.syscall_args_systemv(3) + log.debug("sys_writev(%d, %d, %x)", fd, vec, vlen) + + # Stub + fdesc = linux_env.file_descriptors[fd] + for iovec_num in xrange(vlen): + # struct iovec { + # void *iov_base; /* Starting address */ + # size_t iov_len; /* Number of bytes to transfer */ + # }; + iovec = jitter.vm.get_mem(vec + iovec_num * 8 * 2, 8*2) + iov_base, iov_len = struct.unpack("QQ", iovec) + fdesc.write(jitter.get_str_ansi(iov_base)[:iov_len]) + + jitter.syscall_ret_systemv(vlen) + + +def sys_arml_writev(jitter, linux_env): + # Parse arguments + fd, vec, vlen = jitter.syscall_args_systemv(3) + log.debug("sys_writev(%d, %d, %x)", fd, vec, vlen) + + # Stub + fdesc = linux_env.file_descriptors[fd] + for iovec_num in xrange(vlen): + # struct iovec { + # void *iov_base; /* Starting address */ + # size_t iov_len; /* Number of bytes to transfer */ + # }; + iovec = jitter.vm.get_mem(vec + iovec_num * 4 * 2, 4*2) + iov_base, iov_len = struct.unpack("II", iovec) + fdesc.write(jitter.get_str_ansi(iov_base)[:iov_len]) + + jitter.syscall_ret_systemv(vlen) + + +def sys_generic_exit_group(jitter, linux_env): + # Parse arguments + status, = jitter.syscall_args_systemv(1) + log.debug("sys_exit_group(%d)", status) + + # Stub + log.debug("Exit with status code %d", status) + jitter.run = False + + +def sys_generic_read(jitter, linux_env): + # Parse arguments + fd, buf, count = jitter.syscall_args_systemv(3) + log.debug("sys_read(%d, %x, %x)", fd, buf, count) + + # Stub + data = linux_env.read(fd, count) + jitter.vm.set_mem(buf, data) + jitter.syscall_ret_systemv(len(data)) + + +def sys_x86_64_fstat(jitter, linux_env): + # Parse arguments + fd, statbuf = jitter.syscall_args_systemv(2) + log.debug("sys_fstat(%d, %x)", fd, statbuf) + + # Stub + info = linux_env.fstat(fd) + data = _dump_struct_stat_x86_64(info) + jitter.vm.set_mem(statbuf, data) + jitter.syscall_ret_systemv(0) + + +def sys_arml_fstat64(jitter, linux_env): + # Parse arguments + fd, statbuf = jitter.syscall_args_systemv(2) + log.debug("sys_fstat(%d, %x)", fd, statbuf) + + # Stub + info = linux_env.fstat(fd) + data = _dump_struct_stat_arml(info) + jitter.vm.set_mem(statbuf, data) + jitter.syscall_ret_systemv(0) + + +def sys_generic_mmap(jitter, linux_env): + # Parse arguments + addr, len_, prot, flags, fd, off = jitter.syscall_args_systemv(6) + log.debug("sys_mmap(%x, %x, %x, %x, %x, %x)", addr, len_, prot, flags, fd, off) + + # Stub + addr = linux_env.mmap(addr, len_, prot & 0xFFFFFFFF, flags & 0xFFFFFFFF, + fd & 0xFFFFFFFF, off, jitter.vm) + jitter.syscall_ret_systemv(addr) + + +def sys_generic_mmap2(jitter, linux_env): + # Parse arguments + addr, len_, prot, flags, fd, off = jitter.syscall_args_systemv(6) + log.debug("sys_mmap2(%x, %x, %x, %x, %x, %x)", addr, len_, prot, flags, fd, off) + off = off * 4096 + + # Stub + addr = linux_env.mmap(addr, len_, prot & 0xFFFFFFFF, flags & 0xFFFFFFFF, + fd & 0xFFFFFFFF, off, jitter.vm) + jitter.syscall_ret_systemv(addr) + + +def sys_generic_mprotect(jitter, linux_env): + # Parse arguments + start, len_, prot = jitter.syscall_args_systemv(3) + assert jitter.vm.is_mapped(start, len_) + log.debug("sys_mprotect(%x, %x, %x)", start, len_, prot) + + # Do nothing + jitter.syscall_ret_systemv(0) + + +def sys_generic_close(jitter, linux_env): + # Parse arguments + fd, = jitter.syscall_args_systemv(1) + log.debug("sys_close(%x)", fd) + + # Stub + linux_env.close(fd) + jitter.syscall_ret_systemv(0) + + +def sys_x86_64_arch_prctl(jitter, linux_env): + # Parse arguments + code_name = { + 0x1001: "ARCH_SET_GS", + 0x1002: "ARCH_SET_FS", + 0x1003: "ARCH_GET_FS", + 0x1004: "ARCH_GET_GS", + } + code = jitter.cpu.RDI + rcode = code_name[code] + addr = jitter.cpu.RSI + log.debug("sys_arch_prctl(%s, %x)", rcode, addr) + + if code == 0x1002: + jitter.cpu.set_segm_base(jitter.cpu.FS, addr) + else: + raise RuntimeError("Not implemented") + jitter.cpu.RAX = 0 + + +def sys_x86_64_set_tid_address(jitter, linux_env): + # Parse arguments + tidptr = jitter.cpu.RDI + # clear_child_tid = tidptr + log.debug("sys_set_tid_address(%x)", tidptr) + + jitter.cpu.RAX = linux_env.process_tid + + +def sys_x86_64_set_robust_list(jitter, linux_env): + # Parse arguments + head = jitter.cpu.RDI + len_ = jitter.cpu.RSI + # robust_list = head + log.debug("sys_set_robust_list(%x, %x)", head, len_) + jitter.cpu.RAX = 0 + +def sys_x86_64_rt_sigprocmask(jitter, linux_env): + # Parse arguments + how = jitter.cpu.RDI + nset = jitter.cpu.RSI + oset = jitter.cpu.RDX + sigsetsize = jitter.cpu.R10 + log.debug("sys_rt_sigprocmask(%x, %x, %x, %x)", how, nset, oset, sigsetsize) + if oset != 0: + raise RuntimeError("Not implemented") + jitter.cpu.RAX = 0 + + +def sys_x86_64_prlimit64(jitter, linux_env): + # Parse arguments + pid = jitter.cpu.RDI + resource = jitter.cpu.RSI + new_rlim = jitter.cpu.RDX + if new_rlim != 0: + raise RuntimeError("Not implemented") + old_rlim = jitter.cpu.R10 + log.debug("sys_prlimit64(%x, %x, %x, %x)", pid, resource, new_rlim, + old_rlim) + + # Stub + if resource == 3: + # RLIMIT_STACK + jitter.vm.set_mem(old_rlim, + struct.pack("QQ", + 0x100000, + 0x7fffffffffffffff, # RLIM64_INFINITY + )) + else: + raise RuntimeError("Not implemented") + jitter.cpu.RAX = 0 + + +def sys_x86_64_statfs(jitter, linux_env): + # Parse arguments + pathname = jitter.cpu.RDI + buf = jitter.cpu.RSI + rpathname = jitter.get_str_ansi(pathname) + log.debug("sys_statfs(%r, %x)", rpathname, buf) + + # Stub + if not linux_env.filesystem.exists(rpathname): + jitter.cpu.RAX = -1 + else: + info = linux_env.filesystem.statfs() + raise RuntimeError("Not implemented") + + +def sys_x86_64_ioctl(jitter, linux_env): + # Parse arguments + fd, cmd, arg = jitter.syscall_args_systemv(3) + log.debug("sys_ioctl(%x, %x, %x)", fd, cmd, arg) + + info = linux_env.ioctl(fd, cmd, arg) + if info is False: + jitter.syscall_ret_systemv(-1) + else: + if cmd == termios.TCGETS: + data = struct.pack("BBBB", *info) + jitter.vm.set_mem(arg, data) + elif cmd == termios.TIOCGWINSZ: + data = struct.pack("HHHH", *info) + jitter.vm.set_mem(arg, data) + else: + assert data is None + jitter.syscall_ret_systemv(0) + + +def sys_arml_ioctl(jitter, linux_env): + # Parse arguments + fd, cmd, arg = jitter.syscall_args_systemv(3) + log.debug("sys_ioctl(%x, %x, %x)", fd, cmd, arg) + + info = linux_env.ioctl(fd, cmd, arg) + if info is False: + jitter.syscall_ret_systemv(-1) + else: + if cmd == termios.TCGETS: + data = struct.pack("BBBB", *info) + jitter.vm.set_mem(arg, data) + elif cmd == termios.TIOCGWINSZ: + data = struct.pack("HHHH", *info) + jitter.vm.set_mem(arg, data) + else: + assert data is None + jitter.syscall_ret_systemv(0) + +def sys_generic_open(jitter, linux_env): + # Parse arguments + filename, flags, mode = jitter.syscall_args_systemv(3) + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_open(%r, %x, %x)", rpathname, flags, mode) + # Stub + # 'mode' is ignored + jitter.syscall_ret_systemv(linux_env.open_(rpathname, flags)) + + +def sys_generic_write(jitter, linux_env): + # Parse arguments + fd, buf, count = jitter.syscall_args_systemv(3) + log.debug("sys_write(%d, %x, %x)", fd, buf, count) + + # Stub + data = jitter.vm.get_mem(buf, count) + jitter.syscall_ret_systemv(linux_env.write(fd, data)) + + +def sys_x86_64_getdents(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + dirent = jitter.cpu.RSI + count = jitter.cpu.RDX + log.debug("sys_getdents(%x, %x, %x)", fd, dirent, count) + + # Stub + def packing_callback(cur_len, d_ino, d_type, name): + # struct linux_dirent { + # unsigned long d_ino; /* Inode number */ + # unsigned long d_off; /* Offset to next linux_dirent */ + # unsigned short d_reclen; /* Length of this linux_dirent */ + # char d_name[]; /* Filename (null-terminated) */ + # /* length is actually (d_reclen - 2 - + # offsetof(struct linux_dirent, d_name)) */ + # /* + # char pad; // Zero padding byte + # char d_type; // File type (only since Linux + # // 2.6.4); offset is (d_reclen - 1) + # */ + # } + d_reclen = 8 * 2 + 2 + 1 + len(name) + 1 + d_off = cur_len + d_reclen + entry = struct.pack("QqH", d_ino, d_off, d_reclen) + \ + name + "\x00" + struct.pack("B", d_type) + assert len(entry) == d_reclen + return entry + + out = linux_env.getdents(fd, count, packing_callback) + jitter.vm.set_mem(dirent, out) + jitter.cpu.RAX = len(out) + + +def sys_arml_getdents64(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.R0 + dirent = jitter.cpu.R1 + count = jitter.cpu.R2 + log.debug("sys_getdents64(%x, %x, %x)", fd, dirent, count) + + # Stub + def packing_callback(cur_len, d_ino, d_type, name): + # struct linux_dirent64 { + # ino64_t d_ino; /* 64-bit inode number */ + # off64_t d_off; /* 64-bit offset to next structure */ + # unsigned short d_reclen; /* Size of this dirent */ + # unsigned char d_type; /* File type */ + # char d_name[]; /* Filename (null-terminated) */ + # }; + d_reclen = 8 * 2 + 2 + 1 + len(name) + 1 + d_off = cur_len + d_reclen + entry = struct.pack("QqHB", d_ino, d_off, d_reclen, d_type) + \ + name + "\x00" + assert len(entry) == d_reclen + return entry + + out = linux_env.getdents(fd, count, packing_callback) + jitter.vm.set_mem(dirent, out) + jitter.cpu.R0 = len(out) + + +def sys_x86_64_newlstat(jitter, linux_env): + # Parse arguments + filename = jitter.cpu.RDI + statbuf = jitter.cpu.RSI + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_newlstat(%s, %x)", rpathname, statbuf) + + # Stub + if not linux_env.filesystem.exists(rpathname): + # ENOENT (No such file or directory) + jitter.cpu.RAX = -1 + else: + info = linux_env.lstat(rpathname) + data = _dump_struct_stat_x86_64(info) + jitter.vm.set_mem(statbuf, data) + jitter.cpu.RAX = 0 + + +def sys_arml_lstat64(jitter, linux_env): + # Parse arguments + filename = jitter.cpu.R0 + statbuf = jitter.cpu.R1 + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_newlstat(%s, %x)", rpathname, statbuf) + + # Stub + if not linux_env.filesystem.exists(rpathname): + # ENOENT (No such file or directory) + jitter.cpu.R0 = -1 + else: + info = linux_env.lstat(rpathname) + data = _dump_struct_stat_arml(info) + jitter.vm.set_mem(statbuf, data) + jitter.cpu.R0 = 0 + + +def sys_x86_64_lgetxattr(jitter, linux_env): + # Parse arguments + pathname = jitter.cpu.RDI + name = jitter.cpu.RSI + value = jitter.cpu.RDX + size = jitter.cpu.R10 + rpathname = jitter.get_str_ansi(pathname) + rname = jitter.get_str_ansi(name) + log.debug("sys_lgetxattr(%r, %r, %x, %x)", rpathname, rname, value, size) + + # Stub + jitter.vm.set_mem(value, "\x00" * size) + jitter.cpu.RAX = 0 + + +def sys_x86_64_getxattr(jitter, linux_env): + # Parse arguments + pathname = jitter.cpu.RDI + name = jitter.cpu.RSI + value = jitter.cpu.RDX + size = jitter.cpu.R10 + rpathname = jitter.get_str_ansi(pathname) + rname = jitter.get_str_ansi(name) + log.debug("sys_getxattr(%r, %r, %x, %x)", rpathname, rname, value, size) + + # Stub + jitter.vm.set_mem(value, "\x00" * size) + jitter.cpu.RAX = 0 + + +def sys_x86_64_socket(jitter, linux_env): + # Parse arguments + family = jitter.cpu.RDI + type_ = jitter.cpu.RSI + protocol = jitter.cpu.RDX + log.debug("sys_socket(%x, %x, %x)", family, type_, protocol) + + jitter.cpu.RAX = linux_env.socket(family, type_, protocol) + + +def sys_x86_64_connect(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + uservaddr = jitter.cpu.RSI + addrlen = jitter.cpu.RDX + raddr = jitter.get_str_ansi(uservaddr + 2) + log.debug("sys_connect(%x, %r, %x)", fd, raddr, addrlen) + + # Stub + # Always refuse the connexion + jitter.cpu.RAX = -1 + + +def sys_x86_64_clock_gettime(jitter, linux_env): + # Parse arguments + which_clock = jitter.cpu.RDI + tp = jitter.cpu.RSI + log.debug("sys_clock_gettime(%x, %x)", which_clock, tp) + + # Stub + value = linux_env.clock_gettime() + jitter.vm.set_mem(tp, struct.pack("Q", value)) + jitter.cpu.RAX = 0 + + +def sys_x86_64_lseek(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + offset = jitter.cpu.RSI + whence = jitter.cpu.RDX + log.debug("sys_lseek(%d, %x, %x)", fd, offset, whence) + + # Stub + fdesc = linux_env.file_descriptors[fd] + mask = (1 << 64) - 1 + if offset > (1 << 63): + offset = - ((offset ^ mask) + 1) + + new_offset = fdesc.lseek(offset, whence) + jitter.cpu.RAX = new_offset + + +def sys_x86_64_munmap(jitter, linux_env): + # Parse arguments + addr = jitter.cpu.RDI + len_ = jitter.cpu.RSI + log.debug("sys_munmap(%x, %x)", addr, len_) + + # Do nothing + jitter.cpu.RAX = 0 + + +def sys_x86_64_readlink(jitter, linux_env): + # Parse arguments + path = jitter.cpu.RDI + buf = jitter.cpu.RSI + bufsize = jitter.cpu.RDX + rpath = jitter.get_str_ansi(path) + log.debug("sys_readlink(%r, %x, %x)", rpath, buf, bufsize) + + # Stub + link = linux_env.filesystem.readlink(rpath) + if link is None: + # Not a link + jitter.cpu.RAX = -1 + else: + data = link[:bufsize - 1] + "\x00" + jitter.vm.set_mem(buf, data) + jitter.cpu.RAX = len(data) - 1 + +def sys_x86_64_getpid(jitter, linux_env): + # Parse arguments + log.debug("sys_getpid()") + + # Stub + jitter.cpu.RAX = linux_env.process_pid + + +def sys_x86_64_sysinfo(jitter, linux_env): + # Parse arguments + info = jitter.cpu.RDI + log.debug("sys_sysinfo(%x)", info) + + # Stub + data = struct.pack("QQQQQQQQQQHQQI", + 0x1234, # uptime + 0x2000, # loads (1 min) + 0x2000, # loads (5 min) + 0x2000, # loads (15 min) + 0x10000000, # total ram + 0x10000000, # free ram + 0x10000000, # shared memory + 0x0, # memory used by buffers + 0x0, # total swap + 0x0, # free swap + 0x1, # nb current processes + 0x0, # total high mem + 0x0, # available high mem + 0x1, # memory unit size + ) + jitter.vm.set_mem(info, data) + jitter.cpu.RAX = 0 + + +def sys_generic_geteuid(jitter, linux_env): + # Parse arguments + log.debug("sys_geteuid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_euid) + + +def sys_generic_getegid(jitter, linux_env): + # Parse arguments + log.debug("sys_getegid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_egid) + + +def sys_generic_getuid(jitter, linux_env): + # Parse arguments + log.debug("sys_getuid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_uid) + + +def sys_generic_getgid(jitter, linux_env): + # Parse arguments + log.debug("sys_getgid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_gid) + + +def sys_generic_setgid(jitter, linux_env): + # Parse arguments + gid, = jitter.syscall_args_systemv(1) + log.debug("sys_setgid(%x)", gid) + + # Stub + # Denied if different + if gid != linux_env.user_gid: + jitter.syscall_ret_systemv(-1) + else: + jitter.syscall_ret_systemv(0) + + +def sys_generic_setuid(jitter, linux_env): + # Parse arguments + uid, = jitter.syscall_args_systemv(1) + log.debug("sys_setuid(%x)", uid) + + # Stub + # Denied if different + if uid != linux_env.user_uid: + jitter.syscall_ret_systemv(-1) + else: + jitter.syscall_ret_systemv(0) + + +def sys_arml_set_tls(jitter, linux_env): + # Parse arguments + ptr = jitter.cpu.R0 + log.debug("sys_set_tls(%x)", ptr) + + # Stub + linux_env.tls = ptr + jitter.cpu.R0 = 0 + + +def sys_generic_fcntl64(jitter, linux_env): + # Parse arguments + fd, cmd, arg = jitter.syscall_args_systemv(3) + log.debug("sys_fcntl(%x, %x, %x)", fd, cmd, arg) + + # Stub + fdesc = linux_env.file_descriptors[fd] + if cmd == fcntl.F_GETFL: + jitter.syscall_ret_systemv(fdesc.flags) + elif cmd == fcntl.F_SETFL: + # Ignore flag change + jitter.syscall_ret_systemv(0) + elif cmd == fcntl.F_GETFD: + jitter.syscall_ret_systemv(fdesc.flags) + elif cmd == fcntl.F_SETFD: + # Ignore flag change + jitter.syscall_ret_systemv(0) + else: + raise RuntimeError("Not implemented") + + +def sys_x86_64_pread64(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + buf = jitter.cpu.RSI + count = jitter.cpu.RDX + pos = jitter.cpu.R10 + log.debug("sys_pread64(%x, %x, %x, %x)", fd, buf, count, pos) + + # Stub + fdesc = linux_env.file_descriptors[fd] + cur_pos = fdesc.tell() + fdesc.seek(pos) + data = fdesc.read(count) + jitter.vm.set_mem(buf, data) + fdesc.seek(cur_pos) + jitter.cpu.RAX = len(data) + + +def sys_arml_gettimeofday(jitter, linux_env): + # Parse arguments + tv = jitter.cpu.R0 + tz = jitter.cpu.R1 + log.debug("sys_gettimeofday(%x, %x)", tv, tz) + + # Stub + value = linux_env.clock_gettime() + if tv: + jitter.vm.set_mem(tv, struct.pack("II", value, 0)) + if tz: + jitter.vm.set_mem(tz, struct.pack("II", 0, 0)) + jitter.cpu.R0 = 0 + + +syscall_callbacks_x86_64 = { + 0x0: sys_generic_read, + 0x1: sys_generic_write, + 0x2: sys_generic_open, + 0x3: sys_generic_close, + 0x4: sys_x86_64_newstat, + 0x5: sys_x86_64_fstat, + 0x6: sys_x86_64_newlstat, + 0x8: sys_x86_64_lseek, + 0x9: sys_generic_mmap, + 0x10: sys_x86_64_ioctl, + 0xA: sys_generic_mprotect, + 0xB: sys_x86_64_munmap, + 0xC: sys_generic_brk, + 0xD: sys_x86_64_rt_sigaction, + 0xE: sys_x86_64_rt_sigprocmask, + 0x11: sys_x86_64_pread64, + 0x14: sys_x86_64_writev, + 0x15: sys_generic_access, + 0x27: sys_x86_64_getpid, + 0x29: sys_x86_64_socket, + 0x2A: sys_x86_64_connect, + 0x3F: sys_x86_64_newuname, + 0x48: sys_generic_fcntl64, + 0x4E: sys_x86_64_getdents, + 0x59: sys_x86_64_readlink, + 0x63: sys_x86_64_sysinfo, + 0x66: sys_generic_getuid, + 0x68: sys_generic_getgid, + 0x6B: sys_generic_geteuid, + 0x6C: sys_generic_getegid, + 0xE4: sys_x86_64_clock_gettime, + 0x89: sys_x86_64_statfs, + 0x9E: sys_x86_64_arch_prctl, + 0xBF: sys_x86_64_getxattr, + 0xC0: sys_x86_64_lgetxattr, + 0xDA: sys_x86_64_set_tid_address, + 0xE7: sys_generic_exit_group, + 0x101: sys_x86_64_openat, + 0x111: sys_x86_64_set_robust_list, + 0x12E: sys_x86_64_prlimit64, +} + + +syscall_callbacks_arml = { + + 0x3: sys_generic_read, + 0x4: sys_generic_write, + 0x5: sys_generic_open, + 0x6: sys_generic_close, + 0x2d: sys_generic_brk, + 0x21: sys_generic_access, + 0x36: sys_arml_ioctl, + 0x7a: sys_arml_newuname, + 0x7d: sys_generic_mprotect, + 0x92: sys_arml_writev, + 0xc0: sys_generic_mmap2, + 0xc3: sys_arml_stat64, + 0xc4: sys_arml_lstat64, + 0xc5: sys_arml_fstat64, + 0xc7: sys_generic_getuid, + 0xc8: sys_generic_getgid, + 0xc9: sys_generic_geteuid, + 0xcA: sys_generic_getegid, + 0x4e: sys_arml_gettimeofday, + 0xd5: sys_generic_setuid, + 0xd6: sys_generic_setgid, + 0xd9: sys_arml_getdents64, + 0xdd: sys_generic_fcntl64, + 0xf8: sys_generic_exit_group, + + # ARM-specific ARM_NR_BASE == 0x0f0000 + 0xf0005: sys_arml_set_tls, +} + +def syscall_x86_64_exception_handler(linux_env, syscall_callbacks, jitter): + """Call to actually handle an EXCEPT_PRIV_INSN exception + In the case of an error raised by a SYSCALL, call the corresponding + syscall_callbacks + @linux_env: LinuxEnvironment_x86_64 instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + """ + # Ensure the jitter has break on a SYSCALL + cur_instr = jitter.jit.mdis.dis_instr(jitter.pc) + if cur_instr.name != "SYSCALL": + return True + + # Dispatch to SYSCALL stub + syscall_number = jitter.cpu.RAX + callback = syscall_callbacks.get(syscall_number) + if callback is None: + raise KeyError( + "No callback found for syscall number 0x%x" % syscall_number + ) + callback(jitter, linux_env) + log.debug("-> %x", jitter.cpu.RAX) + + # Clean exception and move pc to the next instruction, to let the jitter + # continue + jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_PRIV_INSN) + jitter.pc += cur_instr.l + return True + + + +def syscall_x86_32_exception_handler(linux_env, syscall_callbacks, jitter): + """Call to actually handle an EXCEPT_PRIV_INSN exception + In the case of an error raised by a SYSCALL, call the corresponding + syscall_callbacks + @linux_env: LinuxEnvironment_x86_32 instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + """ + # Ensure the jitter has break on a SYSCALL + if jitter.cpu.interrupt_num != 0x80: + return True + + # Dispatch to SYSCALL stub + syscall_number = jitter.cpu.EAX + callback = syscall_callbacks.get(syscall_number) + if callback is None: + raise KeyError( + "No callback found for syscall number 0x%x" % syscall_number + ) + callback(jitter, linux_env) + log.debug("-> %x", jitter.cpu.EAX) + + # Clean exception and move pc to the next instruction, to let the jitter + # continue + jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_INT_XX) + return True + + + +def syscall_arml_exception_handler(linux_env, syscall_callbacks, jitter): + """Call to actually handle an EXCEPT_PRIV_INSN exception + In the case of an error raised by a SYSCALL, call the corresponding + syscall_callbacks + @linux_env: LinuxEnvironment_arml instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + """ + # Ensure the jitter has break on a SYSCALL + if jitter.cpu.interrupt_num != 0x0: + return True + + # Dispatch to SYSCALL stub + syscall_number = jitter.cpu.R7 + callback = syscall_callbacks.get(syscall_number) + if callback is None: + raise KeyError( + "No callback found for syscall number 0x%x" % syscall_number + ) + callback(jitter, linux_env) + log.debug("-> %x", jitter.cpu.R0) + + # Clean exception and move pc to the next instruction, to let the jitter + # continue + jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_INT_XX) + return True + + + +def enable_syscall_handling(jitter, linux_env, syscall_callbacks): + """Activate handling of syscall for the current jitter instance. + Syscall handlers are provided by @syscall_callbacks + @linux_env: LinuxEnvironment instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + + Example of use: + >>> linux_env = LinuxEnvironment_x86_64() + >>> enable_syscall_handling(jitter, linux_env, syscall_callbacks_x86_64) + """ + arch_name = jitter.jit.arch_name + if arch_name == "x8664": + handler = syscall_x86_64_exception_handler + handler = functools.partial(handler, linux_env, syscall_callbacks) + jitter.add_exception_handler(EXCEPT_PRIV_INSN, handler) + elif arch_name == "x8632": + handler = syscall_x86_32_exception_handler + handler = functools.partial(handler, linux_env, syscall_callbacks) + jitter.add_exception_handler(EXCEPT_INT_XX, handler) + elif arch_name == "arml": + handler = syscall_arml_exception_handler + handler = functools.partial(handler, linux_env, syscall_callbacks) + jitter.add_exception_handler(EXCEPT_INT_XX, handler) + else: + raise ValueError("No syscall handler implemented for %s" % arch_name) + diff --git a/setup.py b/setup.py index 209219b2..7cff1b34 100755 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ def buil_all(): 'miasm2/ir/translators', 'miasm2/analysis', 'miasm2/os_dep', + 'miasm2/os_dep/linux', 'miasm2/jitter', 'miasm2/jitter/arch', 'miasm2/jitter/loader', diff --git a/test/core/locationdb.py b/test/core/locationdb.py index b9a5f707..61bc4563 100644 --- a/test/core/locationdb.py +++ b/test/core/locationdb.py @@ -92,6 +92,11 @@ loc_key5_bis = loc_db.get_or_create_name_location(name3) assert loc_db.get_name_location(name3) == loc_key5_bis loc_db.consistency_check() +# Name and offset manipulation +assert loc_db.get_name_offset(name2) is None +assert loc_db.get_name_offset("unk_name") is None +assert loc_db.get_name_offset("first_name") == 0x5678 + # Merge loc_db2 = LocationDB() loc_db2.add_location(offset=0x3344) diff --git a/test/test_all.py b/test/test_all.py index 3fb0a5b7..9bf54608 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -1,6 +1,7 @@ #! /usr/bin/env python2 import argparse +from distutils.spawn import find_executable import time import os import tempfile @@ -686,6 +687,18 @@ for jitter in ExampleJitter.jitter_engines: ["--jitter", jitter], products=[Example.get_sample("box_upx_exe_unupx.bin")], tags=tags.get(jitter, [])) + if jitter != "python": + tags = tags.get(jitter, []) + [TAGS["long"]] + ls_path = find_executable("ls") + file_path = find_executable("file") + # Launch simulation of "file /bin/ls", with access to libs and ld info + testset += ExampleJitter(["run_with_linuxenv.py", "-v", "-p", + '/(.*lib.*\.so(\.\d+)?)|(/etc/ld.so.*)|(.*magic.*)|(%s)' % ls_path, + ] + ["--jitter", jitter] + [ + file_path, ls_path, + ], + tags=tags) + for script, dep in [(["x86_32.py", Example.get_sample("x86_32_sc.bin")], []), (["arm.py", Example.get_sample("md5_arm"), "--mimic-env"], |