about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- example/disasm_03.py | 10
-rw-r--r-- example/test_dis.py | 42
-rw-r--r-- miasm2/analysis/binary.py | 202
3 files changed, 180 insertions, 74 deletions
diff --git a/example/disasm_03.py b/example/disasm_03.py
index 08b209a1..1141dc55 100644
--- a/example/disasm_03.py
+++ b/example/disasm_03.py
@@ -1,20 +1,16 @@
 import sys
-from elfesteem import pe_init
 from miasm2.arch.x86.disasm import dis_x86_32
 from miasm2.core.asmbloc import bloc2graph
-from miasm2.core.bin_stream import bin_stream_pe
+from miasm2.analysis.binary import Container
 
 if len(sys.argv) != 3:
     print 'Example:'
     print "%s box_upx.exe 0x410f90" % sys.argv[0]
     sys.exit(0)
 
-fname = sys.argv[1]
 ad = int(sys.argv[2], 16)
-e = pe_init.PE(open(fname).read())
-bs = bin_stream_pe(e.virt)
-
-mdis = dis_x86_32(bs)
+cont = Container.from_stream(open(sys.argv[1]))
+mdis = dis_x86_32(cont.bin_stream)
 # inform the engine not to disasm nul instructions
 mdis.dont_dis_nulstart_bloc = True
 blocs = mdis.dis_multibloc(ad)
diff --git a/example/test_dis.py b/example/test_dis.py
index dc3f7274..722e99c9 100644
--- a/example/test_dis.py
+++ b/example/test_dis.py
@@ -2,13 +2,10 @@ import sys
 import os
 import time
 
-from miasm2.core.bin_stream import bin_stream_elf, bin_stream_pe, bin_stream_str
-from elfesteem import *
+from miasm2.analysis.binary import Container
 from miasm2.core.asmbloc import *
-from miasm2.expression.simplifications import expr_simp
 from optparse import OptionParser
-from miasm2.core.cpu import dum_arg
-from miasm2.expression.expression import *
+from miasm2.expression.expression import ExprId
 from miasm2.core.interval import interval
 from miasm2.analysis.machine import Machine
 from pdb import pm
@@ -95,36 +92,13 @@ if options.bw != None:
 if options.funcswd != None:
     options.funcswd = int(options.funcswd)
 
-log.info('load binary')
-b = open(fname).read()
-
-default_addr = 0
-bs = None
-if b.startswith('MZ'):
-    try:
-        e = pe_init.PE(b)
-        if e.isPE() and e.NTsig.signature_value == 0x4550:
-            bs = bin_stream_pe(e.virt)
-            default_addr = e.rva2virt(e.Opthdr.AddressOfEntryPoint)
-    except:
-        log.error('Cannot read PE!')
-elif b.startswith('\x7fELF'):
-    try:
-        e = elf_init.ELF(b)
-        bs = bin_stream_elf(e.virt)
-        default_addr = e.Ehdr.entry
-    except:
-        log.error('Cannot read ELF!')
-
-
-if bs is None or options.shiftoffset is not None:
-
-    if options.shiftoffset is None:
-        options.shiftoffset = "0"
-    shift = int(options.shiftoffset, 16)
-    log.warning('fallback to string input (offset=%s)' % hex(shift))
-    bs = bin_stream_str(b, shift=shift)
+log.info('Load binary')
+with open(fname) as fdesc:
+    cont = Container.from_stream(fdesc, addr=options.shiftoffset)
 
+default_addr = cont.entry_point
+bs = cont.bin_stream
+e = cont.executable
 
 log.info('ok')
 mdis = dis_engine(bs)
diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py
index 77f1610d..bc662265 100644
--- a/miasm2/analysis/binary.py
+++ b/miasm2/analysis/binary.py
@@ -1,48 +1,184 @@
-from miasm2.core.bin_stream import *
 import logging
+
+from miasm2.core.bin_stream import *
+from elfesteem import pe_init, elf_init
 from miasm2.jitter.jitload import vm_load_pe, vm_load_elf
 from miasm2.jitter.csts import PAGE_READ
 
+
 log = logging.getLogger("binary")
 console_handler = logging.StreamHandler()
 console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
 log.addHandler(console_handler)
 log.setLevel(logging.ERROR)
 
+
+# Container
+## Exceptions
+class ContainerSignatureException(Exception):
+    """Raised when the data does not carry a container format's signature"""
+
+
+class ContainerParsingException(Exception):
+    """Raised when data matching a format's signature still fails to parse"""
+
+
+## Parent class
 class Container(object):
-    def __init__(self, filename, vm = None, addr = None):
-        data = open(filename).read()
-        log.info('load binary')
-        e, bs, ep = None, None, None
+    """Container abstraction layer
 
-        if data.startswith('MZ'):
-            try:
-                if vm is not None:
-                    e = vm_load_pe(vm, filename)
-                else:
-                    e = pe_init.PE(data)
-                if e.isPE() and e.NTsig.signature_value == 0x4550:
-                    bs = bin_stream_pe(e.virt)
-                    ep = e.rva2virt(e.Opthdr.AddressOfEntryPoint)
-            except:
-                log.error('Cannot read PE!')
-        elif data.startswith('\x7fELF'):
-            try:
-                if vm is not None:
-                    e = vm_load_elf(vm, filename)
-                else:
-                    e = elf_init.ELF(data)
-                bs = bin_stream_elf(e.virt)
-                ep = e.Ehdr.entry
-            except:
-                log.error('Cannot read ELF!')
+    This class aims to offer a common interface for abstracting container
+    such as PE or ELF.
+    """
+
+    available_container = []  # Available container formats
+    fallback_container = None # Fallback container format
+
+    @classmethod
+    def from_string(cls, data, vm=None, addr=None):
+        """Instantiate the first registered container able to parse @data
+        @data: str containing the binary
+        @vm: (optional) VmMngr instance to link with the executable
+        @addr: (optional) Base address for the binary. If set to a non-zero
+               value, skip format detection and use the fallback container
+        """
+        log.info('Load binary')
+
+        if not addr:  # NOTE(review): an explicit addr=0 counts as unset
+            addr = 0
         else:
-            bs = bin_stream_str(data)
+            # A base address was supplied: go straight to the fallback format
+            log.warning('Fallback to string input (offset=%s)' % hex(addr))
+            return cls.fallback_container(data, vm, addr)
+
+        # Try the registered formats in order; the first one to parse wins
+        for container_type in cls.available_container:
+            try:
+                return container_type(data, vm)
+            except ContainerSignatureException:
+                continue
+            except ContainerParsingException, error:
+                log.error(error)  # logged, then the next format is tried
+
+        # No format succeeded: use the fallback container at address 0
+        log.warning('Fallback to string input (offset=%s)' % hex(addr))
+        return cls.fallback_container(data, vm, addr)
+
+    @classmethod
+    def register_container(cls, container):
+        "Append @container to the formats tried, in order, by from_string"
+        cls.available_container.append(container)
+
+    @classmethod
+    def register_fallback(cls, container):
+        "Set the format from_string falls back to (no match, or @addr given)"
+        cls.fallback_container = container
+
+    @classmethod
+    def from_stream(cls, stream, *args, **kwargs):
+        """Instantiate a container from the whole content of @stream
+        @stream: object whose read() result is handed to from_string
+        @vm: (optional) VmMngr instance to link with the executable
+        @addr: (optional) Base address for the binary, see from_string;
+               positional/keyword extras are forwarded unchanged
+        """
+        return cls.from_string(stream.read(), *args, **kwargs)
+
+    def parse(self, data, *args, **kwargs):
+        "Parse @data; abstract here, each container format overrides it"
+        raise NotImplementedError("Abstract method")
+
+    def __init__(self, *args, **kwargs):
+        "Build the container by forwarding every argument to parse()"
+        self.parse(*args, **kwargs)
+
+    @property
+    def bin_stream(self):
+        "Return the bin_stream built by parse() over the container content"
+        return self._bin_stream
+
+    @property
+    def executable(self):
+        "Return the parsed executable object (None for the unknown format)"
+        return self._executable
+
+    @property
+    def entry_point(self):
+        "Return the detected entry point (0 for the unknown format)"
+        return self._entry_point
+
+
+## Format dependent classes
+class ContainerPE(Container):
+    "Container for PE executables, recognised by a leading 'MZ'"
+
+    def parse(self, data, vm=None):
+        # Cheap signature test first, so that other formats can be tried
+        if not data.startswith('MZ'):
+            raise ContainerSignatureException()
+
+        # Build executable instance (FIXME(review): 'filename' appears unbound)
+        try:
+            if vm is not None:
+                self._executable = vm_load_pe(vm, filename)
+            else:
+                self._executable = pe_init.PE(data)
+        except Exception, error:  # would also catch a NameError on 'filename'
+            raise ContainerParsingException('Cannot read PE: %s' % error)
+
+        # Still not a PE unless isPE() holds and the NT signature is 0x4550
+        if not self._executable.isPE() or \
+                self._executable.NTsig.signature_value != 0x4550:
+            raise ContainerSignatureException()
+
+        # Build the bin_stream from .virt; convert the entry point via rva2virt
+        try:
+            self._bin_stream = bin_stream_pe(self._executable.virt)
+            ep_detected = self._executable.Opthdr.AddressOfEntryPoint
+            self._entry_point = self._executable.rva2virt(ep_detected)
+        except Exception, error:
+            raise ContainerParsingException('Cannot read PE: %s' % error)
+
+
+class ContainerELF(Container):
+    "Container for ELF executables, recognised by a leading '\\x7fELF'"
+
+    def parse(self, data, vm=None):
+        # Cheap signature test first, so that other formats can be tried
+        if not data.startswith('\x7fELF'):
+            raise ContainerSignatureException()
+
+        # Build executable instance (FIXME(review): 'filename' appears unbound)
+        try:
             if vm is not None:
-                if addr is None:
-                    raise ValueError('set load addr')
-                vm.add_memory_page(addr,
-                                   PAGE_READ,
-                                   data)
+                self._executable = vm_load_elf(vm, filename)
+            else:
+                self._executable = elf_init.ELF(data)
+        except Exception, error:  # would also catch a NameError on 'filename'
+            raise ContainerParsingException('Cannot read ELF: %s' % error)
+
+        # Build the bin_stream from .virt; the entry point is Ehdr.entry
+        try:
+            self._bin_stream = bin_stream_elf(self._executable.virt)
+            self._entry_point = self._executable.Ehdr.entry
+        except Exception, error:
+            raise ContainerParsingException('Cannot read ELF: %s' % error)
+
+
+class ContainerUnknown(Container):
+    """Fallback container wrapping raw data of no recognised format"""
+
+    def parse(self, data, vm, addr):
+        # Expose the raw bytes as a string-backed stream shifted by @addr
+        self._bin_stream = bin_stream_str(data, shift=addr)
+        if vm is not None:
+            # Also add the raw bytes to the VM at @addr, with PAGE_READ access
+            vm.add_memory_page(addr, PAGE_READ, data)
+        # No parsed executable is built; the entry point is set to 0
+        self._executable, self._entry_point = None, 0
+
 
-        self.e, self.bs, self.ep = e, bs, ep
+## Register containers: PE then ELF are tried in order, unknown is the fallback
+Container.register_container(ContainerPE)
+Container.register_container(ContainerELF)
+Container.register_fallback(ContainerUnknown)