about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- README.md 10
-rw-r--r-- miasm2/jitter/jitcore_cc_base.py 124
-rw-r--r-- miasm2/jitter/jitcore_gcc.py 132
-rw-r--r-- miasm2/jitter/jitcore_tcc.py 183
-rw-r--r-- miasm2/jitter/jitload.py 3
5 files changed, 191 insertions, 261 deletions
diff --git a/README.md b/README.md
index 09a5d44e..3d0dda88 100644
--- a/README.md
+++ b/README.md
@@ -424,10 +424,10 @@ How does it work?
 Miasm embeds its own disassembler, intermediate language and
 instruction semantic. It is written in Python.
 
-To emulate code, it uses LibTCC, LLVM, GCC or Python to JIT the intermediate
-representation. It can emulate shellcodes and all or parts of binaries. Python
-callbacks can be executed to interact with the execution, for instance to
-emulate library functions effects.
+To emulate code, it uses LibTCC, LLVM, GCC, Clang or Python to JIT the
+intermediate representation. It can emulate shellcodes and all or parts of
+binaries. Python callbacks can be executed to interact with the execution, for
+instance to emulate the effects of library functions.
 
 Documentation
 =============
@@ -453,6 +453,7 @@ Miasm uses:
 
 To enable code JIT, one of the following module is mandatory:
 * GCC
+* Clang
 * LLVM v3.2 with python-llvm, see below
 * LibTCC [tinycc (ONLY version 0.9.26)](http://repo.or.cz/w/tinycc.git)
 
@@ -472,6 +473,7 @@ sudo python setup.py install
 
 To use the jitter, GCC, TCC or LLVM is recommended
 * GCC (any version)
+* Clang (any version)
 * LibTCC needs to be configured with the `--disable-static` option
   * remove `libtcc-dev` from the system to avoid conflicts
   * clone [TinyCC](http://repo.or.cz/tinycc.git): `git clone http://repo.or.cz/tinycc.git`
diff --git a/miasm2/jitter/jitcore_cc_base.py b/miasm2/jitter/jitcore_cc_base.py
new file mode 100644
index 00000000..baebc294
--- /dev/null
+++ b/miasm2/jitter/jitcore_cc_base.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+import os
+import tempfile
+from distutils.sysconfig import get_python_inc
+from hashlib import md5
+
+from miasm2.jitter.jitcore import JitCore
+from miasm2.core.utils import keydefaultdict
+
+
+def gen_core(arch, attrib):
+    lib_dir = os.path.dirname(os.path.realpath(__file__))
+
+    txt = ""
+    txt += '#include "%s/queue.h"\n' % lib_dir
+    txt += '#include "%s/vm_mngr.h"\n' % lib_dir
+    txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir
+    txt += '#include "%s/JitCore.h"\n' % lib_dir
+    txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name)
+
+    txt += r'''
+#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;}
+'''
+    return txt
+
+
+class myresolver:
+
+    def __init__(self, offset):
+        self.offset = offset
+
+    def ret(self):
+        return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset
+
+
+class resolver:
+
+    def __init__(self):
+        self.resolvers = keydefaultdict(myresolver)
+
+    def get_resolver(self, offset):
+        return self.resolvers[offset]
+
+
+class JitCore_Cc_Base(JitCore):
+    "JiT management, abstract class using a C compiler as backend"
+
+    def __init__(self, ir_arch, bs=None):
+        self.jitted_block_delete_cb = self.deleteCB
+        super(JitCore_Cc_Base, self).__init__(ir_arch, bs)
+        self.resolver = resolver()
+        self.ir_arch = ir_arch
+        self.states = {}
+        self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_cache")
+        try:
+            os.mkdir(self.tempdir, 0755)
+        except OSError:
+            pass
+        if not os.access(self.tempdir, os.R_OK | os.W_OK):
+            raise RuntimeError(
+                'Cannot access cache directory %s ' % self.tempdir)
+        self.exec_wrapper = None
+        self.libs = None
+        self.include_files = None
+
+    def deleteCB(self, offset):
+        raise NotImplementedError()
+
+    def load(self):
+        lib_dir = os.path.dirname(os.path.realpath(__file__))
+        libs = [os.path.join(lib_dir, 'VmMngr.so'),
+                os.path.join(lib_dir,
+                             'arch/JitCore_%s.so' % (self.ir_arch.arch.name))]
+
+        include_files = [os.path.dirname(__file__),
+                         get_python_inc()]
+        self.include_files = include_files
+        self.libs = libs
+
+    def init_codegen(self, codegen):
+        """
+        Get the code generator @codegen
+        @codegen: a CGen instance
+        """
+        self.codegen = codegen
+
+    def label2fname(self, label):
+        """
+        Generate function name from @label
+        @label: asm_label instance
+        """
+        return "block_%s" % label.name
+
+    def gen_c_code(self, label, block):
+        """
+        Return the C source generated for @block
+        @label: asm_label of the block to jit
+        @block: block handed to the code generator
+        """
+        f_name = self.label2fname(label)
+        f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name
+        out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs)
+        out = [f_declaration + '{'] + out + ['}\n']
+        c_code = out
+
+        return self.gen_C_source(self.ir_arch, c_code)
+
+    @staticmethod
+    def gen_C_source(ir_arch, func_code):
+        raise NotImplementedError()
+
+    def hash_block(self, block):
+        """
+        Build a hash of the block @block
+        @block: asmbloc
+        """
+        block_raw = "".join(line.b for line in block.lines)
+        block_hash = md5("%X_%s_%s_%s" % (block.label.offset,
+                                          self.log_mn,
+                                          self.log_regs,
+                                          block_raw)).hexdigest()
+        return block_hash
diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py
index 7f72d8e7..0d9d5778 100644
--- a/miasm2/jitter/jitcore_gcc.py
+++ b/miasm2/jitter/jitcore_gcc.py
@@ -5,110 +5,25 @@ import os
 import tempfile
 import ctypes
 import _ctypes
-from distutils.sysconfig import get_python_inc
 from subprocess import check_call
-from hashlib import md5
 
-from miasm2.jitter import jitcore, Jitgcc
-from miasm2.core.utils import keydefaultdict
+from miasm2.jitter import Jitgcc
+from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core
 
 
-def gen_core(arch, attrib):
-    lib_dir = os.path.dirname(os.path.realpath(__file__))
-
-    txt = ""
-    txt += '#include "%s/queue.h"\n' % lib_dir
-    txt += '#include "%s/vm_mngr.h"\n' % lib_dir
-    txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir
-    txt += '#include "%s/JitCore.h"\n' % lib_dir
-    txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name)
-    txt += r'''
-#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;}
-'''
-    return txt
-
-
-def gen_C_source(ir_arch, func_code):
-    c_source = ""
-    c_source += "\n".join(func_code)
-
-    c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source
-    c_source = "#include <Python.h>\n" + c_source
-
-    return c_source
-
-
-class myresolver(object):
-
-    def __init__(self, offset):
-        self.offset = offset
-
-    def ret(self):
-        return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset
-
-
-class resolver(object):
-
-    def __init__(self):
-        self.resolvers = keydefaultdict(myresolver)
-
-    def get_resolver(self, offset):
-        return self.resolvers[offset]
-
-
-class JitCore_Gcc(jitcore.JitCore):
-
-    "JiT management, using GCC as backend"
+class JitCore_Gcc(JitCore_Cc_Base):
+    "JiT management, using a C compiler as backend"
 
     def __init__(self, ir_arch, bs=None):
-        self.jitted_block_delete_cb = self.deleteCB
         super(JitCore_Gcc, self).__init__(ir_arch, bs)
-        self.resolver = resolver()
-        self.gcc_states = {}
-        self.ir_arch = ir_arch
-        self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_gcc_cache")
-        try:
-            os.mkdir(self.tempdir, 0755)
-        except OSError:
-            pass
-        if not os.access(self.tempdir, os.R_OK | os.W_OK):
-            raise RuntimeError(
-                'Cannot access gcc cache directory %s ' % self.tempdir)
         self.exec_wrapper = Jitgcc.gcc_exec_bloc
-        self.libs = None
-        self.include_files = None
 
     def deleteCB(self, offset):
         """Free the state associated to @offset and delete it
         @offset: gcc state offset
         """
-        _ctypes.dlclose(self.gcc_states[offset]._handle)
-        del self.gcc_states[offset]
-
-    def load(self):
-        lib_dir = os.path.dirname(os.path.realpath(__file__))
-        libs = [os.path.join(lib_dir, 'VmMngr.so'),
-                os.path.join(lib_dir,
-                             'arch/JitCore_%s.so' % (self.ir_arch.arch.name))]
-
-        include_files = [os.path.dirname(__file__),
-                         get_python_inc()]
-        self.include_files = include_files
-        self.libs = libs
-
-    def init_codegen(self, codegen):
-        """
-        Get the code generator @codegen
-        @codegen: an CGen instance
-        """
-        self.codegen = codegen
-
-    def label2fname(self, label):
-        """
-        Generate function name from @label
-        @label: asm_label instance
-        """
-        return "block_%s" % label.name
+        _ctypes.dlclose(self.states[offset]._handle)
+        del self.states[offset]
 
     def load_code(self, label, fname_so):
         f_name = self.label2fname(label)
@@ -116,32 +31,13 @@ class JitCore_Gcc(jitcore.JitCore):
         func = getattr(lib, f_name)
         addr = ctypes.cast(func, ctypes.c_void_p).value
         self.lbl2jitbloc[label.offset] = addr
-        self.gcc_states[label.offset] = lib
-
-
-    def gen_c_code(self, label, block):
-        """
-        Return the C code corresponding to the @irblocks
-        @label: asm_label of the block to jit
-        @irblocks: list of irblocks
-        """
-        f_name = self.label2fname(label)
-        f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name
-        out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs)
-        out = [f_declaration + '{'] + out + ['}\n']
-        c_code = out
-
-        return gen_C_source(self.ir_arch, c_code)
+        self.states[label.offset] = lib
 
     def add_bloc(self, block):
         """Add a bloc to JiT and JiT it.
         @block: block to jit
         """
-        block_raw = "".join(line.b for line in block.lines)
-        block_hash = md5("%X_%s_%s_%s" % (block.label.offset,
-                                          self.log_mn,
-                                          self.log_regs,
-                                          block_raw)).hexdigest()
+        block_hash = self.hash_block(block)
         fname_out = os.path.join(self.tempdir, "%s.so" % block_hash)
 
         if not os.access(fname_out, os.R_OK | os.X_OK):
@@ -158,7 +54,7 @@ class JitCore_Gcc(jitcore.JitCore):
 
             inc_dir = ["-I%s" % inc for inc in self.include_files]
             libs = ["%s" % lib for lib in self.libs]
-            args = ["gcc"] + ["-O3"] + [
+            args = ["cc"] + ["-O3"] + [
                 "-shared", "-fPIC", fname_in, '-o', fname_tmp] + inc_dir + libs
             check_call(args)
             # Move temporary file to final file
@@ -166,3 +62,13 @@ class JitCore_Gcc(jitcore.JitCore):
             os.remove(fname_in)
 
         self.load_code(block.label, fname_out)
+
+    @staticmethod
+    def gen_C_source(ir_arch, func_code):
+        c_source = ""
+        c_source += "\n".join(func_code)
+
+        c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source
+        c_source = "#include <Python.h>\n" + c_source
+
+        return c_source
diff --git a/miasm2/jitter/jitcore_tcc.py b/miasm2/jitter/jitcore_tcc.py
index d3e90f85..1ab7df4d 100644
--- a/miasm2/jitter/jitcore_tcc.py
+++ b/miasm2/jitter/jitcore_tcc.py
@@ -1,134 +1,37 @@
 #!/usr/bin/env python
 #-*- coding:utf-8 -*-
-
 import os
-from distutils.sysconfig import get_python_inc
-from subprocess import Popen, PIPE
-from hashlib import md5
 import tempfile
+from subprocess import Popen, PIPE
 
-from miasm2.jitter import jitcore, Jittcc
-
-
-def jit_tcc_compil(func_name, func_code):
-    global Jittcc
-    c = Jittcc.tcc_compil(func_name, func_code)
-    return c
-
-
-class jit_tcc_code():
-
-    def __init__(self, c):
-        self.c = c
-
-    def __call__(self, cpu, vm):
-        return Jittcc.tcc_exec_bloc(self.c, cpu, vm)
-
-
-def gen_core(arch, attrib):
-    lib_dir = os.path.dirname(os.path.realpath(__file__))
-
-    txt = ""
-    txt += '#include "%s/queue.h"\n' % lib_dir
-    txt += '#include "%s/vm_mngr.h"\n' % lib_dir
-    txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir
-    txt += '#include "%s/JitCore.h"\n' % lib_dir
-    txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name)
-
-    txt += r'''
-#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;}
-'''
-    return txt
-
-
-def gen_C_source(ir_arch, func_code):
-    c_source = ""
-    c_source += "\n".join(func_code)
-
-    c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source
-
-    c_source = """
- #ifdef __x86_64__
- #ifndef __LP64__
- /*
-  for ubuntu ?!? XXX TODO
-  /!\ force 64 bit system using 64 bits libc
-  change this to __ILP32__ to do so.
- */
- #define __LP64__
- #endif
- #endif
- """ + "#include <Python.h>\n" + c_source
-
-    return c_source
-
-
-class objref:
-
-    def __init__(self, obj):
-        self.obj = obj
-
-
-class myresolver:
-
-    def __init__(self, offset):
-        self.offset = offset
-
-    def ret(self):
-        return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset
-
-from miasm2.core.utils import keydefaultdict
-
-
-class resolver:
-
-    def __init__(self):
-        self.resolvers = keydefaultdict(myresolver)
-
-    def get_resolver(self, offset):
-        return self.resolvers[offset]
+from miasm2.jitter import Jittcc
+from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core
 
 
-class JitCore_Tcc(jitcore.JitCore):
+class JitCore_Tcc(JitCore_Cc_Base):
 
     "JiT management, using LibTCC as backend"
 
     def __init__(self, ir_arch, bs=None):
-        self.jitted_block_delete_cb = self.deleteCB
         super(JitCore_Tcc, self).__init__(ir_arch, bs)
-        self.resolver = resolver()
         self.exec_wrapper = Jittcc.tcc_exec_bloc
-        self.tcc_states = {}
-        self.ir_arch = ir_arch
-
-        self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_gcc_cache")
-        try:
-            os.mkdir(self.tempdir, 0755)
-        except OSError:
-            pass
 
     def deleteCB(self, offset):
         "Free the TCCState corresponding to @offset"
-        if offset in self.tcc_states:
-            Jittcc.tcc_end(self.tcc_states[offset])
-            del self.tcc_states[offset]
+        if offset in self.states:
+            Jittcc.tcc_end(self.states[offset])
+            del self.states[offset]
 
     def load(self):
-        # os.path.join(os.path.dirname(os.path.realpath(__file__)), "jitter")
-        lib_dir = os.path.dirname(os.path.realpath(__file__))
-        libs = []
-        libs.append(os.path.join(lib_dir, 'VmMngr.so'))
-        libs.append(
-            os.path.join(lib_dir, 'arch/JitCore_%s.so' % (self.ir_arch.arch.name)))
-        libs = ';'.join(libs)
+        super(JitCore_Tcc, self).load()
+        libs = ';'.join(self.libs)
         jittcc_path = Jittcc.__file__
         include_dir = os.path.dirname(jittcc_path)
         include_dir += ";" + os.path.join(include_dir, "arch")
-        # print include_dir
 
         # XXX HACK
         # As debian/ubuntu have moved some include files using arch directory,
-        # TCC doesn't know them, so we get the info from GCC
+        # TCC doesn't know them, so we get the info from CC
         # For example /usr/include/x86_64-linux-gnu which contains limits.h
         p = Popen(["cc", "-Wp,-v", "-E", "-"],
                   stdout=PIPE, stderr=PIPE, stdin=PIPE)
@@ -136,27 +39,16 @@ class JitCore_Tcc(jitcore.JitCore):
         include_files = p.stderr.read().split('\n')
         include_files = [x[1:]
                          for x in include_files if x.startswith(' /usr/include')]
-        include_files += [include_dir, get_python_inc()]
+        include_files += self.include_files
         include_files = ";".join(include_files)
         Jittcc.tcc_set_emul_lib_path(include_files, libs)
 
-    def init_codegen(self, codegen):
-        """
-        Get the code generator @codegen
-        @codegen: an CGen instance
-        """
-        self.codegen = codegen
-
     def __del__(self):
-        for tcc_state in self.tcc_states.values():
+        for tcc_state in self.states.values():
             Jittcc.tcc_end(tcc_state)
 
-    def label2fname(self, label):
-        """
-        Generate function name from @label
-        @label: asm_label instance
-        """
-        return "block_%s" % label.name
+    def jit_tcc_compil(self, func_name, func_code):
+        return Jittcc.tcc_compil(func_name, func_code)
 
     def compil_code(self, block, func_code):
         """
@@ -166,34 +58,17 @@ class JitCore_Tcc(jitcore.JitCore):
         """
         label = block.label
         self.jitcount += 1
-        tcc_state, mcode = jit_tcc_compil(self.label2fname(label), func_code)
+        tcc_state, mcode = self.jit_tcc_compil(self.label2fname(label), func_code)
         self.lbl2jitbloc[label.offset] = mcode
-        self.tcc_states[label.offset] = tcc_state
-
-    def gen_c_code(self, label, block):
-        """
-        Return the C code corresponding to the @irblocks
-        @label: asm_label of the block to jit
-        @irblocks: list of irblocks
-        """
-        f_name = self.label2fname(label)
-        f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name
-        out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs)
-        out = [f_declaration + '{'] + out + ['}\n']
-        c_code = out
-
-        return gen_C_source(self.ir_arch, c_code)
+        self.states[label.offset] = tcc_state
 
     def add_bloc(self, block):
         """Add a bloc to JiT and JiT it.
         @block: block to jit
         """
-        block_raw = "".join(line.b for line in block.lines)
-        block_hash = md5("%X_%s_%s_%s" % (block.label.offset,
-                                          self.log_mn,
-                                          self.log_regs,
-                                          block_raw)).hexdigest()
+        block_hash = self.hash_block(block)
         fname_out = os.path.join(self.tempdir, "%s.c" % block_hash)
+
         if os.access(fname_out, os.R_OK):
             func_code = open(fname_out).read()
         else:
@@ -206,3 +81,25 @@ class JitCore_Tcc(jitcore.JitCore):
             os.rename(fname_tmp, fname_out)
 
         self.compil_code(block, func_code)
+
+    @staticmethod
+    def gen_C_source(ir_arch, func_code):
+        c_source = ""
+        c_source += "\n".join(func_code)
+
+        c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source
+
+        c_source = """
+     #ifdef __x86_64__
+     #ifndef __LP64__
+     /*
+      for ubuntu ?!? XXX TODO
+      /!\ force 64 bit system using 64 bits libc
+      change this to __ILP32__ to do so.
+     */
+     #define __LP64__
+     #endif
+     #endif
+     """ + "#include <Python.h>\n" + c_source
+
+        return c_source
diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py
index d8393230..e28c6765 100644
--- a/miasm2/jitter/jitload.py
+++ b/miasm2/jitter/jitload.py
@@ -10,6 +10,7 @@ from miasm2.core.bin_stream import bin_stream_vm
 from miasm2.core.interval import interval
 from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec
 from miasm2.jitter.codegen import CGen
+from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base
 
 hnd = logging.StreamHandler()
 hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
@@ -216,7 +217,7 @@ class jitter:
             raise RuntimeError('Unsupported jitter: %s' % jit_type)
 
         self.jit = JitCore(self.ir_arch, self.bs)
-        if jit_type in ['tcc', 'gcc']:
+        if isinstance(self.jit, JitCore_Cc_Base):
             self.jit.init_codegen(self.C_Gen(self.ir_arch))
         elif jit_type == "python":
             self.jit.set_cpu_vm(self.cpu, self.vm)