diff options
| -rw-r--r-- | README.md | 10 | ||||
| -rw-r--r-- | miasm2/jitter/jitcore_cc_base.py | 124 | ||||
| -rw-r--r-- | miasm2/jitter/jitcore_gcc.py | 132 | ||||
| -rw-r--r-- | miasm2/jitter/jitcore_tcc.py | 183 | ||||
| -rw-r--r-- | miasm2/jitter/jitload.py | 3 |
5 files changed, 191 insertions, 261 deletions
diff --git a/README.md b/README.md index 09a5d44e..3d0dda88 100644 --- a/README.md +++ b/README.md @@ -424,10 +424,10 @@ How does it work? Miasm embeds its own disassembler, intermediate language and instruction semantic. It is written in Python. -To emulate code, it uses LibTCC, LLVM, GCC or Python to JIT the intermediate -representation. It can emulate shellcodes and all or parts of binaries. Python -callbacks can be executed to interact with the execution, for instance to -emulate library functions effects. +To emulate code, it uses LibTCC, LLVM, GCC, Clang or Python to JIT the +intermediate representation. It can emulate shellcodes and all or parts of +binaries. Python callbacks can be executed to interact with the execution, for +instance to emulate library functions effects. Documentation ============= @@ -453,6 +453,7 @@ Miasm uses: To enable code JIT, one of the following module is mandatory: * GCC +* Clang * LLVM v3.2 with python-llvm, see below * LibTCC [tinycc (ONLY version 0.9.26)](http://repo.or.cz/w/tinycc.git) @@ -472,6 +473,7 @@ sudo python setup.py install To use the jitter, GCC, TCC or LLVM is recommended * GCC (any version) +* Clang (any version) * LibTCC needs to be configured with the `--disable-static` option * remove `libtcc-dev` from the system to avoid conflicts * clone [TinyCC](http://repo.or.cz/tinycc.git): `git clone http://repo.or.cz/tinycc.git` diff --git a/miasm2/jitter/jitcore_cc_base.py b/miasm2/jitter/jitcore_cc_base.py new file mode 100644 index 00000000..baebc294 --- /dev/null +++ b/miasm2/jitter/jitcore_cc_base.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import os +import tempfile +from distutils.sysconfig import get_python_inc +from hashlib import md5 + +from miasm2.jitter.jitcore import JitCore +from miasm2.core.utils import keydefaultdict + + +def gen_core(arch, attrib): + lib_dir = os.path.dirname(os.path.realpath(__file__)) + + txt = "" + txt += '#include "%s/queue.h"\n' % lib_dir + txt += '#include "%s/vm_mngr.h"\n' % lib_dir + txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir + txt += '#include "%s/JitCore.h"\n' % lib_dir + txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name) + + txt += r''' +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} +''' + return txt + + +class myresolver: + + def __init__(self, offset): + self.offset = offset + + def ret(self): + return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset + + +class resolver: + + def __init__(self): + self.resolvers = keydefaultdict(myresolver) + + def get_resolver(self, offset): + return self.resolvers[offset] + + +class JitCore_Cc_Base(JitCore): + "JiT management, abstract class using a C compiler as backend" + + def __init__(self, ir_arch, bs=None): + self.jitted_block_delete_cb = self.deleteCB + super(JitCore_Cc_Base, self).__init__(ir_arch, bs) + self.resolver = resolver() + self.ir_arch = ir_arch + self.states = {} + self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_cache") + try: + os.mkdir(self.tempdir, 0755) + except OSError: + pass + if not os.access(self.tempdir, os.R_OK | os.W_OK): + raise RuntimeError( + 'Cannot access cache directory %s ' % self.tempdir) + self.exec_wrapper = None + self.libs = None + self.include_files = None + + def deleteCB(self, offset): + raise NotImplementedError() + + def load(self): + lib_dir = os.path.dirname(os.path.realpath(__file__)) + libs = [os.path.join(lib_dir, 'VmMngr.so'), + os.path.join(lib_dir, + 'arch/JitCore_%s.so' % (self.ir_arch.arch.name))] + + include_files = [os.path.dirname(__file__), + get_python_inc()] + self.include_files = include_files + self.libs = libs + + def init_codegen(self, codegen): + """ + Get the code generator @codegen + @codegen: an CGen instance + """ + self.codegen = codegen + + def label2fname(self, label): + """ + Generate function name from @label + @label: asm_label instance + """ + return "block_%s" % label.name + + def gen_c_code(self, label, block): + """ + Return the C code corresponding to the @irblocks + @label: asm_label of the block to jit + @irblocks: list of irblocks + """ + f_name = self.label2fname(label) + f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name + out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs) + out = [f_declaration + '{'] + out + ['}\n'] + c_code = out + + return self.gen_C_source(self.ir_arch, c_code) + + @staticmethod + def gen_C_source(ir_arch, func_code): + raise NotImplementedError() + + def hash_block(self, block): + """ + Build a hash of the block @block + @block: asmbloc + """ + block_raw = "".join(line.b for line in block.lines) + block_hash = md5("%X_%s_%s_%s" % (block.label.offset, + self.log_mn, + self.log_regs, + block_raw)).hexdigest() + return block_hash diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py index 7f72d8e7..0d9d5778 100644 --- a/miasm2/jitter/jitcore_gcc.py +++ b/miasm2/jitter/jitcore_gcc.py @@ -5,110 +5,25 @@ import os import tempfile import ctypes import _ctypes -from distutils.sysconfig import get_python_inc from subprocess import check_call -from hashlib import md5 -from miasm2.jitter import jitcore, Jitgcc -from miasm2.core.utils import keydefaultdict +from miasm2.jitter import Jitgcc +from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core -def gen_core(arch, attrib): - lib_dir = os.path.dirname(os.path.realpath(__file__)) - - txt = "" - txt += '#include "%s/queue.h"\n' % lib_dir - txt += '#include "%s/vm_mngr.h"\n' % lib_dir - txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir - txt += '#include "%s/JitCore.h"\n' % lib_dir - txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name) - txt += r''' -#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} -''' - return txt - - -def gen_C_source(ir_arch, func_code): - c_source = "" - c_source += "\n".join(func_code) - - c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source - c_source = "#include <Python.h>\n" + c_source - - return c_source - - -class myresolver(object): - - def __init__(self, offset): - self.offset = offset - - def ret(self): - return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset - - -class resolver(object): - - def __init__(self): - self.resolvers = keydefaultdict(myresolver) - - def get_resolver(self, offset): - return self.resolvers[offset] - - -class JitCore_Gcc(jitcore.JitCore): - - "JiT management, using GCC as backend" +class JitCore_Gcc(JitCore_Cc_Base): + "JiT management, using a C compiler as backend" def __init__(self, ir_arch, bs=None): - self.jitted_block_delete_cb = self.deleteCB super(JitCore_Gcc, self).__init__(ir_arch, bs) - self.resolver = resolver() - self.gcc_states = {} - self.ir_arch = ir_arch - self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_gcc_cache") - try: - os.mkdir(self.tempdir, 0755) - except OSError: - pass - if not os.access(self.tempdir, os.R_OK | os.W_OK): - raise RuntimeError( - 'Cannot access gcc cache directory %s ' % self.tempdir) self.exec_wrapper = Jitgcc.gcc_exec_bloc - self.libs = None - self.include_files = None def deleteCB(self, offset): """Free the state associated to @offset and delete it @offset: gcc state offset """ - _ctypes.dlclose(self.gcc_states[offset]._handle) - del self.gcc_states[offset] - - def load(self): - lib_dir = os.path.dirname(os.path.realpath(__file__)) - libs = [os.path.join(lib_dir, 'VmMngr.so'), - os.path.join(lib_dir, - 'arch/JitCore_%s.so' % (self.ir_arch.arch.name))] - - include_files = [os.path.dirname(__file__), - get_python_inc()] - self.include_files = include_files - self.libs = libs - - def init_codegen(self, codegen): - """ - Get the code generator @codegen - @codegen: an CGen instance - """ - self.codegen = codegen - - def label2fname(self, label): - """ - Generate function name from @label - @label: asm_label instance - """ - return "block_%s" % label.name + _ctypes.dlclose(self.states[offset]._handle) + del self.states[offset] def load_code(self, label, fname_so): f_name = self.label2fname(label) @@ -116,32 +31,13 @@ class JitCore_Gcc(jitcore.JitCore): func = getattr(lib, f_name) addr = ctypes.cast(func, ctypes.c_void_p).value self.lbl2jitbloc[label.offset] = addr - self.gcc_states[label.offset] = lib - - - def gen_c_code(self, label, block): - """ - Return the C code corresponding to the @irblocks - @label: asm_label of the block to jit - @irblocks: list of irblocks - """ - f_name = self.label2fname(label) - f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name - out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs) - out = [f_declaration + '{'] + out + ['}\n'] - c_code = out - - return gen_C_source(self.ir_arch, c_code) + self.states[label.offset] = lib def add_bloc(self, block): """Add a bloc to JiT and JiT it. @block: block to jit """ - block_raw = "".join(line.b for line in block.lines) - block_hash = md5("%X_%s_%s_%s" % (block.label.offset, - self.log_mn, - self.log_regs, - block_raw)).hexdigest() + block_hash = self.hash_block(block) fname_out = os.path.join(self.tempdir, "%s.so" % block_hash) if not os.access(fname_out, os.R_OK | os.X_OK): @@ -158,7 +54,7 @@ class JitCore_Gcc(jitcore.JitCore): inc_dir = ["-I%s" % inc for inc in self.include_files] libs = ["%s" % lib for lib in self.libs] - args = ["gcc"] + ["-O3"] + [ + args = ["cc"] + ["-O3"] + [ "-shared", "-fPIC", fname_in, '-o', fname_tmp] + inc_dir + libs check_call(args) # Move temporary file to final file @@ -166,3 +62,13 @@ class JitCore_Gcc(jitcore.JitCore): os.remove(fname_in) self.load_code(block.label, fname_out) + + @staticmethod + def gen_C_source(ir_arch, func_code): + c_source = "" + c_source += "\n".join(func_code) + + c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source + c_source = "#include <Python.h>\n" + c_source + + return c_source diff --git a/miasm2/jitter/jitcore_tcc.py b/miasm2/jitter/jitcore_tcc.py index d3e90f85..1ab7df4d 100644 --- a/miasm2/jitter/jitcore_tcc.py +++ b/miasm2/jitter/jitcore_tcc.py @@ -1,134 +1,37 @@ #!/usr/bin/env python #-*- coding:utf-8 -*- - import os -from distutils.sysconfig import get_python_inc -from subprocess import Popen, PIPE -from hashlib import md5 import tempfile +from subprocess import Popen, PIPE -from miasm2.jitter import jitcore, Jittcc - - -def jit_tcc_compil(func_name, func_code): - global Jittcc - c = Jittcc.tcc_compil(func_name, func_code) - return c - - -class jit_tcc_code(): - - def __init__(self, c): - self.c = c - - def __call__(self, cpu, vm): - return Jittcc.tcc_exec_bloc(self.c, cpu, vm) - - -def gen_core(arch, attrib): - lib_dir = os.path.dirname(os.path.realpath(__file__)) - - txt = "" - txt += '#include "%s/queue.h"\n' % lib_dir - txt += '#include "%s/vm_mngr.h"\n' % lib_dir - txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir - txt += '#include "%s/JitCore.h"\n' % lib_dir - txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name) - - txt += r''' -#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} -''' - return txt - - -def gen_C_source(ir_arch, func_code): - c_source = "" - c_source += "\n".join(func_code) - - c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source - - c_source = """ - #ifdef __x86_64__ - #ifndef __LP64__ - /* - for ubuntu ?!? XXX TODO - /!\ force 64 bit system using 64 bits libc - change this to __ILP32__ to do so. - */ - #define __LP64__ - #endif - #endif - """ + "#include <Python.h>\n" + c_source - - return c_source - - -class objref: - - def __init__(self, obj): - self.obj = obj - - -class myresolver: - - def __init__(self, offset): - self.offset = offset - - def ret(self): - return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset - -from miasm2.core.utils import keydefaultdict - - -class resolver: - - def __init__(self): - self.resolvers = keydefaultdict(myresolver) - - def get_resolver(self, offset): - return self.resolvers[offset] +from miasm2.jitter import Jittcc +from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core -class JitCore_Tcc(jitcore.JitCore): +class JitCore_Tcc(JitCore_Cc_Base): "JiT management, using LibTCC as backend" def __init__(self, ir_arch, bs=None): - self.jitted_block_delete_cb = self.deleteCB super(JitCore_Tcc, self).__init__(ir_arch, bs) - self.resolver = resolver() self.exec_wrapper = Jittcc.tcc_exec_bloc - self.tcc_states = {} - self.ir_arch = ir_arch - - self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_gcc_cache") - try: - os.mkdir(self.tempdir, 0755) - except OSError: - pass def deleteCB(self, offset): "Free the TCCState corresponding to @offset" - if offset in self.tcc_states: - Jittcc.tcc_end(self.tcc_states[offset]) - del self.tcc_states[offset] + if offset in self.states: + Jittcc.tcc_end(self.states[offset]) + del self.states[offset] def load(self): - # os.path.join(os.path.dirname(os.path.realpath(__file__)), "jitter") - lib_dir = os.path.dirname(os.path.realpath(__file__)) - libs = [] - libs.append(os.path.join(lib_dir, 'VmMngr.so')) - libs.append( - os.path.join(lib_dir, 'arch/JitCore_%s.so' % (self.ir_arch.arch.name))) - libs = ';'.join(libs) + super(JitCore_Tcc, self).load() + libs = ';'.join(self.libs) jittcc_path = Jittcc.__file__ include_dir = os.path.dirname(jittcc_path) include_dir += ";" + os.path.join(include_dir, "arch") - # print include_dir # XXX HACK # As debian/ubuntu have moved some include files using arch directory, - # TCC doesn't know them, so we get the info from GCC + # TCC doesn't know them, so we get the info from CC # For example /usr/include/x86_64-linux-gnu which contains limits.h p = Popen(["cc", "-Wp,-v", "-E", "-"], stdout=PIPE, stderr=PIPE, stdin=PIPE) @@ -136,27 +39,16 @@ class JitCore_Tcc(jitcore.JitCore): include_files = p.stderr.read().split('\n') include_files = [x[1:] for x in include_files if x.startswith(' /usr/include')] - include_files += [include_dir, get_python_inc()] + include_files += self.include_files include_files = ";".join(include_files) Jittcc.tcc_set_emul_lib_path(include_files, libs) - def init_codegen(self, codegen): - """ - Get the code generator @codegen - @codegen: an CGen instance - """ - self.codegen = codegen - def __del__(self): - for tcc_state in self.tcc_states.values(): + for tcc_state in self.states.values(): Jittcc.tcc_end(tcc_state) - def label2fname(self, label): - """ - Generate function name from @label - @label: asm_label instance - """ - return "block_%s" % label.name + def jit_tcc_compil(self, func_name, func_code): + return Jittcc.tcc_compil(func_name, func_code) def compil_code(self, block, func_code): """ @@ -166,34 +58,17 @@ class JitCore_Tcc(jitcore.JitCore): """ label = block.label self.jitcount += 1 - tcc_state, mcode = jit_tcc_compil(self.label2fname(label), func_code) + tcc_state, mcode = self.jit_tcc_compil(self.label2fname(label), func_code) self.lbl2jitbloc[label.offset] = mcode - self.tcc_states[label.offset] = tcc_state - - def gen_c_code(self, label, block): - """ - Return the C code corresponding to the @irblocks - @label: asm_label of the block to jit - @irblocks: list of irblocks - """ - f_name = self.label2fname(label) - f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name - out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs) - out = [f_declaration + '{'] + out + ['}\n'] - c_code = out - - return gen_C_source(self.ir_arch, c_code) + self.states[label.offset] = tcc_state def add_bloc(self, block): """Add a bloc to JiT and JiT it. @block: block to jit """ - block_raw = "".join(line.b for line in block.lines) - block_hash = md5("%X_%s_%s_%s" % (block.label.offset, - self.log_mn, - self.log_regs, - block_raw)).hexdigest() + block_hash = self.hash_block(block) fname_out = os.path.join(self.tempdir, "%s.c" % block_hash) + if os.access(fname_out, os.R_OK): func_code = open(fname_out).read() else: @@ -206,3 +81,25 @@ class JitCore_Tcc(jitcore.JitCore): os.rename(fname_tmp, fname_out) self.compil_code(block, func_code) + + @staticmethod + def gen_C_source(ir_arch, func_code): + c_source = "" + c_source += "\n".join(func_code) + + c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source + + c_source = """ + #ifdef __x86_64__ + #ifndef __LP64__ + /* + for ubuntu ?!? XXX TODO + /!\ force 64 bit system using 64 bits libc + change this to __ILP32__ to do so. + */ + #define __LP64__ + #endif + #endif + """ + "#include <Python.h>\n" + c_source + + return c_source diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index d8393230..e28c6765 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -10,6 +10,7 @@ from miasm2.core.bin_stream import bin_stream_vm from miasm2.core.interval import interval from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec from miasm2.jitter.codegen import CGen +from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base hnd = logging.StreamHandler() hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) @@ -216,7 +217,7 @@ class jitter: raise RuntimeError('Unsupported jitter: %s' % jit_type) self.jit = JitCore(self.ir_arch, self.bs) - if jit_type in ['tcc', 'gcc']: + if isinstance(self.jit, JitCore_Cc_Base): self.jit.init_codegen(self.C_Gen(self.ir_arch)) elif jit_type == "python": self.jit.set_cpu_vm(self.cpu, self.vm) |