about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--example/ida/ctype_propagation.py266
-rw-r--r--example/ida/rpyc_ida.py25
-rw-r--r--miasm2/core/ctypesmngr.py22
-rw-r--r--miasm2/expression/parser.py70
-rw-r--r--miasm2/ir/ir.py1
-rw-r--r--miasm2/ir/symbexec.py95
-rw-r--r--miasm2/ir/symbexec_top.py221
-rw-r--r--miasm2/ir/symbexec_types.py243
-rw-r--r--miasm2/jitter/emulatedsymbexec.py6
-rw-r--r--miasm2/jitter/llvmconvert.py25
-rw-r--r--test/arch/x86/unit/asm_test.py10
-rw-r--r--test/arch/x86/unit/mn_div.py17
-rw-r--r--test/expression/parser.py16
-rwxr-xr-xtest/test_all.py3
14 files changed, 1002 insertions, 18 deletions
diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py
new file mode 100644
index 00000000..e9986522
--- /dev/null
+++ b/example/ida/ctype_propagation.py
@@ -0,0 +1,266 @@
+import os
+import tempfile
+
+from miasm2.core.bin_stream_ida import bin_stream_ida
+from miasm2.expression import expression as m2_expr
+
+from miasm2.expression.simplifications import expr_simp
+from miasm2.analysis.depgraph import DependencyGraph
+from miasm2.ir.ir import IRBlock, AssignBlock
+
+from utils import guess_machine
+
+from miasm2.arch.x86.ctype import CTypeAMD64_unk
+from miasm2.expression.expression import ExprId
+
+from miasm2.core.objc import CTypesManagerNotPacked, CTypeAnalyzer, ExprToAccessC, CHandler
+from miasm2.core.ctypesmngr import CAstTypes
+from miasm2.expression.expression import ExprMem, ExprId, ExprInt, ExprOp, ExprAff
+from miasm2.ir.symbexec_types import SymbExecCType
+
+from miasm2.expression.parser import str_to_expr
+
+
+class TypePropagationForm(Form):
+
+    def __init__(self, ira):
+
+        self.ira = ira
+        self.stk_unalias_force = False
+
+        default_types_info = r"""ExprId("RDX", 64): char *
+"""
+        archs = ["AMD64_unk", "X86_32_unk"]
+
+        Form.__init__(self,
+r"""BUTTON YES* Launch
+BUTTON CANCEL NONE
+Dependency Graph Settings
+<##Header file :{headerFile}>
+<Architecture/complator:{cbReg}>
+<Types informations:{strTypesInfo}>
+<Unalias stack:{rUnaliasStack}>{cMethod}>
+""", {
+    'headerFile': Form.FileInput(swidth=20, open=True),
+    'cbReg': Form.DropdownListControl(
+        items=archs,
+        readonly=False,
+        selval=archs[0]),
+    'strTypesInfo': Form.MultiLineTextControl(text=default_types_info,
+                                              flags=Form.MultiLineTextControl.TXTF_FIXEDFONT),
+    'cMethod': Form.ChkGroupControl(("rUnaliasStack",)),
+    })
+        self.Compile()
+
+    @property
+    def unalias_stack(self):
+        return self.cMethod.value & 1 or self.stk_unalias_force
+
+
+def get_block(ir_arch, mdis, addr):
+    """Get IRBlock at address @addr"""
+    mdis.job_done.clear()
+    lbl = ir_arch.get_label(addr)
+    if not lbl in ir_arch.blocks:
+        block = mdis.dis_bloc(lbl.offset)
+        ir_arch.add_bloc(block)
+    irblock = ir_arch.get_bloc(lbl)
+    if irblock is None:
+        raise LookupError('No block found at that address: %s' % lbl)
+    return irblock
+
+
+def get_types_mngr(headerFile):
+    text = open(headerFile).read()
+    base_types = CTypeAMD64_unk()
+    types_ast = CAstTypes()
+
+    # Add C types definition
+    types_ast.add_c_decl(text)
+
+    types_mngr = CTypesManagerNotPacked(types_ast, base_types)
+    return types_mngr
+
+
+
+# add stack alignment simpl
+def simp_stack_align(expr_simp, expr):
+    if not expr.is_op("&"):
+        return expr
+    if len(expr.args) != 2:
+        return expr
+    if not expr.args[1].is_int(0xFFFFFFF0):
+        return expr
+    if expr.args[0] != ir_arch.arch.regs.ESP_init:
+        return expr
+    return expr.args[0]
+
+expr_simp.enable_passes({m2_expr.ExprOp: [simp_stack_align]})
+
+# Init
+machine = guess_machine()
+mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira
+
+bs = bin_stream_ida()
+mdis = dis_engine(bs, dont_dis_nulstart_bloc=True)
+ir_arch = ira(mdis.symbol_pool)
+
+# Get the current function
+func = idaapi.get_func(ScreenEA())
+addr = func.startEA
+blocks = mdis.dis_multibloc(addr)
+# Generate IR
+for block in blocks:
+    ir_arch.add_bloc(block)
+
+
+class MyCTypeAnalyzer(CTypeAnalyzer):
+    allow_none_result = True
+
+
+class MyExprToAccessC(ExprToAccessC):
+    allow_none_result = True
+
+
+class MyCHandler(CHandler):
+    cTypeAnalyzer_cls = MyCTypeAnalyzer
+    exprToAccessC_cls = MyExprToAccessC
+
+
+# Get settings
+settings = TypePropagationForm(ir_arch)
+ret = settings.Execute()
+if ret:
+
+    ir_arch = ir_arch
+
+
+    types_mngr = get_types_mngr(settings.headerFile.value)
+    mychandler = MyCHandler(types_mngr, {})
+
+    #print 'Expr', settings.iStr1.value
+    #print 'Type', settings.iStr2.value
+    #print 'Header', settings.headerFile.value
+    print 'InfoTypes', settings.strTypesInfo.value
+    infos_types = {}
+    for line in settings.strTypesInfo.value.split('\n'):
+        if not line:
+            continue
+        expr_str, ctype_str = line.split(':')
+        expr_str, ctype_str = expr_str.strip(), ctype_str.strip()
+        expr = str_to_expr(expr_str)
+        ast = mychandler.type_analyzer.types_mngr.types_ast.parse_c_type(ctype_str)
+        ctype = mychandler.type_analyzer.types_mngr.types_ast.ast_parse_declaration(ast.ext[0])
+        objc = types_mngr.get_objc(ctype)
+        print '='*20
+        print expr, objc
+        infos_types[expr] = objc
+
+    # Add fake head
+    lbl_real_start = ir_arch.symbol_pool.getby_offset(addr)
+    lbl_head = ir_arch.symbol_pool.getby_name_create("start")
+
+    first_block = blocks.label2block(lbl_real_start)
+
+    assignblk_head = AssignBlock([ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)),
+                                  ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp])
+                                 ], first_block.lines[0])
+    irb_head = IRBlock(lbl_head, [assignblk_head])
+    ir_arch.blocks[lbl_head] = irb_head
+    ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start)
+
+    class Engine(SymbExecCType):
+        def __init__(self, state):
+            mychandler = MyCHandler(types_mngr, state.infos_types)
+            super(Engine, self).__init__(ir_arch,
+                                         state.symbols,
+                                         state.infos_types,
+                                         mychandler)
+
+
+    def add_state(todo, states, addr, state):
+        addr = ir_arch.get_label(addr)
+        if addr not in states:
+            states[addr] = state
+            todo.add(addr)
+        else:
+            todo.add(addr)
+            states[addr] = states[addr].merge(state)
+
+
+    state = Engine.StateEngine(infos_types, infos_types)
+    states = {lbl_head:state}
+    todo = set([lbl_head])
+    done = set()
+
+    while todo:
+        lbl = todo.pop()
+        state = states[lbl]
+        if (lbl, state) in done:
+            continue
+        done.add((lbl, state))
+        symbexec_engine = Engine(state)
+
+        get_block(ir_arch, mdis, lbl)
+
+        addr = symbexec_engine.emul_ir_block(lbl)
+        symbexec_engine.del_mem_above_stack(ir_arch.sp)
+
+        ir_arch._graph = None
+        sons = ir_arch.graph.successors(lbl)
+        for son in sons:
+            if son.offset is None:
+                continue
+            add_state(todo, states, son.offset, symbexec_engine.get_state())
+
+
+    class SymbExecCTypeFix(SymbExecCType):
+        def emulbloc(self, irb, step=False):
+            """
+            Symbolic execution of the @irb on the current state
+            @irb: irblock instance
+            @step: display intermediate steps
+            """
+            offset2cmt = {}
+            for assignblk in irb.irs:
+                instr = assignblk.instr
+                tmp_rw = assignblk.get_rw()
+                for dst, src in assignblk.iteritems():
+                    for arg in set(instr.args).union(set([src])):
+                        if arg in tmp_rw and arg not in tmp_rw.values():
+                            continue
+                        objc = self.eval_expr(arg)
+                        if objc is None:
+                            continue
+                        if self.is_type_offset(objc):
+                            continue
+                        offset2cmt.setdefault(instr.offset, set()).add("%s: %s" % (arg, str(objc)))
+                self.eval_ir(assignblk)
+
+            for offset, value in offset2cmt.iteritems():
+                idc.MakeComm(offset, '\n'.join(value))
+
+            return self.eval_expr(self.ir_arch.IRDst)
+
+
+    class Engine(SymbExecCTypeFix):
+        def __init__(self, state):
+            mychandler = MyCHandler(types_mngr, state.infos_types)
+            super(Engine, self).__init__(ir_arch,
+                                         state.symbols,
+                                         state.infos_types,
+                                         mychandler)
+
+
+    for lbl, state in states.iteritems():
+        symbexec_engine = Engine(state)
+        addr = symbexec_engine.emul_ir_block(lbl)
+        symbexec_engine.del_mem_above_stack(ir_arch.sp)
+
+
+
+
+
+
+
+
diff --git a/example/ida/rpyc_ida.py b/example/ida/rpyc_ida.py
new file mode 100644
index 00000000..21faf43a
--- /dev/null
+++ b/example/ida/rpyc_ida.py
@@ -0,0 +1,25 @@
+"""rpyc IDA server"""
+
+from rpyc.utils.server import OneShotServer
+from rpyc.core import SlaveService
+
+
+
+def serve_threaded(hostname="localhost", port=4455):
+    """This will run a rpyc server in IDA, so a custom script client will be
+    able to access IDA api.
+    WARNING: IDA will be locked until the client script terminates.
+    """
+
+    print 'Running server'
+    server = OneShotServer(SlaveService, hostname=hostname,
+                           port=port, reuse_addr=True, ipv6=False,
+                           authenticator=None,
+                           auto_register=False)
+    server.logger.quiet = False
+
+    return server.start()
+
+
+if __name__ == "__main__":
+    serve_threaded()
diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py
index 0c1d55f4..eeffb696 100644
--- a/miasm2/core/ctypesmngr.py
+++ b/miasm2/core/ctypesmngr.py
@@ -8,14 +8,16 @@ RE_HASH_CMT = re.compile(r'^#\s*\d+.*$', flags=re.MULTILINE)
 # http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
 
 
-def c_to_ast(c_str):
+def c_to_ast(parser, c_str):
     """Transform a @c_str into a C ast
     Note: will ignore lines containing code refs ie:
     # 23 "miasm.h"
+
+    @parser: pycparser instance
+    @c_str: c string
     """
 
     new_str = re.sub(RE_HASH_CMT, "", c_str)
-    parser = c_parser.CParser()
     return parser.parse(new_str, filename='<stdin>')
 
 
@@ -301,6 +303,9 @@ class CAstTypes(object):
         self._typedefs = dict(knowntypedefs)
         self.cpt = 0
         self.loc_to_decl_info = {}
+        self.parser = c_parser.CParser()
+        self._cpt_decl = 0
+
 
         self.ast_to_typeid_rules = {
             c_ast.Struct: self.ast_to_typeid_struct,
@@ -458,7 +463,7 @@ class CAstTypes(object):
         """
         c_str = self.digest_decl(c_str)
 
-        ast = c_to_ast(c_str)
+        ast = c_to_ast(self.parser, c_str)
         self.add_c_decl_from_ast(ast)
 
         return ast
@@ -693,3 +698,14 @@ class CAstTypes(object):
         """
         for ext in ast.ext:
             ret = self.ast_parse_declaration(ext)
+
+    def parse_c_type(self, c_str):
+        """Parse a C string representing a C type and return the associated
+        Miasm C object.
+        @c_str: C string of a C type
+        """
+
+        new_str = "%s __MIASM_INTERNAL_%s;" % (c_str, self._cpt_decl)
+        ret = self.parser.cparser.parse(input=new_str, lexer=self.parser.clex)
+        self._cpt_decl += 1
+        return ret
diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py
new file mode 100644
index 00000000..b3f3af1c
--- /dev/null
+++ b/miasm2/expression/parser.py
@@ -0,0 +1,70 @@
+import pyparsing
+from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \
+    ExprCond, ExprCompose, ExprOp, ExprAff
+
+integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t:
+                                                        int(t[0]))
+hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums)
+hex_int = pyparsing.Combine(hex_word).setParseAction(lambda t:
+                                                     int(t[0], 16))
+
+str_int_pos = (hex_int | integer)
+str_int_neg = (pyparsing.Suppress('-') + \
+                   (hex_int | integer)).setParseAction(lambda t: -t[0])
+
+str_int = str_int_pos | str_int_neg
+
+STR_EXPRINT = pyparsing.Suppress("ExprInt")
+STR_EXPRID = pyparsing.Suppress("ExprId")
+STR_EXPRSLICE = pyparsing.Suppress("ExprSlice")
+STR_EXPRMEM = pyparsing.Suppress("ExprMem")
+STR_EXPRCOND = pyparsing.Suppress("ExprCond")
+STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose")
+STR_EXPROP = pyparsing.Suppress("ExprOp")
+STR_EXPRAFF = pyparsing.Suppress("ExprAff")
+
+STR_COMMA = pyparsing.Suppress(",")
+LPARENTHESIS = pyparsing.Suppress("(")
+RPARENTHESIS = pyparsing.Suppress(")")
+
+
+string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\')
+string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\')
+
+
+string = string_quote | string_dquote
+
+expr = pyparsing.Forward()
+
+expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS)
+expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS)
+expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS)
+expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS)
+expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS)
+expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS)
+expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS)
+expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS)
+
+expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \
+         expr_compose | expr_op | expr_aff)
+
+expr_int.setParseAction(lambda t: ExprInt(*t[0]))
+expr_id.setParseAction(lambda t: ExprId(*t[0]))
+expr_slice.setParseAction(lambda t: ExprSlice(*t[0]))
+expr_mem.setParseAction(lambda t: ExprMem(*t[0]))
+expr_cond.setParseAction(lambda t: ExprCond(*t[0]))
+expr_compose.setParseAction(lambda t: ExprCompose(*t[0]))
+expr_op.setParseAction(lambda t: ExprOp(*t[0]))
+expr_aff.setParseAction(lambda t: ExprAff(*t[0]))
+
+
+def str_to_expr(str_in):
+    """Parse the @str_in and return the corresponoding Expression
+    @str_in: repr string of an Expression"""
+
+    try:
+        value = expr.parseString(str_in)
+    except:
+        raise RuntimeError("Cannot parse expression %s" % str_in)
+    assert len(value) == 1
+    return value[0]
diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py
index 2509e901..7c39cf04 100644
--- a/miasm2/ir/ir.py
+++ b/miasm2/ir/ir.py
@@ -810,6 +810,7 @@ class IntermediateRepresentation(object):
             if (len(self.graph.predecessors(label)) == 0 and
                 len(self.graph.successors(label)) == 0):
                 self.graph.del_node(label)
+                del self.blocks[label]
         return modified
 
     def merge_blocks(self):
diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py
index f9444424..e98744c0 100644
--- a/miasm2/ir/symbexec.py
+++ b/miasm2/ir/symbexec.py
@@ -97,19 +97,77 @@ class SymbolMngr(object):
         return new_symbols
 
 
+class StateEngine(object):
+    """Stores an Engine state"""
+
+    def merge(self, other):
+        """Generate a new state, representing the merge of self and @other
+        @other: a StateEngine instance"""
+
+        raise NotImplementedError("Abstract method")
+
+
+class SymbolicState(StateEngine):
+    """Stores a SymbolicExecutionEngine state"""
+
+    def __init__(self, dct):
+        self._symbols = frozenset(dct.items())
+
+    def __hash__(self):
+        return hash((self.__class__, self._symbols))
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if self.__class__ != other.__class__:
+            return False
+        return self.symbols == other.symbols
+
+    def __iter__(self):
+        for dst, src in self._symbols:
+            yield dst, src
+
+    def iteritems(self):
+        return self.__iter__()
+
+    def merge(self, other):
+        """Merge two symbolic states
+        Only equal expressions are kept in both states
+        @other: second symbolic state
+        """
+
+        symb_a = self.symbols
+        symb_b = other.symbols
+        intersection = set(symb_a.keys()).intersection(symb_b.keys())
+        out = {}
+        for dst in intersection:
+            if symb_a[dst] == symb_b[dst]:
+                out[dst] = symb_a[dst]
+        return self.__class__(out)
+
+    @property
+    def symbols(self):
+        """Return the dictionnary of known symbols"""
+        return dict(self._symbols)
+
+
 class SymbolicExecutionEngine(object):
     """
     Symbolic execution engine
     Allow IR code emulation in symbolic domain
     """
 
-    def __init__(self, ir_arch, known_symbols,
+    StateEngine = SymbolicState
+
+    def __init__(self, ir_arch, state,
                  func_read=None,
                  func_write=None,
                  sb_expr_simp=expr_simp):
+
         self.symbols = SymbolMngr()
-        for expr, value in known_symbols.items():
-            self.symbols[expr] = value
+        for dst, src in state.iteritems():
+            self.symbols[dst] = src
+
         self.func_read = func_read
         self.func_write = func_write
         self.ir_arch = ir_arch
@@ -190,6 +248,18 @@ class SymbolicExecutionEngine(object):
         ret = self.expr_simp(self.symbols[ret][:size])
         return ret
 
+    def get_state(self):
+        """Return the current state of the SymbolicEngine"""
+        state = self.StateEngine(dict(self.symbols))
+        return state
+
+    def set_state(self, state):
+        """Restaure the @state of the engine
+        @state: StateEngine instance
+        """
+        self.symbols = SymbolMngr()
+        for dst, src in dict(state).iteritems():
+            self.symbols[dst] = src
 
     def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0):
         """
@@ -198,38 +268,40 @@ class SymbolicExecutionEngine(object):
             2. simplify
         """
 
+        expr = self.expr_simp(expr)
+
         #print '\t'*level, "Eval:", expr
         if expr in cache:
             ret = cache[expr]
             #print "In cache!", ret
-        elif isinstance(expr, m2_expr.ExprInt):
+        elif expr.is_int():
             return expr
-        elif isinstance(expr, m2_expr.ExprId):
+        elif expr.is_id():
             if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None:
                 ret = m2_expr.ExprInt(expr.name.offset, expr.size)
             else:
                 ret = state.get(expr, expr)
-        elif isinstance(expr, m2_expr.ExprMem):
+        elif expr.is_mem():
             ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
             ret = m2_expr.ExprMem(ptr, expr.size)
             ret = self.get_mem_state(ret)
             assert expr.size == ret.size
-        elif isinstance(expr, m2_expr.ExprCond):
+        elif expr.is_cond():
             cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1)
             src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1)
             src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1)
             ret = m2_expr.ExprCond(cond, src1, src2)
-        elif isinstance(expr, m2_expr.ExprSlice):
+        elif expr.is_slice():
             arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
             ret = m2_expr.ExprSlice(arg, expr.start, expr.stop)
-        elif isinstance(expr, m2_expr.ExprOp):
+        elif expr.is_op():
             args = []
             for oarg in expr.args:
                 arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1)
                 assert oarg.size == arg.size
                 args.append(arg)
             ret = m2_expr.ExprOp(expr.op, *args)
-        elif isinstance(expr, m2_expr.ExprCompose):
+        elif expr.is_compose():
             args = []
             for arg in expr.args:
                 args.append(self.apply_expr_on_state_visit_cache(arg, state, cache, level+1))
@@ -390,7 +462,7 @@ class SymbolicExecutionEngine(object):
             elif isinstance(dst, m2_expr.ExprId):
                 pool_out[dst] = src
             else:
-                raise ValueError("affected zarb", str(dst))
+                raise ValueError("Unknown destination type", str(dst))
 
         return pool_out.iteritems()
 
@@ -442,6 +514,7 @@ class SymbolicExecutionEngine(object):
         """
         for assignblk in irb.irs:
             if step:
+                print 'Instr', assignblk.instr
                 print 'Assignblk:'
                 print assignblk
                 print '_' * 80
diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py
new file mode 100644
index 00000000..fe54c65f
--- /dev/null
+++ b/miasm2/ir/symbexec_top.py
@@ -0,0 +1,221 @@
+from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine
+from miasm2.expression.simplifications import expr_simp
+from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\
+    ExprMem, ExprCond, ExprCompose, ExprOp
+from miasm2.core import asmblock
+
+
+TOPSTR = "TOP"
+
+def exprid_top(expr):
+    """Return a TOP expression (ExprId("TOP") of size @expr.size
+    @expr: expression to replace with TOP
+    """
+    return ExprId(TOPSTR, expr.size)
+
+
+class SymbolicStateTop(StateEngine):
+
+    def __init__(self, dct, regstop):
+        self._symbols = frozenset(dct.items())
+        self._regstop = frozenset(regstop)
+
+    def __hash__(self):
+        return hash((self.__class__, self._symbols, self._regstop))
+
+    def __str__(self):
+        out = []
+        for dst, src in sorted(self._symbols):
+            out.append("%s = %s" % (dst, src))
+        for dst in self._regstop:
+            out.append('TOP %s' %dst)
+        return "\n".join(out)
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if self.__class__ != other.__class__:
+            return False
+        return (self.symbols == other.symbols and
+                self.regstop == other.regstop)
+
+    def __iter__(self):
+        for dst, src in self._symbols:
+            yield dst, src
+
+    def merge(self, other):
+        """Merge two symbolic states
+        Only equal expressions are kept in both states
+        @other: second symbolic state
+        """
+        symb_a = self.symbols
+        symb_b = other.symbols
+        intersection = set(symb_a.keys()).intersection(symb_b.keys())
+        diff = set(symb_a.keys()).union(symb_b.keys()).difference(intersection)
+        symbols = {}
+        regstop = set()
+        for dst in diff:
+            if dst.is_id():
+                regstop.add(dst)
+        for dst in intersection:
+            if symb_a[dst] == symb_b[dst]:
+                symbols[dst] = symb_a[dst]
+            else:
+                regstop.add(dst)
+        return self.__class__(symbols, regstop)
+
+    @property
+    def symbols(self):
+        """Return the dictionnary of known symbols"""
+        return dict(self._symbols)
+
+    @property
+    def regstop(self):
+        """Return the set of expression with TOP values"""
+        return self._regstop
+
+class SymbExecTopNoMem(SymbolicExecutionEngine):
+    """
+    Symbolic execution, include TOP value.
+    ExprMem are not propagated.
+    Any computation involving a TOP will generate TOP.
+    """
+
+    StateEngine = SymbolicStateTop
+
+    def __init__(self, ir_arch, state, regstop,
+                 func_read=None,
+                 func_write=None,
+                 sb_expr_simp=expr_simp):
+        known_symbols = dict(state)
+        super(SymbExecTopNoMem, self).__init__(ir_arch, known_symbols,
+                                               func_read,
+                                               func_write,
+                                               sb_expr_simp)
+        self.regstop = set(regstop)
+
+    def get_state(self):
+        """Return the current state of the SymbolicEngine"""
+        return self.StateEngine(self.symbols, self.regstop)
+
+    def eval_expr(self, expr, eval_cache=None):
+        if expr in self.regstop:
+            return exprid_top(expr)
+        ret = self.apply_expr_on_state(expr, eval_cache)
+        return ret
+
+    def manage_mem(self, expr, state, cache, level):
+        ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
+        ret = ExprMem(ptr, expr.size)
+        ret = self.get_mem_state(ret)
+        if ret.is_mem() and not ret.arg.is_int() and ret.arg == ptr:
+            ret = exprid_top(expr)
+        assert expr.size == ret.size
+        return ret
+
+    def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0):
+        """
+        Deep First evaluate nodes:
+            1. evaluate node's sons
+            2. simplify
+        """
+
+        if expr in cache:
+            ret = cache[expr]
+        elif expr in state:
+            return state[expr]
+        elif expr.is_int():
+            ret = expr
+        elif expr.is_id():
+            if isinstance(expr.name, asmblock.asm_label) and expr.name.offset is not None:
+                ret = ExprInt(expr.name.offset, expr.size)
+            elif expr in self.regstop:
+                ret = exprid_top(expr)
+            else:
+                ret = state.get(expr, expr)
+        elif expr.is_mem():
+            ret = self.manage_mem(expr, state, cache, level)
+        elif expr.is_cond():
+            cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1)
+            src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1)
+            src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1)
+            if cond.is_id(TOPSTR) or src1.is_id(TOPSTR) or src2.is_id(TOPSTR):
+                ret = exprid_top(expr)
+            else:
+                ret = ExprCond(cond, src1, src2)
+        elif expr.is_slice():
+            arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
+            if arg.is_id(TOPSTR):
+                ret = exprid_top(expr)
+            else:
+                ret = ExprSlice(arg, expr.start, expr.stop)
+        elif expr.is_op():
+            args = []
+            for oarg in expr.args:
+                arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1)
+                assert oarg.size == arg.size
+                if arg.is_id(TOPSTR):
+                    return exprid_top(expr)
+                args.append(arg)
+            ret = ExprOp(expr.op, *args)
+        elif expr.is_compose():
+            args = []
+            for arg in expr.args:
+                arg = self.apply_expr_on_state_visit_cache(arg, state, cache, level+1)
+                if arg.is_id(TOPSTR):
+                    return exprid_top(expr)
+
+                args.append(arg)
+            ret = ExprCompose(*args)
+        else:
+            raise TypeError("Unknown expr type")
+        ret = self.expr_simp(ret)
+        assert expr.size == ret.size
+        cache[expr] = ret
+        return ret
+
+    def apply_change(self, dst, src):
+        eval_cache = {}
+        if dst.is_mem():
+            # If Write to TOP, forget all memory information
+            ret = self.eval_expr(dst.arg, eval_cache)
+            if ret.is_id(TOPSTR):
+                to_del = set()
+                for dst_tmp in self.symbols:
+                    if dst_tmp.is_mem():
+                        to_del.add(dst_tmp)
+                for dst_to_del in to_del:
+                    del self.symbols[dst_to_del]
+            return
+        src_o = self.expr_simp(src)
+
+        # Force update. Ex:
+        # EBX += 1 (state: EBX = EBX+1)
+        # EBX -= 1 (state: EBX = EBX, must be updated)
+        if dst in self.regstop:
+            self.regstop.discard(dst)
+        self.symbols[dst] = src_o
+
+        if dst == src_o:
+            # Avoid useless X = X information
+            del self.symbols[dst]
+
+        if src_o.is_id(TOPSTR):
+            if dst in self.symbols:
+                del self.symbols[dst]
+            self.regstop.add(dst)
+
+class SymbExecTop(SymbExecTopNoMem):
+    """
+    Symbolic execution, include TOP value.
+    ExprMem are propagated.
+    Any computation involving a TOP will generate TOP.
+    WARNING: avoid memory aliases here!
+    """
+
+    def manage_mem(self, expr, state, cache, level):
+        ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
+        ret = ExprMem(ptr, expr.size)
+        ret = self.get_mem_state(ret)
+        assert expr.size == ret.size
+        return ret
diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py
new file mode 100644
index 00000000..df159939
--- /dev/null
+++ b/miasm2/ir/symbexec_types.py
@@ -0,0 +1,243 @@
+from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine
+from miasm2.expression.simplifications import expr_simp
+from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\
+    ExprMem, ExprCond, ExprCompose, ExprOp
+
+from miasm2.core.ctypesmngr import CTypeId
+
+
+class SymbolicStateCTypes(StateEngine):
+    """Store C types of symbols"""
+
+    def __init__(self, dct, infos_types):
+        self._symbols = frozenset(dct.items())
+        self._infos_types = frozenset(infos_types.items())
+
+    def __hash__(self):
+        return hash((self.__class__, self._symbols, self._infos_types))
+
+    def __str__(self):
+        out = []
+        for dst, src in sorted(self._symbols):
+            out.append("%s = %s" % (dst, src))
+        return "\n".join(out)
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if self.__class__ != other.__class__:
+            return False
+        return (self.symbols == other.symbols and
+                self.infos_types == other.infos_types)
+
+    def __iter__(self):
+        for dst, src in self._symbols:
+            yield dst, src
+
+    def merge(self, other):
+        """Merge two symbolic states
+        Only expressions with equal C types in both states are kept.
+        @other: second symbolic state
+        """
+        symb_a = self.symbols
+        symb_b = other.symbols
+        types_a = set(self.infos_types.items())
+        types_b = set(other.infos_types.items())
+        intersection = set(symb_a.keys()).intersection(symb_b.keys())
+        symbols = {}
+        infos_types = dict(types_a.intersection(types_b))
+        for dst in intersection:
+            if symb_a[dst] == symb_b[dst]:
+                symbols[dst] = symb_a[dst]
+        return self.__class__(symbols, infos_types)
+
+    @property
+    def symbols(self):
+        """Return the dictionnary of known symbols'types"""
+        return dict(self._symbols)
+
+    @property
+    def infos_types(self):
+        """Return known types of the state"""
+        return dict(self._infos_types)
+
+
+class SymbExecCType(SymbolicExecutionEngine):
+    """Engine of C types propagation
+    WARNING: avoid memory aliases here!
+    """
+
+    StateEngine = SymbolicStateCTypes
+    OBJC_INTERNAL = "___OBJC___"
+
+    def __init__(self, ir_arch,
+                 symbols, infos_types,
+                 chandler,
+                 func_read=None,
+                 func_write=None,
+                 sb_expr_simp=expr_simp):
+        self.chandler = chandler
+        self.infos_types = dict(infos_types)
+        super(SymbExecCType, self).__init__(ir_arch,
+                                            {},
+                                            func_read,
+                                            func_write,
+                                            sb_expr_simp)
+        self.symbols = dict(symbols)
+        offset_types = []
+        for name in [('int',), ('long',),
+                     ('long', 'long'),
+                     ('char',), ('short',),
+
+                     ('unsigned', 'char',), ('unsigned', 'short',),
+                     ('unsigned', 'int',), ('unsigned', 'long',),
+                     ('unsigned', 'long', 'long')]:
+            objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId(*name))
+            offset_types.append(objc)
+        self.offset_types = offset_types
+
+    def is_type_offset(self, objc):
+        """Return True if @objc is char/short/int/long"""
+        return objc in self.offset_types
+
+    def get_tpye_int_by_size(self, size):
+        """Return a char/short/int/long type with the size equal to @size
+        @size: size in bit"""
+
+        for objc in self.offset_types:
+            if objc.size == size / 8:
+                return objc
+        return None
+
+    def is_offset_list(self, types, size):
+        """Return the corresponding char/short/int/long type of @size, if every
+        types in the list @types are type offset
+        @types: a list of c types
+        @size: size in bit"""
+
+        for arg_type in types:
+            if not self.is_type_offset(arg_type):
+                return None
+        objc = self.get_tpye_int_by_size(size)
+        if objc:
+            return objc
+        # default size
+        objc = self.offset_types[0]
+        return objc
+
+    def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0):
+        """
+        Deep First evaluate nodes:
+            1. evaluate node's sons
+            2. simplify
+        """
+
+        expr = self.expr_simp(expr)
+
+        if expr in cache:
+            return cache[expr]
+        elif expr in state:
+            return state[expr]
+        elif isinstance(expr, ExprInt):
+            objc = self.get_tpye_int_by_size(expr.size)
+            if objc is None:
+                objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId('int'))
+            return objc
+        elif isinstance(expr, ExprId):
+            if expr in state:
+                return state[expr]
+            return None
+        elif isinstance(expr, ExprMem):
+            ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level + 1)
+            if ptr is None:
+                return None
+            self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = ptr
+            ptr_expr = ExprId(self.OBJC_INTERNAL, expr.arg.size)
+            objcs = self.chandler.expr_to_types(ExprMem(ptr_expr, expr.size))
+            if objcs is None:
+                return None
+            objc = objcs[0]
+            return objc
+        elif isinstance(expr, ExprCond):
+            src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level + 1)
+            src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level + 1)
+            types = [src1, src2]
+            objc = self.is_offset_list(types, expr.size)
+            if objc:
+                return objc
+            return None
+        elif isinstance(expr, ExprSlice):
+            objc = self.get_tpye_int_by_size(expr.size)
+            if objc is None:
+                # default size
+                objc = self.offset_types[0]
+            return objc
+        elif isinstance(expr, ExprOp):
+            args = []
+            types = []
+            for oarg in expr.args:
+                arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1)
+                types.append(arg)
+            if None in types:
+                return None
+            objc = self.is_offset_list(types, expr.size)
+            if objc:
+                return objc
+            # Find Base + int
+            if expr.op != '+':
+                return None
+            args = list(expr.args)
+            if args[-1].is_int():
+                offset = args.pop()
+                types.pop()
+            if len(args) == 1:
+                arg, arg_type = args.pop(), types.pop()
+                self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = arg_type
+                ptr_expr = ExprId(self.OBJC_INTERNAL, arg.size)
+                objc = self.chandler.expr_to_types(ptr_expr + offset)
+                objc = objc[0]
+                return objc
+            return None
+        elif isinstance(expr, ExprCompose):
+            types = set()
+            for oarg in expr.args:
+                arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1)
+                types.add(arg)
+            objc = self.is_offset_list(types, expr.size)
+            if objc:
+                return objc
+            return None
+        else:
+            raise TypeError("Unknown expr type")
+
+    def get_state(self):
+        """Return the current state of the SymbolicEngine"""
+        return self.StateEngine(self.symbols, self.infos_types)
+
+    def eval_ir_expr(self, assignblk):
+        """
+        Evaluate AssignBlock on the current state
+        @assignblk: AssignBlock instance
+        """
+        pool_out = {}
+        eval_cache = {}
+        for dst, src in assignblk.iteritems():
+            src = self.eval_expr(src, eval_cache)
+            if isinstance(dst, ExprMem):
+                continue
+            elif isinstance(dst, ExprId):
+                pool_out[dst] = src
+            else:
+                raise ValueError("affected zarb", str(dst))
+        return pool_out.iteritems()
+
+    def apply_change(self, dst, src):
+        objc = src
+        if objc is None and dst in self.symbols:
+            del self.symbols[dst]
+        else:
+            self.symbols[dst] = objc
+
+    def del_mem_above_stack(self, stack_ptr):
+        """No stack deletion"""
+        return
diff --git a/miasm2/jitter/emulatedsymbexec.py b/miasm2/jitter/emulatedsymbexec.py
index d4a67fe8..97f038dc 100644
--- a/miasm2/jitter/emulatedsymbexec.py
+++ b/miasm2/jitter/emulatedsymbexec.py
@@ -105,6 +105,8 @@ class EmulatedSymbExec(SymbolicExecutionEngine):
         """Handle 'segm' operation"""
         if not expr.is_op_segm():
             return expr
+        if not expr.args[0].is_int():
+            return expr
         segm_nb = int(expr.args[0])
         segmaddr = self.cpu.get_segm_base(segm_nb)
         return e_s(m2_expr.ExprInt(segmaddr, expr.size) + expr.args[1])
@@ -114,7 +116,9 @@ class EmulatedSymbExec(SymbolicExecutionEngine):
         if expr.op != "cpuid":
             return expr
 
-        a, reg_num = (int(x) for x in expr.args)
+        if any(not arg.is_int() for arg in expr.args):
+            return expr
+        a, reg_num = (int(arg) for arg in expr.args)
 
         # Not found error is keeped on purpose
         return m2_expr.ExprInt(self.cpuid[a][reg_num], expr.size)
diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py
index ed55aff8..cf6dea31 100644
--- a/miasm2/jitter/llvmconvert.py
+++ b/miasm2/jitter/llvmconvert.py
@@ -135,13 +135,31 @@ class LLVMContext_JIT(LLVMContext):
         self.library_filenames = library_filenames
         self.ir_arch = ir_arch
         self.arch_specific()
+        self.load_libraries()
         LLVMContext.__init__(self, name)
         self.vmcpu = {}
 
-    def new_module(self, name="mod"):
-        LLVMContext.new_module(self, name)
+    def load_libraries(self):
+        # Get LLVM specific functions
+        name = "libLLVM-%d.%d" % (llvm.llvm_version_info[0],
+                                  llvm.llvm_version_info[1],
+        )
+        try:
+            # On Windows, no need to add ".dll"
+            self.add_shared_library(name)
+        except RuntimeError:
+            try:
+                # On Linux, ".so" is needed
+                self.add_shared_library("%s.so" % name)
+            except RuntimeError:
+                pass
+
+        # Load additional libraries
         for lib_fname in self.library_filenames:
             self.add_shared_library(lib_fname)
+
+    def new_module(self, name="mod"):
+        LLVMContext.new_module(self, name)
         self.add_memlookups()
         self.add_get_exceptionflag()
         self.add_op()
@@ -512,7 +530,8 @@ class LLVMFunction():
 
         Get or create a (LLVM module-)global constant with *name* or *value*.
         """
-        module = self.mod
+        if name in self.mod.globals:
+            return self.mod.globals[name]
         data = llvm_ir.GlobalVariable(self.mod, value.type, name=name)
         data.global_constant = True
         data.initializer = value
diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py
index aba47df1..8a6b215c 100644
--- a/test/arch/x86/unit/asm_test.py
+++ b/test/arch/x86/unit/asm_test.py
@@ -90,3 +90,13 @@ class Asm_Test_16(Asm_Test):
         self.myjit.vm.add_memory_page(self.run_addr, PAGE_READ | PAGE_WRITE, self.assembly)
         self.myjit.push_uint16_t(self.ret_addr)
         self.myjit.add_breakpoint(self.ret_addr, lambda x:False)
+
+class Asm_Test_64(Asm_Test):
+    arch_name = "x86_64"
+    arch_attrib = 64
+    ret_addr = 0x1337beef
+
+    def init_machine(self):
+        self.myjit.vm.add_memory_page(self.run_addr, PAGE_READ | PAGE_WRITE, self.assembly)
+        self.myjit.push_uint64_t(self.ret_addr)
+        self.myjit.add_breakpoint(self.ret_addr, lambda x:False)
diff --git a/test/arch/x86/unit/mn_div.py b/test/arch/x86/unit/mn_div.py
new file mode 100644
index 00000000..84569607
--- /dev/null
+++ b/test/arch/x86/unit/mn_div.py
@@ -0,0 +1,17 @@
+import sys
+from asm_test import Asm_Test_64
+
+class Test_DIV(Asm_Test_64):
+    TXT = '''
+main:
+        MOV RAX, 0x8877665544332211
+        MOV RBX, 0x11223344556677
+        DIV RBX
+        RET
+    '''
+    def check(self):
+        assert self.myjit.cpu.RAX == 0x7F7
+        assert self.myjit.cpu.RDX == 0x440
+
+if __name__ == "__main__":
+    [test(*sys.argv[1:])() for test in [Test_DIV]]
diff --git a/test/expression/parser.py b/test/expression/parser.py
new file mode 100644
index 00000000..9c01c8a1
--- /dev/null
+++ b/test/expression/parser.py
@@ -0,0 +1,16 @@
+from miasm2.expression.parser import str_to_expr
+from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \
+    ExprCond, ExprCompose, ExprOp, ExprAff
+
+for expr_test in [ExprInt(0x12, 32),
+                  ExprId('test', 32),
+                  ExprSlice(ExprInt(0x10, 32), 0, 8),
+                  ExprMem(ExprInt(0x10, 32), 32),
+                  ExprCond(ExprInt(0x10, 32), ExprInt(0x11, 32), ExprInt(0x12, 32)),
+                  ExprCompose(ExprInt(0x10, 16), ExprInt(0x11, 8), ExprInt(0x12, 8)),
+                  ExprInt(0x11, 8) + ExprInt(0x12, 8),
+                  ExprAff(ExprId('EAX', 32),  ExprInt(0x12, 32)),
+                  ]:
+
+    print 'Test: %s' % expr_test
+    assert str_to_expr(repr(expr_test)) == expr_test
diff --git a/test/test_all.py b/test/test_all.py
index 41954ff1..3da2dbb5 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -60,6 +60,7 @@ class ArchUnitTest(RegressionTest):
 # script -> blacklisted jitter
 blacklist = {
     "x86/unit/mn_float.py": ["python", "llvm"],
+    "x86/unit/mn_div.py": ["tcc", "gcc"],
 }
 for script in ["x86/sem.py",
                "x86/unit/mn_strings.py",
@@ -80,6 +81,7 @@ for script in ["x86/sem.py",
                "x86/unit/mn_pushpop.py",
                "x86/unit/mn_seh.py",
                "x86/unit/mn_cpuid.py",
+               "x86/unit/mn_div.py",
                "arm/arch.py",
                "arm/sem.py",
                "aarch64/unit/mn_ubfm.py",
@@ -241,6 +243,7 @@ for script in ["modint.py",
                "simplifications.py",
                "expression_helper.py",
                "expr_pickle.py",
+               "parser.py",
                ]:
     testset += RegressionTest([script], base_dir="expression")