about summary refs log tree commit diff stats
path: root/miasm2/core
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- miasm2/core/__init__.py 0
-rw-r--r-- miasm2/core/asmbloc.py 1412
-rw-r--r-- miasm2/core/bin_stream.py 175
-rw-r--r-- miasm2/core/cpu.py 1804
-rw-r--r-- miasm2/core/graph.py 126
-rw-r--r-- miasm2/core/interval.py 238
-rw-r--r-- miasm2/core/parse_asm.py 237
-rw-r--r-- miasm2/core/utils.py 39
8 files changed, 4031 insertions, 0 deletions
diff --git a/miasm2/core/__init__.py b/miasm2/core/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/miasm2/core/__init__.py
diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py
new file mode 100644
index 00000000..945eb990
--- /dev/null
+++ b/miasm2/core/asmbloc.py
@@ -0,0 +1,1412 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+import logging
+import miasm2.expression.expression as m2_expr
+from miasm2.expression.modint import moduint, modint
+from miasm2.core.graph import DiGraph
+from utils import Disasm_Exception
+from miasm2.core.graph import DiGraph
+import inspect
+
+# Module-level logger named "asmbloc". A stream handler using a
+# "LEVEL: message" format is attached and the threshold is set to WARNING.
+log_asmbloc = logging.getLogger("asmbloc")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log_asmbloc.addHandler(console_handler)
+log_asmbloc.setLevel(logging.WARNING)
+
+
+def whoami():
+    """Return the name of the function two frames above this call.
+
+    In inspect.stack(), entry 0 is whoami itself, entry 1 its caller and
+    entry 2 the caller's caller; field 3 of a frame record is the
+    function name.
+    """
+    frame_record = inspect.stack()[2]
+    return frame_record[3]
+
+
+def is_int(a):
+    """Return True if `a` is an int, a long, a moduint or a modint."""
+    return isinstance(a, (int, long, moduint, modint))
+
+
+def expr_is_label(e):
+    """Return True if `e` is an ExprId whose name is an asm_label."""
+    return isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label)
+
+
+def expr_is_int_or_label(e):
+    """Return True if `e` is an ExprInt, or an ExprId naming an asm_label."""
+    if isinstance(e, m2_expr.ExprInt):
+        return True
+    return isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label)
+
+
+class asm_label:
+    """Assembly label: a name, optionally bound to an offset.
+
+    Attributes:
+        name: label name; an integer name is turned into "loc_<16 hex digits>"
+        offset: None, or the offset converted with int()
+        fixedblocs: flag initialised to False
+        attrib: initialised to None
+
+    Equality and hashing both rely on a hash of (name, offset) computed
+    once in the constructor: changing `name` or `offset` afterwards does
+    not change how the label compares or hashes.
+    """
+
+    def __init__(self, name="", offset=None):
+        self.fixedblocs = False
+        if is_int(name):
+            # keep only the low 64 bits so the name has a fixed width
+            name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF)
+        self.name = name
+        self.attrib = None
+        if offset is None:
+            self.offset = offset
+        else:
+            self.offset = int(offset)
+        self._hash = hash((self.name, self.offset))
+
+    def __str__(self):
+        if isinstance(self.offset, (int, long)):
+            return "%s:0x%08x" % (self.name, self.offset)
+        else:
+            return "%s:%s" % (self.name, str(self.offset))
+
+    def __repr__(self):
+        rep = '<asmlabel '
+        if self.name:
+            rep += repr(self.name) + ' '
+        rep += '>'
+        return rep
+
+    def __hash__(self):
+        return self._hash
+
+    def __eq__(self, a):
+        if isinstance(a, asm_label):
+            return self._hash == a._hash
+        else:
+            return False
+
+    def __ne__(self, a):
+        # Python 2 does not derive != from __eq__: without this method,
+        # `x != y` compares identities and is True for two equal labels.
+        return not self.__eq__(a)
+
+
+class asm_raw:
+    """Wrapper around a raw value stored in the `raw` attribute."""
+
+    def __init__(self, raw=""):
+        self.raw = raw
+
+    def __str__(self):
+        # same text as repr(self.raw)
+        return "%r" % (self.raw,)
+
+
+class asm_constraint(object):
+    """Link toward a label, tagged with a constraint type.
+
+    The type `c_t` is one of the class constants c_to, c_next or c_bad.
+    Equality and hashing rely on a hash of (label, c_t) computed once in
+    the constructor.
+    """
+    c_to = "c_to"
+    c_next = "c_next"
+    c_bad = "c_bad"
+
+    def __init__(self, label=None, c_t=c_to):
+        self.label = label
+        self.c_t = c_t
+        self._hash = hash((self.label, self.c_t))
+
+    def __str__(self):
+        return "%s:%s" % (str(self.c_t), str(self.label))
+
+    def __hash__(self):
+        return self._hash
+
+    def __eq__(self, a):
+        if isinstance(a, asm_constraint):
+            return self._hash == a._hash
+        else:
+            return False
+
+    def __ne__(self, a):
+        # Python 2 does not derive != from __eq__: without this method,
+        # `x != y` compares identities and is True for two equal constraints.
+        return not self.__eq__(a)
+
+
+class asm_constraint_next(asm_constraint):
+    """asm_constraint whose type is always c_next."""
+
+    def __init__(self, label=None):
+        kind = asm_constraint.c_next
+        super(asm_constraint_next, self).__init__(label, c_t=kind)
+
+
+class asm_constraint_to(asm_constraint):
+    """asm_constraint whose type is always c_to."""
+
+    def __init__(self, label=None):
+        kind = asm_constraint.c_to
+        super(asm_constraint_to, self).__init__(label, c_t=kind)
+
+
+class asm_constraint_bad(asm_constraint):
+    """asm_constraint whose type is always c_bad."""
+
+    def __init__(self, label=None):
+        kind = asm_constraint.c_bad
+        super(asm_constraint_bad, self).__init__(label, c_t=kind)
+
+
+class asm_bloc:
+    """Bloc of assembly lines.
+
+    Attributes:
+        label: label given at construction (None by default)
+        lines: list of the lines added with addline(), in order
+        bto: set of asm_constraint describing where the bloc goes
+    """
+
+    def __init__(self, label=None):
+        self.bto = set()
+        self.lines = []
+        self.label = label
+
+    def __str__(self):
+        out = [str(self.label)]
+        out.extend(str(l) for l in self.lines)
+        if self.bto:
+            lbls = ["->"]
+            for l in self.bto:
+                if l is None:
+                    lbls.append("Unknown? ")
+                else:
+                    lbls.append(str(l) + " ")
+            out.append('\t'.join(lbls))
+        return '\n'.join(out)
+
+    def addline(self, l):
+        """Append line `l` at the end of the bloc."""
+        self.lines.append(l)
+
+    def addto(self, c):
+        """Add constraint `c` to the successors of the bloc."""
+        assert(type(self.bto) is set)
+        self.bto.add(c)
+
+    def split(self, offset, l):
+        """Cut the bloc in two at `offset` and return the second half.
+
+        The lines located at `offset` and after are moved to a new bloc
+        labelled `l`; this bloc keeps the lines before `offset` and gets
+        a c_next constraint toward `l`.
+
+        Return None (after a warning) when no line of the bloc starts at
+        `offset` / `l.offset`.
+        """
+        log_asmbloc.debug('split at %x' % offset)
+        offsets = [x.offset for x in self.lines]
+        # both values are checked: the index below is looked up with
+        # `offset`, which would raise ValueError if it were missing
+        if l.offset not in offsets or offset not in offsets:
+            log_asmbloc.warning(
+                'cannot split bloc at %X ' % offset +
+                'middle instruction? default middle')
+            return None
+        new_bloc = asm_bloc(l)
+        i = offsets.index(offset)
+
+        self.lines, new_bloc.lines = self.lines[:i], self.lines[i:]
+        flow_mod_instr = self.get_flow_instr()
+        log_asmbloc.debug('flow mod %r' % flow_mod_instr)
+        c = asm_constraint(l, asm_constraint.c_next)
+        # move dst if flowgraph modifier was in original bloc
+        # (usecase: split delayslot bloc)
+        if flow_mod_instr:
+            for xx in self.bto:
+                log_asmbloc.debug('lbl %s' % xx)
+            c_next = set(
+                [x for x in self.bto if x.c_t == asm_constraint.c_next])
+            c_to = [x for x in self.bto if x.c_t != asm_constraint.c_next]
+            self.bto = set([c] + c_to)
+            new_bloc.bto = c_next
+        else:
+            new_bloc.bto = self.bto
+            self.bto = set([c])
+        return new_bloc
+
+    def get_range(self):
+        """Return (offset of the first line, offset of the last line).
+
+        An empty bloc gives (0, 0).
+        """
+        if len(self.lines):
+            return self.lines[0].offset, self.lines[-1].offset
+        else:
+            return 0, 0
+
+    def get_offsets(self):
+        """Return the list of the offsets of the lines."""
+        return [x.offset for x in self.lines]
+
+    def add_cst(self, offset, c_t, symbol_pool):
+        """Add a constraint of type `c_t` toward `offset`.
+
+        `offset` is an int/long (label fetched or created by offset), a
+        str (label fetched or created by name) or an asm_label.
+        Raise ValueError for any other type.
+        """
+        if type(offset) in [int, long]:
+            l = symbol_pool.getby_offset_create(offset)
+        elif type(offset) is str:
+            l = symbol_pool.getby_name_create(offset)
+        elif isinstance(offset, asm_label):
+            l = offset
+        else:
+            raise ValueError('unknown offset type %r' % offset)
+        c = asm_constraint(l, c_t)
+        self.bto.add(c)
+
+    def get_flow_instr(self):
+        """Look for a flow-modifying line among the last lines of the bloc.
+
+        The last `delayslot + 1` lines are inspected, `delayslot` being
+        read on the first line. Return None when the bloc is empty or
+        when none of them splits or breaks the flow.
+
+        Raise NotImplementedError when such a line is found: this case
+        is explicitly flagged as not fully functional.
+        """
+        if not self.lines:
+            return None
+        for i in range(-1, -1 - self.lines[0].delayslot - 1, -1):
+            if -i > len(self.lines):
+                # fewer lines than delayslot + 1: nothing more to inspect
+                break
+            l = self.lines[i]
+            if l.splitflow() or l.breakflow():
+                raise NotImplementedError('not fully functional')
+        return None
+
+    def get_subcall_instr(self):
+        """Return the sub-call line found among the last lines, or None.
+
+        The last `delayslot + 1` lines are inspected from the end,
+        `delayslot` being read on the first line of the bloc.
+        """
+        if not self.lines:
+            return None
+        for i in range(-1, -1 - self.lines[0].delayslot - 1, -1):
+            if -i > len(self.lines):
+                # fewer lines than delayslot + 1: nothing more to inspect
+                break
+            l = self.lines[i]
+            if l.is_subcall():
+                return l
+        return None
+
+    def get_next(self):
+        """Return the label of a c_next constraint of the bloc, or None."""
+        for x in self.bto:
+            if x.c_t == asm_constraint.c_next:
+                return x.label
+        return None
+
+
+class asm_symbol_pool:
+    """Collection of asm_label, indexed by name and by offset.
+
+    Attributes:
+        labels: list of every label added to the pool
+        s: dict name -> label
+        s_offset: dict offset -> label
+        no_collision: when True, add_label refuses colliding labels
+        label_num: counter used by gen_label
+    """
+
+    def __init__(self, no_collision=True):
+        self.labels = []
+        self.s = {}
+        self.s_offset = {}
+        self.no_collision = no_collision
+        self.label_num = 0
+
+    def add_label(self, name="", offset=None):
+        """
+        This should be the only method to create new asm_label objects
+
+        Create the label, register it by name (unless the name is empty)
+        and by offset (unless the offset is None), and return it.
+        Raise ValueError when `no_collision` is set and a different
+        label already uses the same name or the same offset.
+        """
+        l = asm_label(name, offset)
+        collision = None
+        if l.offset in self.s_offset and l != self.s_offset[l.offset]:
+            collision = 'offset'
+        if l.name in self.s and l != self.s[l.name]:
+            collision = 'name'
+        if self.no_collision and collision == 'offset':
+            raise ValueError('symbol %s has same offset as %s' %
+                             (l, self.s_offset[l.offset]))
+        if self.no_collision and collision == 'name':
+            raise ValueError(
+                'symbol %s has same name as %s' % (l, self.s[l.name]))
+        self.labels.append(l)
+        if l.offset is not None:
+            self.s_offset[l.offset] = l
+        if l.name != "":
+            self.s[l.name] = l
+        return l
+
+    def remove(self, obj):
+        """
+        obj can be an asm_label or an offset
+
+        The label is dropped from the name and offset indexes (the
+        `labels` list is left untouched). Removing an unknown offset is
+        a no-op.
+        """
+        if isinstance(obj, asm_label):
+            if obj.name in self.s:
+                del(self.s[obj.name])
+            if obj.offset is not None and obj.offset in self.s_offset:
+                del(self.s_offset[obj.offset])
+        else:
+            offset = int(obj)
+            # an unknown offset gives None here instead of leaving an
+            # int on which `.name` would be read
+            label = self.s_offset.pop(offset, None)
+            if label is not None and label.name in self.s:
+                del(self.s[label.name])
+
+    def del_offset(self, l=None):
+        """Forget the offset of label `l`, or of every named label.
+
+        With no argument the whole offset index is cleared and the
+        offset of every label registered by name is reset to None.
+        """
+        if l is not None:
+            if l.offset in self.s_offset:
+                del(self.s_offset[l.offset])
+            l.offset = None
+        else:
+            self.s_offset = {}
+            for l in self.s:
+                self.s[l].offset = None
+
+    def getby_offset(self, offset):
+        """Return the label registered at `offset`, or None."""
+        return self.s_offset.get(offset, None)
+
+    def getby_name(self, name):
+        """Return the label registered under `name`, or None."""
+        return self.s.get(name, None)
+
+    def getby_name_create(self, name):
+        """Return the label named `name`, creating it if needed."""
+        l = self.getby_name(name)
+        if l is None:
+            l = self.add_label(name)
+        return l
+
+    def getby_offset_create(self, offset):
+        """Return the label at `offset`, creating it if needed.
+
+        A created label uses `offset` both as name and as offset.
+        """
+        l = self.getby_offset(offset)
+        if l is None:
+            l = self.add_label(offset, offset)
+        return l
+
+    def rename(self, s, newname):
+        """Rename label `s` to `newname` and update the name index.
+
+        A label whose name is not in the pool only triggers a warning.
+        """
+        if not s.name in self.s:
+            log_asmbloc.warn('unk symb')
+            return
+        del(self.s[s.name])
+        s.name = newname
+        self.s[s.name] = s
+
+    def set_offset(self, label, offset):
+        """Set the offset of `label` and update the offset index.
+
+        Raise ValueError if `label` is not in the pool.
+        """
+        # Note that there is a special case when the offset is a list
+        # it happens when offsets are recomputed in resolve_symbol*
+        if not label in self.labels:
+            raise ValueError('label %s not in symbol pool' % label)
+        if not isinstance(label.offset, list) and label.offset in self.s_offset:
+            del(self.s_offset[label.offset])
+        label.offset = offset
+        # like add_label, never index a label under the None offset
+        if offset is not None and not isinstance(offset, list):
+            self.s_offset[offset] = label
+
+    def items(self):
+        """Return a copy of the list of labels."""
+        return self.labels[:]
+
+    def __str__(self):
+        return ''.join([str(l) + '\n' for l in self.labels])
+
+    def __in__(self, obj):
+        # NOTE: `__in__` is not a method Python calls for the `in`
+        # operator (that is __contains__ below); kept for explicit callers.
+        if obj in self.s:
+            return True
+        if obj in self.s_offset:
+            return True
+        return False
+
+    def __getitem__(self, item):
+        """Return the label whose name or offset is `item`.
+
+        Names are looked up first. Raise KeyError when nothing matches.
+        """
+        if item in self.s:
+            return self.s[item]
+        if item in self.s_offset:
+            return self.s_offset[item]
+        raise KeyError('unknown symbol %r' % item)
+
+    def __contains__(self, item):
+        return item in self.s or item in self.s_offset
+
+    def merge(self, symbol_pool):
+        """Add the labels and both indexes of `symbol_pool` to this pool."""
+        self.labels += symbol_pool.labels
+        self.s.update(symbol_pool.s)
+        self.s_offset.update(symbol_pool.s_offset)
+
+    def gen_label(self):
+        """Create and return a new label named "lbl_gen_<counter>"."""
+        l = self.add_label("lbl_gen_%.8X" % (self.label_num))
+        self.label_num += 1
+        return l
+
+
+def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool,
+             dont_dis=[], split_dis=[
+             ], follow_call=False, patch_instr_symb=True,
+             dontdis_retcall=False, lines_wd=None,
+             dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
+             attrib={}):
+    """Disassemble one bloc starting at `offset` and fill `cur_bloc`.
+
+    Instructions are decoded with mnemo.dis(pool_bin, attrib, offset) and
+    appended to `cur_bloc` until a flow-breaking instruction and its
+    delay slots have been consumed, or until a stop condition is met.
+
+    Parameters:
+        job_done: set of offsets already disassembled; every decoded
+            offset is added to it
+        symbol_pool: pool used to fetch/create destination labels
+        dont_dis: offsets at which the bloc stops (c_next constraint)
+        split_dis: offsets at which the bloc stops, except on its first line
+        follow_call: when False, destinations of sub-calls are not
+            added as c_to constraints
+        patch_instr_symb: not used in this function
+        dontdis_retcall: when True, the offset following a sub-call is
+            not scheduled
+        lines_wd: maximum number of lines (None: no limit)
+        dis_bloc_callback: if not None, called at the end with
+            (mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool)
+        dont_dis_nulstart_bloc: when True, stop on an instruction made
+            only of NUL bytes and mark it with a c_bad constraint
+
+    Return the set of offsets which remain to be disassembled.
+    """
+    # pool_bin.offset = offset
+    lines_cpt = 0
+    in_delayslot = False
+    delayslot_count = mnemo.delayslot
+    offsets_to_dis = set()
+    add_next_offset = False
+    log_asmbloc.debug("dis at %X" % int(offset))
+    # loop until the delay slots of the flow-breaking instruction are done
+    while not in_delayslot or delayslot_count > 0:
+        if in_delayslot:
+            delayslot_count -= 1
+
+        # forced bloc boundary: link to the next bloc and stop here
+        if offset in dont_dis or (lines_cpt > 0 and offset in split_dis):
+            cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
+            offsets_to_dis.add(offset)
+            break
+
+        lines_cpt += 1
+        if lines_wd is not None and lines_cpt > lines_wd:
+            # log_asmbloc.warning( "lines watchdog reached at %X"%int(offset))
+            break
+
+        # reached code which is already disassembled: just link to it
+        if offset in job_done:
+            cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
+            break
+
+        off_i = offset
+        try:
+            # print repr(pool_bin.getbytes(offset, 4))
+            instr = mnemo.dis(pool_bin, attrib, offset)
+        except (Disasm_Exception, IOError), e:
+            log_asmbloc.warning(e)
+            instr = None
+
+        # undecodable bytes: mark the offset as bad and stop
+        if instr is None:
+            log_asmbloc.warning("cannot disasm at %X" % int(off_i))
+            cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool)
+            break
+
+        # XXX TODO nul start block option
+        # instruction whose bytes are all NUL
+        if dont_dis_nulstart_bloc and instr.b.count('\x00') == instr.l:
+            log_asmbloc.warning("reach nul instr at %X" % int(off_i))
+            cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool)
+            break
+
+        # special case: flow graph modificator in delayslot
+        if in_delayslot and instr and (instr.splitflow() or instr.breakflow()):
+            add_next_offset = True
+            break
+
+        job_done.add(offset)
+        log_asmbloc.debug("dis at %X" % int(offset))
+
+        offset += instr.l
+        log_asmbloc.debug(instr)
+        log_asmbloc.debug(instr.args)
+
+        cur_bloc.addline(instr)
+        if not instr.breakflow():
+            continue
+        # test split
+        if instr.splitflow() and not (instr.is_subcall() and dontdis_retcall):
+            add_next_offset = True
+            # cur_bloc.add_cst(n, asm_constraint.c_next, symbol_pool)
+            pass
+        if instr.dstflow():
+            instr.dstflow2label(symbol_pool)
+            dst = instr.getdstflow(symbol_pool)
+            # keep only the destinations which are labels
+            dstn = []
+            for d in dst:
+                if isinstance(d, m2_expr.ExprId) and isinstance(d.name, asm_label):
+                    dstn.append(d.name)
+            dst = dstn
+            if (not instr.is_subcall()) or follow_call:
+                cur_bloc.bto.update(
+                    [asm_constraint(x, asm_constraint.c_to) for x in dst])
+
+        # get in delayslot mode
+        in_delayslot = True
+        delayslot_count = instr.delayslot
+
+    # every non-bad successor label is a candidate for disassembly
+    for c in cur_bloc.bto:
+        if c.c_t == asm_constraint.c_bad:
+            continue
+        if isinstance(c.label, asm_label):
+            offsets_to_dis.add(c.label.offset)
+
+    if add_next_offset:
+        cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
+        offsets_to_dis.add(offset)
+
+    if dis_bloc_callback is not None:
+        dis_bloc_callback(
+            mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool)
+    # print 'dst', [hex(x) for x in offsets_to_dis]
+    return offsets_to_dis
+
+
+def split_bloc(mnemo, attrib, pool_bin, blocs,
+    symbol_pool, more_ref=None, dis_bloc_callback=None):
+    """Split the blocs which are targeted in their middle.
+
+    Every offset referenced by a non-bad constraint of `blocs`, plus the
+    offsets listed in `more_ref` (which must be known to symbol_pool),
+    is a potential bloc start. A bloc containing such an offset strictly
+    after its first line is split there; the new bloc is appended to
+    `blocs`, so it is examined in turn.
+
+    If `dis_bloc_callback` is set, it is called on each new bloc with
+    (mnemo, attrib, pool_bin, new_bloc, offsets_to_dis, symbol_pool).
+
+    Return `blocs` (the list is modified in place).
+    """
+    i = -1
+    err = False
+    if not more_ref:
+        more_ref = []
+
+    # get all possible dst
+    bloc_dst = [symbol_pool.s_offset[x] for x in more_ref]
+    for b in blocs:
+        for c in b.bto:
+            if not isinstance(c.label, asm_label):
+                continue
+            if c.c_t == asm_constraint.c_bad:
+                continue
+            bloc_dst.append(c.label)
+
+    # from labels to offsets, dropping labels without offset
+    bloc_dst = [x.offset for x in bloc_dst if x.offset is not None]
+
+    # index loop: `blocs` grows while it is walked
+    j = -1
+    while j < len(blocs) - 1:
+        j += 1
+        cb = blocs[j]
+        a, b = cb.get_range()
+
+        for off in bloc_dst:
+            # only offsets after the first line and up to the last one
+            if not (off > a and off <= b):
+                continue
+            l = symbol_pool.getby_offset_create(off)
+            new_b = cb.split(off, l)
+            log_asmbloc.debug("split bloc %x" % off)
+            if new_b is None:
+                log_asmbloc.error("cannot split %x!!" % off)
+                err = True
+                break
+            if dis_bloc_callback:
+                offsets_to_dis = set(
+                    [x.label.offset for x in new_b.bto
+                    if isinstance(x.label, asm_label)])
+                dis_bloc_callback(
+                    mnemo, attrib, pool_bin, new_b, offsets_to_dis,
+                    symbol_pool)
+            blocs.append(new_b)
+            # the current bloc is now shorter: refresh its range
+            a, b = cb.get_range()
+
+        """
+        if err:
+            break
+        """
+    return blocs
+
+
+def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[],
+                 split_dis=[], follow_call=False, patch_instr_symb=True,
+                 dontdis_retcall=False,
+                 blocs_wd=None, lines_wd=None, blocs=None,
+                 dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
+                 attrib={}):
+    """Disassemble every bloc reachable from `offset`.
+
+    Offsets are taken from a work list initialised with `offset`; each
+    one is disassembled with dis_bloc(), which returns the next offsets
+    to visit. The resulting blocs are finally passed to split_bloc().
+
+    Parameters (the others are forwarded to dis_bloc):
+        job_done: set of offsets already disassembled (updated)
+        dont_dis: offsets to skip; a (start, stop) tuple skips every
+            offset with start <= offset < stop
+        blocs_wd: maximum number of work-list entries handled
+            (None: no limit)
+        blocs: list to which the new blocs are appended (a new list is
+            created when None)
+
+    Return the list of blocs returned by split_bloc().
+    """
+    log_asmbloc.info("dis bloc all")
+    if blocs is None:
+        blocs = []
+    todo = [offset]
+
+    bloc_cpt = 0
+    while len(todo):
+        bloc_cpt += 1
+        if blocs_wd is not None and bloc_cpt > blocs_wd:
+            log_asmbloc.debug("blocs watchdog reached at %X" % int(offset))
+            break
+
+        n = todo.pop(0)
+        # labels without offset give None entries: test before int(),
+        # which would raise TypeError on None
+        if n is None:
+            continue
+        n = int(n)
+        if n in job_done:
+            continue
+
+        if n in dont_dis:
+            continue
+        # ranges given as (start, stop) tuples in dont_dis
+        dd_flag = False
+        for dd in dont_dis:
+            if not isinstance(dd, tuple):
+                continue
+            dd_a, dd_b = dd
+            if dd_a <= n < dd_b:
+                dd_flag = True
+                break
+        if dd_flag:
+            continue
+        l = symbol_pool.getby_offset_create(n)
+        cur_bloc = asm_bloc(l)
+        todo += dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool,
+                         dont_dis, split_dis, follow_call, patch_instr_symb,
+                         dontdis_retcall,
+                         dis_bloc_callback=dis_bloc_callback,
+                         lines_wd=lines_wd,
+                         dont_dis_nulstart_bloc=dont_dis_nulstart_bloc,
+                         attrib=attrib)
+        blocs.append(cur_bloc)
+
+    return split_bloc(mnemo, attrib, pool_bin, blocs,
+    symbol_pool, dis_bloc_callback=dis_bloc_callback)
+
+
+def bloc2graph(blocs, label=False, lines=True):
+    """Return the text of a dot graph describing `blocs`.
+
+    Each bloc gives a node named after its label; each constraint whose
+    label is an ExprId or an asm_label gives an edge annotated with the
+    constraint type. Other constraints (a None label for instance) are
+    skipped.
+
+    Parameters:
+        label: when True, each line is prefixed with its offset
+        lines: when False, nodes only contain the name of the bloc
+    """
+    # rankdir=LR;
+    out = ["""
+digraph asm_graph {
+size="80,50";
+node [
+fontsize = "16",
+shape = "box"
+];
+"""]
+    for b in blocs:
+        out.append('%s [\n' % b.label.name)
+        out.append('label = "')
+
+        out.append(b.label.name + "\\l\\\n")
+        if lines:
+            for l in b.lines:
+                if label:
+                    out.append("%.8X " % l.offset)
+                # double quotes would close the dot label
+                out.append(("%s\\l\\\n" % l).replace('"', '\\"'))
+        out.append('"\n];\n')
+
+    for b in blocs:
+        for n in b.bto:
+            # the test is done on the destination (n.label): it is the
+            # object whose `name` is read below
+            if isinstance(n.label, m2_expr.ExprId):
+                dst, name, cst = b.label.name, n.label.name, n.c_t
+            elif isinstance(n.label, asm_label):
+                dst, name, cst = b.label.name, n.label.name, n.c_t
+            else:
+                continue
+            out.append('%s -> %s [ label = "%s" ];\n' % (dst, name, cst))
+
+    out.append("}")
+    return ''.join(out)
+
+
+def conservative_asm(mnemo, mode, instr, symbols, conservative):
+    """
+    Assemble `instr` and pick one of the encodings returned by mnemo.asm.
+
+    The original bytes (`instr.b`) are kept when they are among the
+    candidates. Otherwise, if `conservative` is set, the first candidate
+    having the same length as the original bytes is chosen. In every
+    other case the first candidate is used.
+
+    Return (chosen encoding, list of candidates).
+    Raise ValueError if the instruction cannot be assembled.
+    """
+    encodings = mnemo.asm(instr, symbols)
+    if not encodings:
+        raise ValueError('cannot asm:%s' % str(instr))
+
+    chosen = encodings[0]
+    if hasattr(instr, "b"):
+        original = instr.b
+        if original in encodings:
+            chosen = original
+        elif conservative:
+            for enc in encodings:
+                if len(enc) == len(original):
+                    chosen = enc
+                    break
+    return chosen, encodings
+
+
+def guess_blocs_size(mnemo, mode, blocs, symbols):
+    """
+    Give every line of every bloc a size, and every bloc a length.
+
+    An asm_raw line gets its raw value as `data` and its length as `l`;
+    any other line gets no data and the maximum instruction length of
+    `mnemo`. The sum of the sizes is stored in both `blen` and
+    `blen_max` of the bloc.
+    """
+    for b in blocs:
+        log_asmbloc.debug('---')
+        total = 0
+        for instr in b.lines:
+            if isinstance(instr, asm_raw):
+                size = len(instr.raw)
+                data = instr.raw
+            else:
+                size = mnemo.max_instruction_len
+                data = None
+            instr.data = data
+            instr.l = size
+            total += size
+
+        b.blen = total
+        # bloc with max rel values encoded
+        b.blen_max = total + 0
+        log_asmbloc.info("blen: %d max: %d" % (b.blen, b.blen_max))
+
+
+def group_blocs(blocs):
+    """
+    Group the asm blocs which are chained by c_next constraints.
+
+    Return a dict mapping the label of the first bloc of each group to
+    the ordered list of blocs of the group. Each of these labels also
+    receives a `total_max_l` attribute: the sum of the `blen_max` of the
+    blocs of its group.
+    """
+    log_asmbloc.info('group_blocs')
+    # group adjacent blocs
+    rest = blocs[:]
+    groups_bloc = {}
+    d = dict([(x.label, x) for x in rest])
+    log_asmbloc.debug([str(x.label) for x in rest])
+
+    while rest:
+        # start a new chain from any remaining bloc
+        b = [rest.pop()]
+        # find recursive son
+        fini = False
+        while not fini:
+            fini = True
+            for c in b[-1].bto:
+                if c.c_t != asm_constraint.c_next:
+                    continue
+                if c.label in d and d[c.label] in rest:
+                    b.append(d[c.label])
+                    rest.remove(d[c.label])
+                    fini = False
+                    break
+        # check if son in group:
+        # the chain may end on the head of an existing group: merge them
+        found_in_group = False
+        for c in b[-1].bto:
+            if c.c_t != asm_constraint.c_next:
+                continue
+            if c.label in groups_bloc:
+                b += groups_bloc[c.label]
+                del(groups_bloc[c.label])
+                groups_bloc[b[0].label] = b
+                found_in_group = True
+                break
+
+        if not found_in_group:
+            groups_bloc[b[0].label] = b
+
+    # create max label range for bigbloc
+    for l in groups_bloc:
+        l.total_max_l = reduce(lambda x, y: x + y.blen_max, groups_bloc[l], 0)
+        log_asmbloc.debug(("offset totalmax l", l.offset, l.total_max_l))
+        if is_int(l.offset):
+            hof = hex(int(l.offset))
+        else:
+            hof = l.name
+        log_asmbloc.debug(("offset totalmax l", hof, l.total_max_l))
+    return groups_bloc
+
+
+def gen_free_space_intervals(f, max_offset=0xFFFFFFFF):
+    """Compute the free space available after each occupied area.
+
+    `f` maps labels (each carrying an `offset_free` attribute) to the
+    size occupied from that offset. The areas are walked by increasing
+    offset, `max_offset` closing the last one.
+
+    Return a dict mapping each label to the number of bytes between the
+    end of its area and the start of the next one (0 if they overlap or
+    if the area is covered by a previous one).
+    """
+    interval = {}
+    offset_label = dict([(x.offset_free, x) for x in f])
+    # offsets sorted in decreasing order, so that pop() gives the lowest
+    offset_label_order = offset_label.keys()
+    offset_label_order.sort()
+    offset_label_order.append(max_offset)
+    offset_label_order.reverse()
+
+    # highest end of occupied area seen so far
+    unfree_stop = 0L
+    while len(offset_label_order) > 1:
+        offset = offset_label_order.pop()
+        offset_end = offset + f[offset_label[offset]]
+        prev = 0
+        if unfree_stop > offset_end:
+            # this area lies inside an area already seen
+            space = 0
+        else:
+            # offset_label_order[-1] is the start of the next area
+            space = offset_label_order[-1] - offset_end
+            if space < 0:
+                space = 0
+            interval[offset_label[offset]] = space
+            # size of the next area (max_offset has no label)
+            if offset_label_order[-1] in offset_label:
+                prev = offset_label[offset_label_order[-1]]
+                prev = f[prev]
+
+        interval[offset_label[offset]] = space
+
+        unfree_stop = max(
+            unfree_stop, offset_end, offset_label_order[-1] + prev)
+    return interval
+
+
+def add_dont_erase(f, dont_erase=[]):
+    """Register the (start, stop) ranges of `dont_erase` in mapping `f`.
+
+    For each range, a label named and located at `start` is created in
+    a temporary symbol pool, gets `start` as `offset_free`, and is
+    mapped in `f` to the length of the range.
+    """
+    pool = asm_symbol_pool()
+    for start, stop in dont_erase:
+        label = pool.add_label(start, start)
+        label.offset_free = start
+        f[label] = stop - start
+
+
+def gen_non_free_mapping(group_bloc, dont_erase=[]):
+    """Return a dict mapping the label of each fixed group to its size.
+
+    A group is fixed when one of its blocs has a label with an integer
+    offset. Its label then gets `fixedblocs` set to True and an
+    `offset_free` equal to that offset minus the `blen_max` of the blocs
+    placed before; its size is its `total_max_l`. The other group labels
+    get `fixedblocs` set to False.
+
+    The ranges of `dont_erase` are added to the mapping with
+    add_dont_erase().
+    """
+    non_free_mapping = {}
+    # calculate free space for bloc placing
+    for g in group_bloc:
+        g.fixedblocs = False
+        # size of the blocs located before the first fixed one
+        shift = 0
+        for b in group_bloc[g]:
+            if is_int(b.label.offset):
+                g.fixedblocs = True
+                g.offset_free = b.label.offset - shift
+                break
+            shift += b.blen_max
+        if g.fixedblocs:
+            non_free_mapping[g] = g.total_max_l
+
+    log_asmbloc.debug("non free bloc:")
+    log_asmbloc.debug(non_free_mapping)
+    add_dont_erase(non_free_mapping, dont_erase)
+    log_asmbloc.debug("non free more:")
+    log_asmbloc.debug(non_free_mapping)
+    return non_free_mapping
+
+
+def resolve_symbol(
+    group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF):
+    """
+    Place all asmblocs.
+
+    `group_bloc` is a dict mapping a group label to its list of blocs
+    (the labels carry `total_max_l`). Groups which are not fixed are
+    attached after a fixed group having enough free space; their label
+    offset is then set to a list [reference label, bloc, direction],
+    the form read by calc_symbol_offset.
+
+    Return the list of (bloc, 0) tuples of the placed blocs.
+    Raise ValueError when some blocs cannot be placed.
+    """
+    log_asmbloc.info('resolve_symbol')
+    log_asmbloc.info(str(dont_erase))
+    bloc_list = []
+    # flat list of every bloc: the blocs still to resolve
+    unr_bloc = reduce(lambda x, y: x + group_bloc[y], group_bloc, [])
+    ending_ad = []
+
+    non_free_mapping = gen_non_free_mapping(group_bloc, dont_erase)
+    free_interval = gen_free_space_intervals(non_free_mapping, max_offset)
+    log_asmbloc.debug(free_interval)
+
+    # first big ones
+    g_tab = [(x.total_max_l, x) for x in group_bloc]
+    g_tab.sort()
+    g_tab.reverse()
+    g_tab = [x[1] for x in g_tab]
+
+    # g_tab => label of grouped blov
+    # group_bloc => dict of grouped bloc labeled-key
+
+    # first, near callee placing algo
+    # a free group is put after a group which references it and which is
+    # followed by enough free space
+    for g in g_tab:
+        if g.fixedblocs:
+            continue
+        finish = False
+        for x in group_bloc:
+            if not x in free_interval.keys():
+                continue
+            if free_interval[x] < g.total_max_l:
+                continue
+
+            for b in group_bloc[x]:
+                for c in b.bto:
+                    if c.label == g:
+                        tmp = free_interval[x] - g.total_max_l
+                        log_asmbloc.debug(
+                            "consumed %d rest: %d" % (g.total_max_l, int(tmp)))
+                        # the remaining space now follows g
+                        free_interval[g] = tmp
+                        del(free_interval[x])
+                        symbol_pool.set_offset(
+                            g, [group_bloc[x][-1].label, group_bloc[x][-1], 1])
+                        g.fixedblocs = True
+                        finish = True
+                        break
+                if finish:
+                    break
+            if finish:
+                break
+
+    # second, bigger in smaller algo
+    for g in g_tab:
+        if g.fixedblocs:
+            continue
+        # chose smaller free_interval first
+        k_tab = [(free_interval[x], x) for x in free_interval]
+        k_tab.sort()
+        k_tab = [x[1] for x in k_tab]
+        # choose free_interval
+        for k in k_tab:
+            if g.total_max_l > free_interval[k]:
+                continue
+            symbol_pool.set_offset(
+                g, [group_bloc[k][-1].label, group_bloc[k][-1], 1])
+            tmp = free_interval[k] - g.total_max_l
+            log_asmbloc.debug(
+                "consumed %d rest: %d" % (g.total_max_l, int(tmp)))
+            free_interval[g] = tmp
+            del(free_interval[k])
+
+            g.fixedblocs = True
+            break
+
+    while unr_bloc:
+        # propagate know offset
+        resolving = False
+        i = 0
+        while i < len(unr_bloc):
+            if unr_bloc[i].label.offset is None:
+                i += 1
+                continue
+            resolving = True
+            log_asmbloc.info("bloc %s resolved" % unr_bloc[i].label)
+            bloc_list.append((unr_bloc[i], 0))
+            # find the (single) group holding this bloc
+            g_found = None
+            for g in g_tab:
+                if unr_bloc[i] in group_bloc[g]:
+                    if g_found is not None:
+                        raise ValueError('blocin multiple group!!!')
+                    g_found = g
+            my_group = group_bloc[g_found]
+
+            # the unresolved neighbours in the group get an offset
+            # relative to this bloc
+            index = my_group.index(unr_bloc[i])
+            if index > 0 and my_group[index - 1] in unr_bloc:
+                # NOTE(review): unr_bloc[i - 1] is presumably the same
+                # bloc as my_group[index - 1] - confirm
+                symbol_pool.set_offset(
+                    my_group[index - 1].label,
+                    [unr_bloc[i].label, unr_bloc[i - 1], -1])
+            if index < len(my_group) - 1 and my_group[index + 1] in unr_bloc:
+                symbol_pool.set_offset(
+                    my_group[index + 1].label,
+                    [unr_bloc[i].label, unr_bloc[i], 1])
+            # no increment of i: the next bloc takes the place of this one
+            del unr_bloc[i]
+
+        if not resolving:
+            log_asmbloc.warn("cannot resolve symbol! (no symbol fix found)")
+        else:
+            continue
+
+        # nothing was resolved in this pass: dump the groups and give up
+        for g in g_tab:
+            print g
+            if g.fixedblocs:
+                print "fixed"
+            else:
+                print "not fixed"
+        raise ValueError('enable to fix bloc')
+    return bloc_list
+
+
+def calc_symbol_offset(symbol_pool):
+    """Compute the `offset_g` attribute of every label of the pool.
+
+    A label without offset gets None; a label with an integer offset
+    gets that offset. A label whose offset is a list
+    [reference label, bloc, direction] gets the `offset_g` of the
+    reference plus bloc.blen * direction, once the reference is known.
+    """
+    known = set()
+    dependents = {}
+
+    for label in symbol_pool.items():
+        if label.offset is None:
+            label.offset_g = None
+            continue
+        if is_int(label.offset):
+            known.add(label)
+        else:
+            # construct dependant blocs tree
+            reference = label.offset[0]
+            if reference not in dependents:
+                dependents[reference] = set()
+            dependents[reference].add(label)
+        label.offset_g = label.offset
+
+    while known:
+        reference = known.pop()
+        if reference not in dependents:
+            continue
+        for dep in dependents[reference]:
+            if reference.offset_g is None:
+                raise ValueError("unknown symbol: %s" % str(reference.name))
+            bloc, direction = dep.offset_g[1], dep.offset_g[2]
+            dep.offset_g = reference.offset_g + bloc.blen * direction
+            known.add(dep)
+
+
+def asmbloc_final(mnemo, mode, blocs, symbol_pool, symb_reloc_off=None, conservative = False):
+    """Encode the instructions of `blocs`, looping until lengths are stable.
+
+    Each pass recomputes the label offsets with calc_symbol_offset(), then
+    re-assembles every non-raw instruction through conservative_asm() with
+    its arguments resolved against those offsets.  As soon as an encoding
+    has a different length than the recorded instr.l, the bloc length is
+    adjusted and another pass is scheduled.
+
+    mnemo, mode: forwarded untouched to conservative_asm()
+    blocs: iterable of (bloc, value) pairs; only the bloc is used here
+    symbol_pool: pool whose labels get their final offsets set on exit
+    symb_reloc_off: optional dict, filled with {bloc label: [offsets]}
+    conservative: forwarded untouched to conservative_asm()
+
+    Raises ValueError when no candidate encoding has the expected length.
+    """
+    log_asmbloc.info("asmbloc_final")
+    if symb_reloc_off is None:
+        symb_reloc_off = {}
+    fini = False
+    # asm with minimal instr len
+    # check if dst label are ok to this encoded form
+    # recompute if not
+    # TODO XXXX: implement todo list to remove n^high complexity!
+    while fini is not True:
+
+        fini = True
+        # relocation offsets are rebuilt from scratch on every pass
+        my_symb_reloc_off = {}
+
+        calc_symbol_offset(symbol_pool)
+
+        # scratch pool holding every label at its current guessed offset
+        symbols = asm_symbol_pool()
+        for s, v in symbol_pool.s.items():
+            symbols.add_label(s, v.offset_g)
+        # print symbols
+        # test if bad encoded relative
+        for b, t in blocs:
+
+            offset_i = 0
+            blen = 0  # accumulated below but never read back in this function
+            my_symb_reloc_off[b.label] = []
+            for instr in b.lines:
+                if isinstance(instr, asm_raw):
+                    # raw data has a fixed size: just skip over it
+                    offset_i += instr.l  # len(instr.data)
+                    continue
+                # if not [True for a in instr.arg if mnemo.has_symb(a)]:
+                #    offset_i+=len(instr.data)
+                #    continue
+                sav_a = instr.args[:]  # [a.expr for a in instr.args]
+                # print [str(x) for x in sav_a]
+                args_e = instr.resolve_args_with_symbols(symbols)
+                for i, e in enumerate(args_e):
+                    # print 'ee', e.size, e
+                    instr.args[i] = e
+
+                instr.offset = b.label.offset_g + offset_i
+                if instr.dstflow():
+                    # instr.l = len(instr.data)
+                    instr.fixDstOffset()
+                    """
+                    lbls = {}
+                    xxx = instr.getdstflow()
+                    if len(xxx) !=1:
+                        raise ValueError('multi dst ?!')
+                    label = mnemo.get_label(xxx[0])
+                    is_mem = mnemo.is_mem(xxx[0])
+                    lbls[label.name] = label.offset_g
+                    instr.fixdst(lbls, b.label.offset_g+b.blen, is_mem)
+                    """
+                # else:
+                # instr.arg = [mnemo.fix_symbol(a, symbol_pool)
+                #    for a in instr.arg]
+                #    pass
+                symbol_reloc_off = []
+                old_l = instr.l
+                c, candidates = conservative_asm(
+                    mnemo, mode, instr, symbol_reloc_off, conservative)
+
+                # print "XXXX", instr
+                # print candidates
+                # put back the original arguments saved before resolution
+                for i, e in enumerate(sav_a):
+                    instr.args[i] = e
+
+                if len(c) != instr.l:
+                    # good len, bad offset...XXX
+                    # offset_i is not advanced on this path; the extra pass
+                    # requested through `fini` starts again from 0
+                    b.blen = b.blen - old_l + len(c)
+                    instr.data = c
+                    instr.l = len(c)
+                    fini = False
+                    continue
+                found = False
+                # keep the first candidate of the expected length; `cpos`
+                # and `c` stay bound to it after the loop
+                for cpos, c in enumerate(candidates):
+                    # if len(c) == len(instr.data):
+                    if len(c) == instr.l:
+                        # print 'UPDD', repr(instr.data), repr(c)
+                        # b.blen = b.blen-old_l+len(c)
+                        instr.data = c
+                        instr.l = len(c)
+
+                        found = True
+                        break
+                if not found:
+                    raise ValueError('something wrong in instr.data')
+
+                # symbol_reloc_off is indexed like `candidates`
+                # (presumably filled by conservative_asm -- TODO confirm)
+                if cpos < len(symbol_reloc_off):
+                    my_s = symbol_reloc_off[cpos]
+                else:
+                    my_s = None
+
+                if my_s is not None:
+                    my_symb_reloc_off[b.label].append(offset_i + my_s)
+                offset_i += instr.l
+                blen += instr.l
+                assert(len(instr.data) == instr.l)
+    # we have fixed all relative values
+    # recompute good offsets
+    for label in symbol_pool.items():
+        # if label.offset_g is None:
+        #    fdfd
+        symbol_pool.set_offset(label, label.offset_g)
+
+    # publish the relocation offsets of the last pass to the caller's dict
+    for a, b in my_symb_reloc_off.items():
+        symb_reloc_off[a] = b
+
+
+def asm_resolve_final(mnemo, mode, blocs, symbol_pool, dont_erase=[],
+                      max_offset=0xFFFFFFFF,
+                      symb_reloc_off=None, constrain_pos=False):
+    if symb_reloc_off is None:
+        symb_reloc_off = {}
+    # asmbloc(mnemo, mode, blocs, symbol_pool)
+    guess_blocs_size(mnemo, mode, blocs, symbol_pool)
+    bloc_g = group_blocs(blocs)
+
+    resolved_b = resolve_symbol(bloc_g, symbol_pool, dont_erase=dont_erase,
+                                max_offset=max_offset)
+
+    asmbloc_final(mnemo, mode, resolved_b, symbol_pool, symb_reloc_off)
+    written_bytes = {}
+    patches = {}
+    for b, t in resolved_b:
+        offset = b.label.offset
+        for i in b.lines:
+            assert(i.data is not None)
+            patches[offset] = i.data
+            for c in range(i.l):
+                if offset + c in written_bytes:
+                    raise ValueError(
+                        "overlapping bytes in asssembly %X" % int(offset))
+                written_bytes[offset + c] = 1
+            i.offset = offset
+            i.l = i.l
+            offset += i.l
+
+    return resolved_b, patches
+
+
+def blist2graph(ab):
+    """
+    ab: list of asmbloc
+    return: graph of asmbloc
+    """
+    g = DiGraph()
+    g.lbl2bloc = {}
+    for b in ab:
+        g.lbl2bloc[b.label] = b
+        g.add_node(b.label)
+        for x in b.bto:
+            g.add_edge(b.label, x.label)
+    return g
+
+
+class basicblocs:
+
+    def __init__(self, ab=[]):
+        self.blocs = {}
+        self.g = DiGraph()
+        self.add_blocs(ab)
+
+    def add(self, b):
+        self.blocs[b.label] = b
+        self.g.add_node(b.label)
+        for dst in b.bto:
+            if isinstance(dst.label, asm_label):
+                self.g.add_edge(b.label, dst.label)
+
+    def add_blocs(self, ab):
+        for b in ab:
+            self.add(b)
+
+    def get_bad_dst(self):
+        o = set()
+        for b in self.blocs.values():
+            for c in b.bto:
+                if c.c_t == asm_constraint.c_bad:
+                    o.add(b)
+        return o
+
+
+def find_parents(blocs, l):
+    p = set()
+    for b in blocs:
+        if l in [x.label for x in b.bto if isinstance(x.label, asm_label)]:
+            p.add(b.label)
+    return p
+
+
+def bloc_blink(blocs):
+    for b in blocs:
+        b.parents = find_parents(blocs, b.label)
+
+
+def getbloc_around(blocs, a, level=3, done=None, blocby_label=None):
+
+    if not blocby_label:
+        blocby_label = {}
+        for b in blocs:
+            blocby_label[b.label] = b
+    if done is None:
+        done = set()
+
+    done.add(a)
+    if not level:
+        return done
+    for b in a.parents:
+        b = blocby_label[b]
+        if b in done:
+            continue
+        done.update(getbloc_around(blocs, b, level - 1, done, blocby_label))
+    for b in a.bto:
+        b = blocby_label[b.label]
+        if b in done:
+            continue
+        done.update(getbloc_around(blocs, b, level - 1, done, blocby_label))
+    return done
+
+
+def getbloc_parents(blocs, a, level=3, done=None, blocby_label=None):
+
+    if not blocby_label:
+        blocby_label = {}
+        for b in blocs:
+            blocby_label[b.label] = b
+    if done is None:
+        done = set()
+
+    done.add(a)
+    if not level:
+        return done
+    for b in a.parents:
+        b = blocby_label[b]
+        if b in done:
+            continue
+        done.update(getbloc_parents(blocs, b, level - 1, done, blocby_label))
+    return done
+
+# get ONLY level_X parents
+
+
+def getbloc_parents_strict(
+    blocs, a, level=3, rez=None, done=None, blocby_label=None):
+
+    if not blocby_label:
+        blocby_label = {}
+        for b in blocs:
+            blocby_label[b.label] = b
+    if rez is None:
+        rez = set()
+    if done is None:
+        done = set()
+
+    done.add(a)
+    if level == 0:
+        rez.add(a)
+    if not level:
+        return rez
+    for b in a.parents:
+        b = blocby_label[b]
+        if b in done:
+            continue
+        rez.update(getbloc_parents_strict(
+            blocs, b, level - 1, rez, done, blocby_label))
+    return rez
+
+
+def bloc_find_path_next(blocs, blocby_label, a, b, path=None):
+    if path == None:
+        path = []
+    if a == b:
+        return [path]
+
+    all_path = []
+    for x in a.bto:
+        if x.c_t != asm_constraint.c_next:
+            continue
+        if not x.label in blocby_label:
+            print 'XXX unknown label'
+            continue
+        x = blocby_label[x.label]
+        all_path += bloc_find_path_next(blocs, blocby_label, x, b, path + [a])
+        # stop if at least one path found
+        if all_path:
+            return all_path
+    return all_path
+
+
+def bloc_merge(blocs, symbol_pool, dont_merge=[]):
+    i = -1
+    """
+    # TODO XXXX implement find all path for digraph
+
+    g = blist2graph(blocs)
+    g.lbl2node = dict([(b.label, b) for b in blocs])
+
+    while i<len(blocs)-1:
+        i+=1
+        b = blocs[i]
+        if b.label in dont_merge:
+            continue
+
+        successors = [x for x in g.successors(b.label)]
+        predecessors = [x for x in g.predecessors(b.label)]
+        # if bloc doesn't self ref
+        if b.label in successors:
+            continue
+        # and bloc has only one parent
+        if len(predecessors) != 1:
+            continue
+        # may merge
+        bpl = predecessors[0]
+
+        # and parent has only one son
+        p_s = [x for x in g.successors(bpl)]
+        if len(p_s)!=1:
+            continue
+
+        bp = g.lbl2node[bpl]
+        # and parent has not a next constraint yet
+        found = False
+        for gpl in g.predecessors(bpl):
+            gp = g.lbl2node[gpl]
+            for x in gp.bto:
+                if x.c_t != asm_constraint.c_next:
+                    continue
+                if x.label == bpl:
+                    found = True
+                    break
+            if found:
+                break
+        if found:
+            continue
+        if bp.lines:
+            l = bp.lines[-1]
+            #jmp opt; jcc opt
+            if l.is_subcall():
+                continue
+            if l.breakflow() and l.dstflow():
+                bp.lines.pop()
+        #merge
+        #sons = b.bto[:]
+
+        # update parents
+        for s in b.bto:
+            if not isinstance(s.label, asm_label): continue
+            if s.label.name == None:
+                continue
+            if not s.label in g.lbl2node:
+                print "unknown parent XXX"
+                continue
+            bs = g.lbl2node[s.label]
+            for p in g.predecessors(bs.label):
+                if p == b.label:
+                    bs.parents.discard(p)
+                    bs.parents.add(bp.label)
+        bp.lines+=b.lines
+        bp.bto = b.bto
+        #symbol_pool.remove(b.label)
+        del(blocs[i])
+        i = -1
+
+    return
+    """
+    blocby_label = {}
+    for b in blocs:
+        blocby_label[b.label] = b
+        b.parents = find_parents(blocs, b.label)
+
+    while i < len(blocs) - 1:
+        i += 1
+        b = blocs[i]
+        if b.label in dont_merge:
+            continue
+        p = set(b.parents)
+        # if bloc dont self ref
+        if b.label in p:
+            continue
+        # and bloc has only one parent
+        if len(p) != 1:
+            continue
+        # may merge
+        bpl = p.pop()
+        # bp = getblocby_label(blocs, bpl)
+        bp = blocby_label[bpl]
+        # and parent has only one son
+        if len(bp.bto) != 1:
+            continue
+        """
+        and will not create next loop composed of constraint_next from son to
+        parent
+        """
+        path = bloc_find_path_next(blocs, blocby_label, b, bp)
+        if path:
+            continue
+        if bp.lines:
+            l = bp.lines[-1]
+            # jmp opt; jcc opt
+            if l.is_subcall():
+                continue
+            if l.breakflow() and l.dstflow():
+                bp.lines.pop()
+        # merge
+        # sons = b.bto[:]
+
+        # update parents
+        for s in b.bto:
+            if not isinstance(s.label, asm_label):
+                continue
+            if s.label.name == None:
+                continue
+            if not s.label in blocby_label:
+                print "unknown parent XXX"
+                continue
+            bs = blocby_label[s.label]
+            for p in list(bs.parents):
+                if p == b.label:
+                    bs.parents.discard(p)
+                    bs.parents.add(bp.label)
+        bp.lines += b.lines
+        bp.bto = b.bto
+        # symbol_pool.remove(b.label)
+        del(blocs[i])
+        i = -1
+
+
+class disasmEngine(object):
+
+    def __init__(self, arch, attrib, bs=None, **kwargs):
+        self.arch = arch
+        self.attrib = attrib
+        self.bs = bs
+        self.symbol_pool = asm_symbol_pool()
+        self.dont_dis = []
+        self.split_dis = []
+        self.follow_call = False
+        self.patch_instr_symb = True
+        self.dontdis_retcall = False
+        self.lines_wd = None
+        self.blocs_wd = None
+        self.dis_bloc_callback = None
+        self.dont_dis_nulstart_bloc = False
+        self.job_done = set()
+        self.__dict__.update(kwargs)
+
+    def dis_bloc(self, offset):
+        job_done = set()
+        l = self.symbol_pool.getby_offset_create(offset)
+        current_bloc = asm_bloc(l)
+        dis_bloc(self.arch, self.bs, current_bloc, offset, self.job_done,
+                 self.symbol_pool,
+                 dont_dis=self.dont_dis, split_dis=self.split_dis,
+                 follow_call=self.follow_call,
+                 patch_instr_symb=self.patch_instr_symb,
+                 dontdis_retcall=self.dontdis_retcall,
+                 lines_wd=self.lines_wd,
+                 dis_bloc_callback=self.dis_bloc_callback,
+                 dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc,
+                 attrib=self.attrib)
+        return current_bloc
+
+    def dis_multibloc(self, offset, blocs=None):
+        blocs = dis_bloc_all(self.arch, self.bs, offset, self.job_done,
+                             self.symbol_pool,
+                             dont_dis=self.dont_dis, split_dis=self.split_dis,
+                             follow_call=self.follow_call,
+                             patch_instr_symb=self.patch_instr_symb,
+                             dontdis_retcall=self.dontdis_retcall,
+                             blocs_wd=self.blocs_wd,
+                             lines_wd=self.lines_wd,
+                             blocs=blocs,
+                             dis_bloc_callback=self.dis_bloc_callback,
+                             dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc,
+                             attrib=self.attrib)
+        return blocs
+
diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py
new file mode 100644
index 00000000..7ae6d3fa
--- /dev/null
+++ b/miasm2/core/bin_stream.py
@@ -0,0 +1,175 @@
+#
+# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+
+class bin_stream(object):
+
+    def __init__(self, *args, **kargs):
+        pass
+
+    def __repr__(self):
+        return "<%s !!>" % self.__class__.__name__
+
+    def hexdump(self, offset, l):
+        return
+
+    def getbytes(self, start, l=1):
+        return self.bin[start:start + l]
+
+    def getbits(self, start, n):
+        if not n:
+            return 0
+        o = 0
+        if n > self.getlen() * 8:
+            raise ValueError('not enought bits %r %r' % (n, len(self.bin) * 8))
+        while n:
+            # print 'xxx', n, start
+            i = start / 8
+            c = self.getbytes(i)
+            if not c:
+                raise IOError
+            c = ord(c)
+            # print 'o', hex(c)
+            r = 8 - start % 8
+            c &= (1 << r) - 1
+            # print 'm', hex(c)
+            l = min(r, n)
+            # print 'd', r-l
+            c >>= (r - l)
+            o <<= l
+            o |= c
+            n -= l
+            start += l
+        return o
+
+
+class bin_stream_str(bin_stream):
+
+    def __init__(self, bin="", offset=0L, shift=0):
+        bin_stream.__init__(self)
+        if offset > len(bin):
+            raise IOError
+        self.bin = bin
+        self.offset = offset
+        self.shift = shift
+        self.l = len(bin)
+        if "is_addr_in" in self.bin.__class__.__dict__:
+            self.is_addr_in = lambda ad: self.bin.is_addr_in(ad)
+
+    def getbytes(self, start, l=1):
+        if start + l > self.l:
+            raise IOError
+
+        return super(bin_stream_str, self).getbytes(start + self.shift, l)
+
+    def readbs(self, l=1):
+        if self.offset + l > self.l:
+            raise IOError
+        self.offset += l
+        print hex(self.offset + self.shift)
+        return self.bin[self.offset - l + self.shift:self.offset + self.shift]
+
+    def writebs(self, l=1):
+        raise ValueError('writebs unsupported')
+
+    def __str__(self):
+        out = self.bin[self.offset + self.shift:]
+        return out
+
+    def setoffset(self, val):
+        self.offset = val
+
+    def __len__(self):
+        return len(self.bin) - self.offset + self.shift
+
+    def getlen(self):
+        return len(self.bin) - self.offset + self.shift
+
+
+class bin_stream_file(bin_stream):
+
+    def __init__(self, bin, offset=0L):
+        bin_stream.__init__(self)
+        self.bin = bin
+        self.bin.seek(0, 2)
+        self.l = self.bin.tell()
+        self.offset = offset
+
+    def getoffset(self):
+        return self.bin.tell()
+
+    def setoffset(self, val):
+        self.bin.seek(val)
+    offset = property(getoffset, setoffset)
+
+    def readbs(self, l=1):
+        if self.offset + l > self.l:
+            raise IOError
+        return self.bin.read(l)
+
+    def writebs(self, l=1):
+        if self.offset + l > self.l:
+            raise IOError
+        return self.bin.write(l)
+
+    def __str__(self):
+        return str(self.bin)
+
+
+class bin_stream_pe(bin_stream):
+
+    def __init__(self, bin="", offset=0L):
+        bin_stream.__init__(self)
+        # print 'ELF/PE'
+        self.mylen = len(bin)
+        if offset > bin.__len__():
+            raise IOError
+        self.bin = bin
+        self.offset = offset
+        self.l = bin.__len__()
+        if "is_addr_in" in self.bin.__class__.__dict__:
+            self.is_addr_in = lambda ad: self.bin.is_addr_in(ad)
+
+    def getlen(self):
+        return self.mylen
+        # s = self.bin.parent.SHList[-1]
+        # l = self.bin.parent.rva2virt(s.addr+s.size)
+        # return l
+
+    def readbs(self, l=1):
+        if self.offset + l > self.l:
+            raise IOError
+        self.offset += l
+        return self.bin(self.offset - l, self.offset)
+
+    def writebs(self, l=1):
+        raise ValueError('writebs unsupported')
+
+    def getbytes(self, start, l=1):
+        return self.bin(start, start + l)
+
+    def __str__(self):
+        out = self.bin[self.offset:]
+        return out
+
+    def setoffset(self, val):
+        self.offset = val
+
+
+class bin_stream_elf(bin_stream_pe):
+    # No behaviour of its own: everything is inherited from bin_stream_pe.
+    pass
diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py
new file mode 100644
index 00000000..7d672caa
--- /dev/null
+++ b/miasm2/core/cpu.py
@@ -0,0 +1,1804 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+import re
+import struct
+import logging
+from pyparsing import *
+from miasm2.expression.expression import *
+from miasm2.core import asmbloc
+from collections import defaultdict
+from bin_stream import bin_stream, bin_stream_str
+from utils import Disasm_Exception
+from miasm2.expression.simplifications import expr_simp
+
+# Module logger "cpuhelper": a stream handler printing "LEVEL: message",
+# with the threshold set to WARN.
+log = logging.getLogger("cpuhelper")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log.addHandler(console_handler)
+log.setLevel(logging.WARN)
+
+# size2int = {8:ExprInt8, 16:ExprInt16, 32:ExprInt32,64:ExprInt64}
+
+
+class bitobj:
+
+    def __init__(self, s=""):
+        if not s:
+            bits = []
+        else:
+            bits = list(bin(int(str(s).encode('hex'), 16))[2:])
+            bits = [int(x) for x in bits]
+            if len(bits) % 8:
+                bits = [0 for x in xrange(8 - (len(bits) % 8))] + bits
+            bits = ['0' for x in xrange(len(s) * 8 - len(bits))] + bits
+        self.bits = bits
+        self.offset = 0
+
+    def __len__(self):
+        return len(self.bits) - self.offset
+
+    def getbits(self, n):
+        if not n:
+            return 0
+        o = 0
+        if n > len(self.bits) - self.offset:
+            raise ValueError('not enought bits %r %r' % (n, len(self.bits)))
+        b = self.bits[self.offset:self.offset + n]
+        b = int("".join([str(x) for x in b]), 2)
+        self.offset += n
+        return b
+
+    def putbits(self, b, n):
+        if not n:
+            return
+        bits = list(bin(b)[2:])
+        bits = [int(x) for x in bits]
+        bits = [0 for x in xrange(n - len(bits))] + bits
+        self.bits += bits
+
+    def tostring(self):
+        if len(self.bits) % 8:
+            raise ValueError(
+                'num bits must be 8 bit aligned: %d' % len(self.bits))
+        b = int("".join([str(x) for x in self.bits]), 2)
+        b = "%X" % b
+        b = '0' * (len(self.bits) / 4 - len(b)) + b
+        b = b.decode('hex')
+        return b
+
+    def reset(self):
+        self.offset = 0
+
+    def copy_state(self):
+        b = self.__class__()
+        b.bits = self.bits
+        b.offset = self.offset
+        return b
+
+
+def literal_list(l):
+    l = l[:]
+    l.sort()
+    l = l[::-1]
+    o = Literal(l[0])
+    for x in l[1:]:
+        o |= Literal(x)
+    return o
+
+
+class reg_info:
+
+    def __init__(self, reg_str, reg_expr):
+        self.str = reg_str
+        self.expr = reg_expr
+        self.parser = literal_list(reg_str).setParseAction(self.reg2expr)
+
+    def reg2expr(self, s):
+        i = self.str.index(s[0])
+        return self.expr[i]
+
+    def expr2regi(self, e):
+        return self.expr.index(e)
+
+
+def gen_reg(rname, env, sz=32):
+    """
+    Gen reg expr and parser
+    Equivalent to:
+        PC = ExprId('PC')
+        reg_pc_str = ['PC']
+        reg_pc_expr = [ExprId(x, sz) for x in reg_pc_str]
+        regpc = reg_info(reg_pc_str, reg_pc_expr)
+
+        class bs_rname(m_reg):
+            reg = regi_rname
+
+        bsrname = bs(l=0, cls=(bs_rname,))
+
+    """
+    rnamel = rname.lower()
+    r = ExprId(rname, sz)
+    reg_str = [rname]
+    reg_expr = [r]
+    regi = reg_info(reg_str, reg_expr)
+    # define as global val
+    cname = "bs_" + rnamel
+    c = type(cname, (m_reg,), {'reg': regi})
+    env[rname] = r
+    env["regi_" + rnamel] = regi
+    env[cname] = c
+    env["bs" + rnamel] = bs(l=0, cls=(c,))
+    return r, regi
+
+# pyparsing literals matching one opening / one closing parenthesis
+LPARENTHESIS = Literal("(")
+RPARENTHESIS = Literal(")")
+
+
+#
+
+
+def int2expr(t):
+    v = t[0]
+    return (ExprInt, v)
+
+
+def parse_op(t):
+    v = t[0]
+    return (ExprOp, v)
+
+
+def parse_id(t):
+    v = t[0]
+    return (ExprId, v)
+
+
+def ast_parse_op(t):
+    if len(t) == 1:
+        return t[0]
+    if len(t) == 2:
+        if t[0] in ['-', '+', '!']:
+            return ExprOp(t[0], t[1])
+    if len(t) == 3:
+        args = [t[0], t[2]]
+        return ExprOp(t[1], t[0], t[2])
+    t = t[::-1]
+    while len(t) >= 3:
+        o1, op, o2 = t.pop(), t.pop(), t.pop()
+        e = ExprOp(op, o1, o2)
+        t.append(e)
+    if len(t) != 1:
+        raise NotImplementedError('strange op')
+    return t[0]
+
+
+def ast_id2expr(a):
+    return ExprId(a, 32)
+
+
+def ast_int2expr(a):
+    return ExprInt32(a)
+
+
+def ast_raw2expr(a, my_id2expr, my_int2expr):
+    assert(isinstance(a, tuple))
+    if a[0] is ExprId:
+        e = my_id2expr(a[1])
+    elif a[0] is ExprInt:
+        e = my_int2expr(a[1])
+    elif a[0] is ExprOp:
+        out = []
+        for x in a[1]:
+            if isinstance(x, tuple):
+                x = ast_raw2expr(x, my_id2expr, my_int2expr)
+            out.append(x)
+        e = ast_parse_op(out)
+    else:
+        raise TypeError('unknown type')
+    return e
+
+
+def ast_get_ids(a):
+    assert(isinstance(a, tuple))
+    if a[0] is ExprId:
+        return set([a[1]])
+    elif a[0] is ExprInt:
+        return set()
+    elif a[0] is ExprOp:
+        out = set()
+        for x in a[1]:
+            if isinstance(x, tuple):
+                out.update(ast_get_ids(x))
+        return out
+    raise TypeError('unknown type')
+
+
+def _extract_ast_core(a):
+    assert(isinstance(a, tuple))
+    if a[0] in [ExprInt, ExprId]:
+        return a
+    elif a[0] is ExprOp:
+        out = []
+        for x in a[1]:
+            if isinstance(x, tuple):
+                x = _extract_ast_core(x)
+            out.append(x)
+        return tuple([a[0]] + [out])
+    else:
+        raise TypeError('unknown type')
+
+
+def extract_ast_core(v, my_id2expr, my_int2expr):
+    ast_tokens = _extract_ast_core(v)
+    ids = ast_get_ids(ast_tokens)
+    # print 'IDS', ids
+    ids_expr = [my_id2expr(x) for x in ids]
+    # print 'IDS_expr', ids_expr
+    sizes = set([i.size for i in ids_expr])
+    # print "SIZE", sizes
+    if len(sizes) == 0:
+        pass
+    elif len(sizes) == 1:
+        size = sizes.pop()
+        my_int2expr = lambda x: ExprInt_fromsize(size, x)
+    else:
+        raise ValueError('multiple sizes in ids')
+    e = ast_raw2expr(ast_tokens, my_id2expr, my_int2expr)
+    return e
+
+
+class parse_ast:
+
+    def __init__(self, id2expr, int2expr, extract_ast=extract_ast_core):
+        self.id2expr = id2expr
+        self.int2expr = int2expr
+        self.extract_ast_core = extract_ast
+
+    def __call__(self, v):
+        v = v[0]
+        if isinstance(v, Expr):
+            return v
+        return self.extract_ast_core(v, self.id2expr, self.int2expr)
+
+
+def neg_int(t):
+    x = -t[0]
+    return x
+
+
+# decimal and 0x-prefixed hexadecimal literals, converted to int
+integer = Word(nums).setParseAction(lambda s, l, t: int(t[0]))
+hex_int = Combine(Literal('0x') + Word(hexnums)).setParseAction(
+    lambda s, l, t: int(t[0], 16))
+
+# str_int = (Optional('-') + (hex_int | integer))
+# positive form, and form with a leading '-' (negated by neg_int)
+str_int_pos = (hex_int | integer)
+str_int_neg = (Suppress('-') + (hex_int | integer)).setParseAction(neg_int)
+
+# either form, tagged as an (ExprInt, value) pair by int2expr
+str_int = str_int_pos | str_int_neg
+str_int.setParseAction(int2expr)
+
+# operator families referenced by gen_base_expr()
+logicop = oneOf('& | ^ >> << <<< >>>')
+signop = oneOf('+ -')
+multop = oneOf('* / %')
+plusop = oneOf('+ -')
+
+
+def gen_base_expr():
+    variable = Word(alphas + "_$.", alphanums + "_")
+    variable.setParseAction(parse_id)
+    operand = str_int | variable
+    base_expr = operatorPrecedence(operand,
+                                   [("!", 1, opAssoc.RIGHT, parse_op),
+                                    (logicop, 2, opAssoc.RIGHT, parse_op),
+                                    (signop, 1, opAssoc.RIGHT, parse_op),
+                                    (multop, 2, opAssoc.LEFT, parse_op),
+                                    (plusop, 2, opAssoc.LEFT, parse_op), ]
+                                   )
+    return variable, operand, base_expr
+
+
+# module-wide grammar objects
+variable, operand, base_expr = gen_base_expr()
+
+# base_expr results are converted with the default converters
+# (identifiers through ast_id2expr, integers through ast_int2expr)
+my_var_parser = parse_ast(ast_id2expr, ast_int2expr)
+base_expr.setParseAction(my_var_parser)
+
+#
+
+
+# value of the `prio` class attribute of bs and bs_divert
+default_prio = 0x1337
+
+
+def isbin(s):
+    return re.match('[0-1]+$', s)
+
+
+def int2bin(i, l):
+    s = '0' * l + bin(i)[2:]
+    return s[-l:]
+
+
+def myror32(v, r):
+    return ((v & 0xFFFFFFFFL) >> r) | ((v << (32 - r)) & 0xFFFFFFFFL)
+
+
+def myrol32(v, r):
+    return ((v & 0xFFFFFFFFL) >> (32 - r)) | ((v << r) & 0xFFFFFFFFL)
+
+
+class bs(object):
+    # Description of one bit field.  gen() turns the description into an
+    # instance of a generated class deriving from `cls` and bsi.
+
+    # cache of the generated classes, keyed by (name, bases); it is a
+    # class attribute, hence shared by every bs object
+    all_new_c = {}
+    prio = default_prio
+
+    def __init__(self, strbits=None, l=None, cls=None,
+                 fname=None, order=0, flen=None, **kargs):
+        # strbits: bit pattern; '0'/'1' are fixed bits, spaces are
+        #          skipped, any other character is a free bit
+        # l: field length in bits, defaults to len(strbits)
+        # cls: optional tuple of classes used as bases by gen()
+        # fname: field name; a value derived from id() when omitted
+        # kargs: kept as-is and forwarded to the generated instance
+        if fname is None:
+            # fname = hex(id((strbits, l, cls, fname, order, flen, kargs)))
+            # fname = hex(id((strbits, l, fname, order, flen)))
+            # print str((strbits, l, cls, fname, order, flen, kargs))
+            fname = hex(id(str((strbits, l, cls, fname, order, flen, kargs))))
+            # print fname
+        # NOTE(review): when strbits is None, `l` is not defaulted; the
+        # shift computing lmask below then needs an explicit `l`
+        if strbits is None:
+            strbits = ""  # "X"*l
+        elif l is None:
+            l = len(strbits)
+        # fully binary pattern: its value is known right away
+        if strbits and isbin(strbits):
+            value = int(strbits, 2)
+        elif 'default_val' in kargs:
+            value = int(kargs['default_val'], 2)
+        else:
+            value = None
+        allbits = list(strbits)
+        allbits.reverse()
+        # fbits: value of the fixed bits; fmask: which bits are fixed
+        fbits = 0
+        fmask = 0
+        while allbits:
+            a = allbits.pop()
+            if a == " ":
+                continue
+            fbits <<= 1
+            fmask <<= 1
+            if a in '01':
+                a = int(a)
+                fbits |= a
+                fmask |= 1
+        lmask = (1 << l) - 1
+        # gen conditional field
+        # if flen is None:
+        #    flen = lambda mode, v:l
+        if cls:
+            # a 'flen' defined directly on one of the classes replaces
+            # the argument (the last such class wins)
+            for b in cls:
+                if 'flen' in b.__dict__:
+                    flen = getattr(b, 'flen')
+
+        self.strbits = strbits
+        self.l = l
+        self.cls = cls
+        self.fname = fname
+        self.order = order
+        self.lmask = lmask
+        self.fbits = fbits
+        self.fmask = fmask
+        # instance attribute: hides the flen classmethod defined below
+        self.flen = flen
+        self.value = value
+        self.kargs = kargs
+
+    def __getitem__(self, item):
+        # lets "%(strbits)s" % self style formatting read attributes
+        return getattr(self, item)
+
+    def __repr__(self):
+        o = self.__class__.__name__
+        if self.fname:
+            o += "_%s" % self.fname
+        o += "_%(strbits)s" % self
+        if self.cls:
+            o += '_' + '_'.join([x.__name__ for x in self.cls])
+        return o
+
+    def gen(self, parent):
+        # Return an instance, bound to `parent`, of the generated class
+        # matching this description.
+        c_name = 'nbsi'
+        if self.cls:
+            c_name += '_' + '_'.join([x.__name__ for x in self.cls])
+            bases = list(self.cls)
+        else:
+            bases = []
+        # bsi added at end of list
+        # used to use first function of added class
+        bases += [bsi]
+        # new_c = type(c_name, tuple(bases), {})
+        k = c_name, tuple(bases)
+        # reuse the class generated earlier for the same name and bases
+        if k in self.all_new_c:
+            new_c = self.all_new_c[k]
+        else:
+            new_c = type(c_name, tuple(bases), {})
+            self.all_new_c[k] = new_c
+        c = new_c(parent,
+                  self.strbits, self.l, self.cls,
+                  self.fname, self.order, self.lmask, self.fbits,
+                  self.fmask, self.value, self.flen, **self.kargs)
+        return c
+
+    def check_fbits(self, v):
+        # True when the fixed bits of `v` equal the pattern's fixed bits
+        return v & self.fmask == self.fbits
+
+    @classmethod
+    def flen(cls, v):
+        # reached only through the class: instances carry their own flen
+        raise NotImplementedError('not fully functional')
+
+
+class dum_arg(object):
+    # Minimal argument holder: stores an expression in `expr`.
+
+    def __init__(self, e=None):
+        self.expr = e
+
+    @staticmethod
+    def arg2str(e):
+        # plain str() conversion of the given expression
+        return str(e)
+
+
+class bsopt(bs):
+    # bs variant adding an ispresent() method that always answers True.
+
+    def ispresent(self):
+        return True
+
+
+class bsi(object):
+
+    def __init__(self, parent, strbits, l, cls, fname, order,
+                 lmask, fbits, fmask, value, flen, **kargs):
+        self.parent = parent
+        self.strbits = strbits
+        self.l = l
+        self.cls = cls
+        self.fname = fname
+        self.order = order
+        self.lmask = lmask
+        self.fbits = fbits
+        self.fmask = fmask
+        self.flen = flen
+        self.value = value
+        self.kargs = kargs
+        self.__dict__.update(self.kargs)
+
+    def decode(self, v):
+        self.value = v & self.lmask
+        return True
+
+    def encode(self):
+        # self.value = v&self.lmask
+        return True
+
+    def clone(self):
+        s = self.__class__(self.parent,
+                           self.strbits, self.l, self.cls,
+                           self.fname, self.order, self.lmask, self.fbits,
+                           self.fmask, self.value, self.flen, **self.kargs)
+        s.__dict__.update(self.kargs)
+        if hasattr(self, 'expr'):
+            s.expr = self.expr
+        return s
+
+    def __hash__(self):
+        kargs = []
+        for k, v in self.kargs.items():
+            if isinstance(v, list):
+                v = tuple(v)
+            kargs.append((k, v))
+        l = [self.strbits, self.l, self.cls,
+             self.fname, self.order, self.lmask, self.fbits,
+             self.fmask, self.value]  # + kargs
+        # l = [self.value]
+        return hash(tuple(l))
+
+
+class bs_divert(object):
+    prio = default_prio
+
+    def __init__(self, **kargs):
+        self.args = kargs
+
+    def __getattr__(self, item):
+        if item in self.__dict__:
+            return self.__dict__[item]
+        elif item in self.args:
+            return self.args.get(item)
+        else:
+            raise AttributeError
+
+
+class bs_name(bs_divert):
+    prio = 1
+
+    def divert(self, i, candidates):
+        out = []
+        for candidate in candidates:
+            cls, name, bases, dct, fields = candidate
+            for new_name, value in self.args['name'].items():
+                nfields = fields[:]
+                s = int2bin(value, self.args['l'])
+                args = dict(self.args)
+                args.update({'strbits': s})
+                f = bs(**args)
+                nfields[i] = f
+                ndct = dict(dct)
+                ndct['name'] = new_name
+                out.append((cls, new_name, bases, ndct, nfields))
+        return out
+
+
+class bs_mod_name(bs_divert):
+    prio = 2
+
+    def divert(self, i, candidates):
+        out = []
+        for candidate in candidates:
+            cls, name, bases, dct, fields = candidate
+            for value, new_name in enumerate(self.args['mn_mod']):
+                nfields = fields[:]
+                s = int2bin(value, self.args['l'])
+                args = dict(self.args)
+                args.update({'strbits': s})
+                f = bs(**args)
+                nfields[i] = f
+                ndct = dict(dct)
+                # new_name = ndct['name'] + new_name
+                ndct['name'] = self.modname(ndct['name'], value)
+                # ndct['name'] = new_name
+                out.append((cls, new_name, bases, ndct, nfields))
+        return out
+
+    def modname(self, name, i):
+        return name + self.args['mn_mod'][i]
+
+
+class bs_cond(bsi):
+    pass
+
+
+class bs_swapargs(bs_divert):
+
+    def divert(self, i, candidates):
+        # print candidates
+        out = []
+        for cls, name, bases, dct, fields in candidates:
+            # args not permuted
+            ndct = dict(dct)
+            nfields = fields[:]
+            # gen fix field
+            f = gen_bsint(0, self.args['l'], self.args)
+            nfields[i] = f
+            out.append((cls, name, bases, ndct, nfields))
+
+            # args permuted
+            ndct = dict(dct)
+            nfields = fields[:]
+            ap = ndct['args_permut'][:]
+            a = ap.pop(0)
+            b = ap.pop(0)
+            ndct['args_permut'] = [b, a] + ap
+            # print ndct['args_permut']
+            # gen fix field
+            f = gen_bsint(1, self.args['l'], self.args)
+            nfields[i] = f
+
+            out.append((cls, name, bases, ndct, nfields))
+        return out
+
+
+class m_arg(object):
+
+    def fromstring(self, s, parser_result=None):
+        if parser_result:
+            e, start, stop = parser_result[self.parser]
+            self.expr = e
+            return start, stop
+        try:
+            v, start, stop = self.parser.scanString(s).next()
+        except StopIteration:
+            return None, None
+        self.expr = v[0]
+        return start, stop
+
+    @staticmethod
+    def arg2str(e):
+        return str(e)
+
+
+class m_reg(m_arg):
+    prio = default_prio
+
+    @property
+    def parser(self):
+        return self.reg.parser
+
+    def decode(self, v):
+        self.expr = self.reg.expr[0]
+        return True
+
+    def encode(self):
+        return self.expr == self.reg.expr[0]
+
+    @staticmethod
+    def arg2str(e):
+        return str(e)
+
+
+class reg_noarg(object):
+    reg_info = None
+    parser = None
+
+    def fromstring(self, s, parser_result=None):
+        if parser_result:
+            e, start, stop = parser_result[self.parser]
+            self.expr = e
+            return start, stop
+        try:
+            v, start, stop = self.parser.scanString(s).next()
+        except StopIteration:
+            return None, None
+        self.expr = v[0]
+        return start, stop
+
+    @staticmethod
+    def arg2str(e):
+        return str(e)
+
+    def decode(self, v):
+        v = v & self.lmask
+        if v >= len(self.reg_info.expr):
+            return False
+        self.expr = self.reg_info.expr[v]
+        return True
+
+    def encode(self):
+        if not self.expr in self.reg_info.expr:
+            log.debug("cannot encode reg %r" % self.expr)
+            return False
+        self.value = self.reg_info.expr.index(self.expr)
+        if self.value > self.lmask:
+            log.debug("cannot encode field value %x %x" %
+                      (self.value, self.lmask))
+            return False
+        return True
+
+    def check_fbits(self, v):
+        return v & self.fmask == self.fbits
+
+
+class mn_prefix:
+
+    def __init__(self):
+        b = None
+
+
+def swap16(v):
+    return struct.unpack('<H', struct.pack('>H', v))[0]
+
+
+def swap32(v):
+    return struct.unpack('<I', struct.pack('>I', v))[0]
+
+
+def perm_inv(p):
+    o = [None for x in xrange(len(p))]
+    for i, x in enumerate(p):
+        o[x] = i
+    return o
+
+
+def gen_bsint(value, l, args):
+    s = int2bin(value, l)
+    args = dict(args)
+    args.update({'strbits': s})
+    f = bs(**args)
+    return f
+
+# Module-wide counter: cls_mn.fromstring() adds one for each parser scan.
+total_scans = 0
+
+
+def branch2nodes(branch, nodes=None):
+    if nodes is None:
+        node = []
+    for k, v in branch.items():
+        if not isinstance(v, dict):
+            continue
+        for k2 in v.keys():
+            nodes.append((k, k2))
+        branch2nodes(v, nodes)
+
+
+def factor_one_bit(tree):
+    if isinstance(tree, set):
+        return tree
+    new_keys = defaultdict(lambda: defaultdict(dict))
+    if len(tree) == 1:
+        return tree
+    for k, v in tree.items():
+        # print k, v
+        if k == "mn":
+            new_keys[k] = v
+            continue
+        l, fmask, fbits, fname, flen = k
+        if flen is not None or l <= 1:
+            new_keys[k] = v
+            continue
+        cfmask = fmask >> (l - 1)
+        nfmask = fmask & ((1 << (l - 1)) - 1)
+        cfbits = fbits >> (l - 1)
+        nfbits = fbits & ((1 << (l - 1)) - 1)
+        ck = 1, cfmask, cfbits, None, flen
+        nk = l - 1, nfmask, nfbits, fname, flen
+        # print ck
+        if nk in new_keys[ck]:
+            raise NotImplementedError('not fully functional')
+        new_keys[ck][nk] = v
+    for k, v in new_keys.items():
+        new_keys[k] = factor_one_bit(v)
+    # try factor sons
+    if len(new_keys) != 1:
+        return new_keys
+    subtree = new_keys.values()[0]
+    if len(subtree) != 1:
+        return new_keys
+    if subtree.keys()[0] == 'mn':
+        return new_keys
+
+    return new_keys
+
+
+def factor_fields(tree):
+    if not isinstance(tree, dict):
+        return tree
+    if len(tree) != 1:
+        return tree
+    # merge
+    k1, v1 = tree.items()[0]
+    if k1 == "mn":
+        return tree
+    l1, fmask1, fbits1, fname1, flen1 = k1
+    if fname1 is not None:
+        return tree
+    if flen1 is not None:
+        return tree
+
+    if not isinstance(v1, dict):
+        return tree
+    if len(v1) != 1:
+        return tree
+    k2, v2 = v1.items()[0]
+    if k2 == "mn":
+        return tree
+    l2, fmask2, fbits2, fname2, flen2 = k2
+    if fname2 is not None:
+        return tree
+    if flen2 is not None:
+        return tree
+    l = l1 + l2
+    fmask = (fmask1 << l2) | fmask2
+    fbits = (fbits1 << l2) | fbits2
+    fname = fname2
+    flen = flen2
+    k = l, fmask, fbits, fname, flen
+    new_keys = {k: v2}
+    return new_keys
+
+
+def factor_fields_all(tree):
+    if not isinstance(tree, dict):
+        return tree
+    new_keys = {}
+    for k, v in tree.items():
+        v = factor_fields(v)
+        new_keys[k] = factor_fields_all(v)
+    return new_keys
+
+
+def factor_tree(tree):
+    new_keys = {}
+    i = 1
+    min_len = min([x[0] for x in tree.keys()])
+    while i < min_len:
+
+        i += 1
+
+
+def graph_tree(tree):
+    nodes = []
+    branch2nodes(tree, nodes)
+
+    out = """
+          digraph G {
+          """
+    for a, b in nodes:
+        # print a, id(a)
+        # print b, id(b)
+        if b == 'mn':
+            continue
+        out += "%s -> %s;\n" % (id(a), id(b))
+    out += "}"
+    open('graph.txt', 'w').write(out)
+
+
+def add_candidate_to_tree(tree, c):
+    branch = tree
+    for f in c.fields:
+        if f.l == 0:
+            continue
+        # print len(bits), f.l
+        # if f.flen:
+        #    pass
+        # print f
+        node = f.l, f.fmask, f.fbits, f.fname, f.flen
+        # node = f.strbits, f.l, f.cls, f.fname, f.order, f.lmask, f.fbits,
+        # f.fmask, f.value#, tuple(f.kargs.items())
+
+        if not node in branch:
+            branch[node] = {}
+        branch = branch[node]
+    if not 'mn' in branch:
+        branch['mn'] = set()
+    branch['mn'].add(c)
+
+
+def add_candidate(bases, c):
+    add_candidate_to_tree(bases[0].bintree, c)
+
+
+def getfieldby_name(fields, fname):
+    f = filter(lambda x: hasattr(x, 'fname') and x.fname == fname, fields)
+    if len(f) != 1:
+        raise ValueError('more than one field with name: %s' % fname)
+    return f[0]
+
+
+def getfieldindexby_name(fields, fname):
+    for i, f in enumerate(fields):
+        if hasattr(f, 'fname') and f.fname == fname:
+            return f, i
+    return None
+
+
+class metamn(type):
+    """Metaclass that expands one mnemonic description into candidate
+    classes and registers each of them on the first base class."""
+
+    def __new__(mcs, name, bases, dct):
+        # Classes named "cls_mn" or starting with "mn_" are created as is,
+        # without any expansion or registration.
+        if name == "cls_mn" or name.startswith('mn_'):
+            return type.__new__(mcs, name, bases, dct)
+        alias = dct.get('alias', False)
+        # fields = [bm_cond]+dct['fields']
+        # The first base gets a chance to rewrite the declared fields.
+        fields = bases[0].mod_fields(dct['fields'])
+        # print 'f1', dct['fields']
+        # print 'f2', fields
+        if not 'name' in dct:
+            dct["name"] = bases[0].getmn(name)
+        if 'args' in dct:
+            # special case for permuted arguments
+            # NOTE(review): ``o`` is filled but never read afterwards.
+            o = []
+            p = []
+            for i, a in enumerate(dct['args']):
+                o.append((i, a))
+                if a in fields:
+                    p.append((fields.index(a), a))
+            # Arguments sorted by their position in ``fields``, then mapped
+            # back to their position in dct['args'].
+            p.sort()
+            p = [x[1] for x in p]
+            p = [dct['args'].index(x) for x in p]
+            dct['args_permut'] = perm_inv(p)
+        # order fields
+        # Fields are visited by ascending ``prio``, ties broken by index.
+        f_ordered = [x for x in enumerate(fields)]
+        f_ordered.sort(key=lambda x: (x[1].prio, x[0]))
+        candidates = bases[0].gen_modes(mcs, name, bases, dct, fields)
+        for i, fc in f_ordered:
+            # print fc, isinstance(fc, bs_divert)
+            # Each bs_divert field replaces the candidate list with the
+            # list returned by its divert().
+            if isinstance(fc, bs_divert):
+                # print 'iiii', fc
+                candidates = fc.divert(i, candidates)
+        # Note: the loop below rebinds name, bases, dct and fields.
+        for cls, name, bases, dct, fields in candidates:
+            ndct = dict(dct)
+            # Falsy field entries are dropped.
+            fields = [f for f in fields if f]
+            ndct['fields'] = fields
+            ndct['mn_len'] = sum([x.l for x in fields])
+            c = type.__new__(cls, name, bases, ndct)
+            c.alias = alias
+            c.check_mnemo(fields)
+            # Sequential number taken from (and bumped on) the first base.
+            c.num = bases[0].num
+            bases[0].num += 1
+            bases[0].all_mn.append(c)
+            mode = dct['mode']
+            # print 'add mnemo', c.name, c.mode, len(bases[0].all_mn_mode[mode])
+            # print fields
+            # if 'args_permut' in dct:
+            #    print dct['args_permut']
+            bases[0].all_mn_mode[mode].append(c)
+            bases[0].all_mn_name[c.name].append(c)
+            # One initialised instance per class is kept in all_mn_inst.
+            i = c()
+            i.init_class()
+            bases[0].all_mn_inst[c].append(i)
+            add_candidate(bases, c)
+            # gen byte lookup
+            # NOTE(review): ``off`` and ``o`` are not read after this loop;
+            # its visible effect is the bsi type check.
+            off = 0
+            o = ""
+            for f in i.fields_order:
+                if not isinstance(f, bsi):
+                    raise ValueError('f is not bsi')
+                if f.l == 0:
+                    continue
+                # if f.fmask:
+                o += f.strbits
+            # print o, len(o)
+            # fd
+        # The last class created is returned.
+        # NOTE(review): ``c`` is unbound here if ``candidates`` is empty.
+        return c
+
+
+class instruction(object):
+
+    def __init__(self, name, mode, args, args_str=None, additional_info=None):
+        self.name = name
+        self.mode = mode
+        self.args = args
+        if args_str is None:
+            raise NotImplementedError('not fully functional')
+        self.args_str = args_str
+        self.additional_info = additional_info
+
+    def gen_args(self, args):
+        out = ', '.join([str(x) for x in args])
+        return out
+
+    def __str__(self):
+        o = "%-10s " % self.name
+        args = []
+        args_str = self.args_str
+        if args_str is None:
+            args_str = [lambda x:str(x) for i in xrange(len(self.args))]
+        for arg, arg_str in zip(self.args, args_str):
+            if not isinstance(arg, Expr):
+                raise ValueError('zarb arg type')
+            x = arg_str(arg)
+            args.append(x)
+        o += self.gen_args(args)
+        return o
+
+    def resolve_args_with_symbols(self, symbols=None):
+        if symbols is None:
+            symbols = {}
+        args_out = []
+        for a in self.args:
+            e = a
+            # try to resolve symbols using symbols (0 for default value)
+            ids = get_expr_ids(e)
+            fixed_ids = {}
+            for x in ids:
+                if isinstance(x.name, asmbloc.asm_label):
+                    name = x.name.name
+                    if not name in symbols:
+                        raise ValueError('unresolved symbol! %r' % x)
+                else:
+                    name = x.name
+                # special symbol
+                if name == '$':
+                    value = ExprInt_from(x, self.offset)
+                    fixed_ids[x] = value
+                    continue
+                if not name in symbols:
+                    continue
+                if symbols[name].offset is None:
+                    default_size = self.get_symbol_size(x, symbols)
+                    value = ExprInt_fromsize(default_size, 0)  # default value
+                else:
+                    size = x.size
+                    if size is None:
+                        default_size = self.get_symbol_size(x, symbols)
+                        size = default_size
+                    value = ExprInt_fromsize(size, symbols[name].offset)
+                fixed_ids[x] = value
+            e = e.replace_expr(fixed_ids)
+            # print 'replaced e', e, fixed_ids
+            e = expr_simp(e)
+            # print 'replaced e simp', e, fixed_ids
+            args_out.append(e)
+        # print "args out", [str(x) for x in args_out]
+        return args_out
+
+    def get_info(self, c):
+        return
+
+
+class cls_mn(object):
+    # Python 2 metaclass declaration: subclasses go through metamn.__new__.
+    __metaclass__ = metamn
+    args_symb = []
+    # Class used by dis() and fromstring() to build the object they return.
+    instruction = instruction
+
+    @classmethod
+    def guess_mnemo(cls, bs, mode, pre_dis_info, offset):
+        candidates = []
+
+        candidates = set()
+
+        fname_values = pre_dis_info
+        todo = [(0, dict(fname_values), branch, offset * 8)
+                for branch in cls.bintree.items()]
+        cpt = 0
+        if hasattr(bs, 'getlen'):
+            bs_l = bs.getlen()
+        else:
+            bs_l = len(bs)
+        # print fname_values
+        for bvalo, fname_values, branch, offset_b in todo:
+            (l, fmask, fbits, fname, flen), vals = branch
+            cpt += 1
+            # print 'len', l, fmask, fbits, fname, flen
+            if flen is not None:
+                l = flen(mode, fname_values)
+            # print 'len', fname, l
+            if l is not None:
+                # print fname, hex(bs_l), l
+                if bs_l * 8 - offset_b < l:
+                    continue
+                # print hex(offset_b)
+                v = cls.getbits(bs, offset_b, l)
+                bval = (bvalo << l) + v
+                # print 'TEST', bval, fname, offset_b, cpt, (l, fmask, fbits),
+                # hex(v), hex(v & fmask), hex(fbits), v & fmask == fbits
+                offset_b += l
+                if v & fmask != fbits:
+                    continue
+                if fname is not None and not fname in fname_values:
+                    fname_values[fname] = bval
+                    bval = 0
+            # print vals
+            for nb, v in vals.items():
+                if 'mn' in nb:
+                    candidates.update(v)
+                else:
+                    todo.append((bval, dict(fname_values), (nb, v), offset_b))
+
+        candidates = [c for c in candidates]  # if c.mode == mode]
+
+        if not candidates:
+            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)
+        return candidates
+
+    def reset_class(self):
+        for f in self.fields_order:
+            if f.strbits and isbin(f.strbits):
+                # print 'a',
+                f.value = int(f.strbits, 2)
+            elif 'default_val' in f.kargs:
+                # print 'b',
+                f.value = int(f.kargs['default_val'], 2)
+            else:
+                # print 'c',
+                f.value = None
+            # print "reset", f.fname, f.value
+            if f.fname:
+                # print 'SET asm', f.fname
+                setattr(self, f.fname, f)
+
+    def init_class(self):
+        args = []
+        fields_order = []
+        to_decode = []
+        off = 0
+        for i, fc in enumerate(self.fields):
+            f = fc.gen(self)
+            f.offset = off
+            off += f.l
+            fields_order.append(f)
+            to_decode.append((i, f))
+
+            if isinstance(f, m_arg):
+                args.append(f)
+            # print f, fc.fname
+            if f.fname:
+                # print 'SET asm', f.fname
+                setattr(self, f.fname, f)
+        # print args
+        if hasattr(self, 'args_permut'):
+            args = [args[self.args_permut[i]]
+                    for i in xrange(len(self.args_permut))]
+        to_decode.sort(key=lambda x: (x[1].order, x[0]))
+        to_decode = [fields_order.index(f[1]) for f in to_decode]
+        self.args = args
+        self.fields_order = fields_order
+        self.to_decode = to_decode
+
+    def add_pre_dis_info(self, prefix=None):
+        return True
+
+    @classmethod
+    def getbits(cls, bs, offset_b, l):
+        return bs.getbits(offset_b, l)
+
+    @classmethod
+    def getbytes(cls, bs, offset, l):
+        return bs.getbytes(offset, l)
+
+    @classmethod
+    def pre_dis(cls, v_o, mode_o, offset):
+        return {}, v_o, mode_o, offset, 0
+
+    def post_dis(self):
+        return self
+
+    @classmethod
+    def check_mnemo(cls, fields):
+        pass
+
+    @classmethod
+    def mod_fields(cls, fields):
+        return fields
+
+    @classmethod
+    def dis(cls, bs_o, mode_o, offset=0):
+        if not isinstance(bs_o, bin_stream):
+            bs_o = bin_stream_str(bs_o)
+        loggg = False
+        # bs_o == 'fg\x11\x90\x00\x00'#False#'\x48\x15\x44\x33\x22\x11'==bs_o
+        # print 'disfunc', repr(bs_o)
+        offset_o = offset
+        # print 'DIS', hex(offset), mode_o#repr(bs_o.bin)
+        pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
+            bs_o, mode_o, offset)
+        candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
+        # print 'guess', repr(v), mode, prefix.rex_w
+        out = []
+        out_c = []
+        # print 'DIS CAND', len(candidates), mode
+        if hasattr(bs, 'getlen'):
+            bs_l = bs.getlen()
+        else:
+            bs_l = len(bs)
+
+        alias = False
+        for c in candidates:
+            # print 'RRR'
+            if loggg:
+                print "*" * 40, mode, c.mode
+                print c.fields
+            # c.mode_o = mode_o
+            # off = c.parse_prefix(mode_o, v)
+            # bits = bin_stream(v)#[:c.mn_len/8])
+
+            # c = c()
+            # c.init_class()
+            c = cls.all_mn_inst[c][0]
+            # c.init_class()
+            c.reset_class()
+            c.mode = mode
+            # for f in c.fields_order: print f.is_present
+
+            if not c.add_pre_dis_info(pre_dis_info):  # = prefix#cls.mnprefix()
+                continue
+            # print "zz", c.rex_w.value
+            """
+            if prefix.opmode != c.mp[1]:
+                continue
+            if prefix.admode != c.mp[2]:
+                continue
+            """
+
+            args = []
+            todo = {}
+            getok = True
+            fname_values = dict(pre_dis_info)
+            offset_b = offset * 8
+            # print pre_dis_info
+            total_l = 0
+            for i, f in enumerate(c.fields_order):
+                # print 'XX', i, f, id(f)
+                # print 'ZZ', c.rex_x.value
+                if f.flen is not None:
+                    l = f.flen(mode, fname_values)
+                else:
+                    l = f.l
+                # print 'len', l
+                # print "zz", c.rex_w, c.rex_w.value
+                if l is not None:
+                    total_l += l
+                    f.l = l
+                    f.is_present = True
+                    if loggg:
+                        print "FIELD", f.__class__, f.fname, offset_b, l
+                    if bs_l * 8 - offset_b < l:
+                        getok = False
+                        break
+                    bv = cls.getbits(bs, offset_b, l)
+                    offset_b += l
+                    if not f.fname in fname_values:
+                        fname_values[f.fname] = bv
+                    todo[i] = bv
+                else:
+                    f.is_present = False
+                    todo[i] = None
+
+                # print "decode", id(f), f.fname,
+                # print "l", l, "off", offset_b, "v", todo[i]
+            # print "zzz", c.rex_w, c.rex_w.value
+
+            if not getok:
+                continue
+
+            # print 'PRIOdec', [(x[0], x[1].order) for x in c.to_decode]
+            for i in c.to_decode:
+                f = c.fields_order[i]
+                if f.is_present:
+                    # print "zz", f.fname, f.is_present, c.rex_w.value,
+                    # c.rex_b.value, c.rex_x.value
+                    ret = f.decode(todo[i])
+                    if not ret:
+                        log.debug("cannot decode %r" % (f))
+                        break
+
+            if not ret:
+                continue
+            for a in c.args:
+                a.expr = expr_simp(a.expr)
+            # print offset, offset_o, total_l
+            c.l = prefix_len + total_l / 8
+            c.b = cls.getbytes(bs, offset, total_l / 8)
+            c.offset = offset_o
+            c = c.post_dis()
+            if c is None:
+                continue
+            c_args = [a.expr for a in c.args]
+            c_args_str = []
+            for a in c.args:
+                if hasattr(a, 'arg2str'):
+                    c_args_str.append(a.arg2str)
+                else:
+                    raise NotImplementedError('not fully functional')
+                    c_args_str.append(str)
+            # c_args_str = [a.arg2str for a in c.args]
+            instr = cls.instruction(c.name, mode, c_args, c_args_str,
+                                    additional_info=c.additional_info())
+            instr.l = prefix_len + total_l / 8
+            instr.b = cls.getbytes(bs, offset, total_l / 8)
+            instr.offset = offset_o
+            instr.get_info(c)
+            # instr = c.post_dis()
+            if c.alias:
+                alias = True
+            out.append(instr)
+            out_c.append(c)
+        if not out:
+            raise Disasm_Exception('cannot disasm at %X' % offset_o)
+        if len(out) != 1:
+            if not alias:
+                log.warning('dis multiple args ret default')
+
+            assert(len(out) == 2)
+            for i, o in enumerate(out_c):
+                if o.alias:
+                    return out[i]
+            raise NotImplementedError('not fully functional')
+            # for xx in out:
+            #    print xx
+            # if xx.name == "ADC":
+            #    pass
+        return out[0]
+
+    @classmethod
+    def fromstring(cls, s, mode):
+        global total_scans
+        name = re.search('(\S+)', s).groups()
+        if not name:
+            raise ValueError('cannot find name', s)
+        name = name[0]
+        # print "mnemo_name", name
+        if not name in cls.all_mn_name:
+            raise ValueError('unknown name', name)
+        clist = [x for x in cls.all_mn_name[name]]  # if x.mode == mode]
+        out = []
+        out_args = []
+        parsers = defaultdict(dict)
+        # print 'ASM CAND', len(clist), name
+
+        for cc in clist:
+            #"""
+            # c = cc()
+            # c.init_class()
+            #"""
+            """
+            c = cls.all_mn_inst[cc][0]
+            c.reset_class()
+            c.mode = mode
+            """
+            for c in cls.get_cls_instance(cc, mode):
+                args_expr = []
+                args_str = s[len(name):].strip(' ')
+
+                start = 0
+                cannot_parse = False
+                len_o = len(args_str)
+
+                for i, f in enumerate(c.args):
+                    start_i = len_o - len(args_str)
+                    # print i, "will parse", repr(args_str)
+                    if type(f.parser) == tuple:
+                        parser = f.parser
+                    else:
+                        parser = (f.parser,)
+                    for p in parser:
+                        if p in parsers[(i, start_i)]:
+                            continue
+                        try:
+                            total_scans += 1
+                            # print type(p)
+                            v, start, stop = p.scanString(args_str).next()
+                            # print "pp", args_str, v, start, stop
+                        except StopIteration:
+                            v, start, stop = [None], None, None
+                        if start != 0:
+                            v, start, stop = [None], None, None
+                        parsers[(i, start_i)][p] = v[0], start, stop
+
+                    start, stop = f.fromstring(args_str, parsers[(i, start_i)])
+                    # print args_str, start, stop#, f.expr
+                    # if start is not None: print f.expr
+                    if start != 0:
+                        log.debug("cannot fromstring %r" % (args_str))
+                        cannot_parse = True
+                        # print "cannot_parse1"
+                        break
+                    if f.expr is None:
+                        raise NotImplementedError('not fully functional')
+                    # print "f expr", repr(f.expr)
+                    f.expr = expr_simp(f.expr)
+                    args_expr.append(f.expr)
+                    a = args_str[start:stop]
+                    args_str = args_str[stop:].strip(' ')
+                    if args_str.startswith(','):
+                        args_str = args_str[1:]
+                    args_str = args_str.strip(' ')
+                if args_str:
+                    # print "cannot_parse", repr(args_str)
+                    cannot_parse = True
+                if cannot_parse:
+                    continue
+                # print [x for x in c.args]
+                # print [str(x) for x in c.args]
+                """
+                try:
+                    c.value()
+                except Exception, e:
+                    log.debug("cannot encode %r\n%s"%(e, traceback.format_exc()))
+                    cannot_parse = True
+                if cannot_parse:
+                    continue
+                """
+                out.append(c)
+                out_args.append(args_expr)
+                break
+
+        if len(out) == 0:
+            raise ValueError('cannot fromstring %r' % s)
+        if len(out) != 1:
+            log.warning('fromstring multiple args ret default')
+            # raise ValueError("cannot parse %r (%d cand)"%(s, len(out)))
+        c = out[0]
+        c_args = out_args[0]
+
+        c_args_str = []
+        for a in c.args:
+            if hasattr(a, 'arg2str'):
+                c_args_str.append(a.arg2str)
+            else:
+                raise NotImplementedError('not fully functional')
+                c_args_str.append(str)
+
+        instr = cls.instruction(c.name, mode, c_args, c_args_str,
+                                additional_info=c.additional_info())
+        # instruction(name, attrib, args, args_str, additional_info):
+        # c = c()
+        # c.init_class()
+        # re parse instruction
+        """
+        args_str = s[len(name):].strip(' ')
+        for i, f in enumerate(c.args):
+            if isinstance(f, m_arg):
+                start, stop = f.fromstring(args_str)
+                args_str = args_str[stop:].strip(' ')
+                if args_str.startswith(','):
+                    args_str = args_str[1:]
+                args_str = args_str.strip(' ')
+        """
+
+        return instr
+
+    def dup_info(self, infos):
+        return
+
+    @classmethod
+    def get_cls_instance(cls, cc, mode, infos=None):
+        c = cls.all_mn_inst[cc][0]
+
+        c.reset_class()
+        c.add_pre_dis_info()
+        c.dup_info(infos)
+
+        c.mode = mode
+        yield c
+
+    @classmethod
+    def asm(cls, instr, symbols=None):
+        # t = time.time()
+        """
+        Re asm instruction by searching mnemo using name and args. We then
+        can modify args and get the hex of a modified instruction
+        """
+        clist = cls.all_mn_name[instr.name]
+        clist = [x for x in clist]  # if x.mode == instr.mode]
+        # print 'ASM CAN', len(clist)
+        vals = []
+        candidates = []
+        # print "resolve"
+        args = instr.resolve_args_with_symbols(symbols)
+        # print "ok", [str(x) for x in args]
+        """
+        args = []
+        for i, f in enumerate(cls.args):
+            e = f.expr
+            # try to resolve symbols using symbols (0 for default value)
+            if symbols:
+                #print 'origine', e
+                ids = get_expr_ids(e)
+                fixed_ids = {}
+                for x in ids:
+                    if not x.name in symbols:
+                        #print 'not IN', x
+                        continue
+                    if symbols[x.name].offset is None:
+                        value = ExprInt32(0) # default value
+                    else:
+                        value = ExprInt_fromsize(x.size, symbols[x.name].offset)
+                    fixed_ids[x] = value
+                e = e.replace_expr(fixed_ids)
+                #print 'replaced e', e, fixed_ids
+                e = expr_simp(e)
+                #print 'replaced e simp', e, fixed_ids
+            args.append(e)
+        """
+        for cc in clist:
+            # if cc.mode != cls.mode:
+            #    continue
+            """
+            c = c()
+            c.init_class()
+            """
+            for c in cls.get_cls_instance(
+                cc, instr.mode, instr.additional_info):
+
+                # c = cls.all_mn_inst[cc][0]
+                # c = cc()
+                # c.init_class()
+
+                cannot_parse = False
+                if len(c.args) != len(instr.args):
+                    continue
+                # print c.mode, c.mp, c.fields[6:]
+                # print "eee", c.fields
+                # print [str(x.expr) for x in cls.args]
+                # only fix args expr
+                for i in xrange(len(c.args)):
+                    c.args[i].expr = args[i]
+                # print 'ARGS', [str(x) for x in args]
+                # for a in c.args:
+                #    print a.expr,
+                # print
+                # print instr.mode
+                v = c.value(instr.mode)
+                if not v:
+                    log.debug("cannot encode %r" % (c))
+                    cannot_parse = True
+                if cannot_parse:
+                    continue
+                vals += v
+                candidates.append((c, v))
+        if len(vals) == 0:
+            raise ValueError('cannot asm %r %r' %
+                             (instr.name, [str(x) for x in instr.args]))
+        if len(vals) != 1:
+            log.debug('asm multiple args ret default')
+            # raise ValueError("cannot parse %r (%d cand)"%(s, len(out)))
+            """
+            for x in out:
+                print repr(x.value())
+                print [str(a.expr) for a in x.args]
+            """
+        vals = cls.filter_asm_candidates(instr, candidates)
+        # vals = list(set(vals))
+        # vals.sort(key=lambda x:len(x))
+        # dt = time.time() - t
+        # print 'TIME', dt, str(cls)
+
+        return vals
+
+    @classmethod
+    def filter_asm_candidates(cls, instr, candidates):
+        """Flatten the encodings of all candidates, shortest first.
+
+        `candidates` is an iterable of (candidate, encodings) pairs; only the
+        encodings are used. `instr` is not consulted by this implementation.
+        Returns a single list of every encoding, stably sorted by length.
+        """
+        encodings = [enc for _, encs in candidates for enc in encs]
+        encodings.sort(key=len)
+        return encodings
+
+    def value(self, mode):
+        """Encode this instruction's fields and return the resulting byte strings.
+
+        A worklist (`todo`) of (index, fields) states is explored, where
+        `fields` is a list of (key, field) pairs built from `self.to_decode`
+        (reversed) and `self.fields_order`. For each state, fields from
+        `index` onward are encoded in turn with `f.encode()`:
+
+        * a falsy result abandons the state;
+        * `True` moves on to the next field;
+        * any other result is iterated, and for each item a cloned copy of
+          all fields is pushed back on the worklist at the next index.
+
+        States whose fields all encode are collected and handed to
+        `self.decoded2bytes`, whose return value is returned.
+
+        `mode` is not used in this method body.
+        """
+        # print 'PRIOenc', [(x, self.fields_order[x].order) for x in
+        # self.to_decode[::-1]]
+        todo = [(0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])]
+        # print todo
+        result = []
+        # (index, [field values]) snapshots of states already visited
+        done = []
+        # cpt only feeds the commented-out debug print at the end
+        cpt = 0
+
+        # print 'VALUE'#, self.fields[6:]
+        while todo:
+            index, to_decode = todo.pop()
+            # TEST XXX
+            # expose every field of this state as an attribute named f.fname
+            for i, f in to_decode:
+                setattr(self, f.fname, f)
+            # print 'todo:', len(todo), index, to_decode
+            # print "OOOOOOO"
+            # if (index, hash(tuple(to_decode))) in done:
+            # skip a state whose (index, values) snapshot was already handled
+            if (index, [x[1].value for x in to_decode]) in done:
+                # print 'skip', to_decode
+                continue
+            done.append((index, [x[1].value for x in to_decode]))
+
+            # done.append((index, to_decode))
+            cpt += 1
+            can_encode = True
+            for i, f in to_decode[index:]:
+                # print 'before', f.value, repr(f)
+                ret = f.encode()
+                # print 'encode', len(todo), index, f.fname, f.value, f.l, ret
+                # print 'ret', ret
+                if not ret:
+                    log.debug('cannot encode %r' % f)
+                    can_encode = False
+                    break
+                index += 1
+                if ret is True:
+                    continue
+
+                # print ret, index
+                # ret is neither falsy nor True: iterate it, snapshotting the
+                # fields after each step.
+                # NOTE(review): presumably ret is a generator that updates the
+                # field values as it advances -- confirm against encode().
+                gcpt = 0
+                for i in ret:
+                    gcpt += 1
+                    o = []
+                    # NOTE(review): todo holds (index, [(key, field), ...])
+                    # entries, so the first membership test compares a list of
+                    # values against lists of pairs -- confirm this is intended.
+                    if ((index, [xx[1].value for xx in to_decode]) in todo or
+                        (index, [xx[1].value for xx in to_decode]) in done):
+                        raise NotImplementedError('not fully functional')
+                        # unreachable: follows the raise above
+                        continue
+                    for p, f in to_decode:
+                        fnew = f.clone()
+                        o.append((p, fnew))
+                    todo.append((index, o))
+                # the current state is replaced by the clones pushed above
+                can_encode = False
+                # print 'gcpt', gcpt
+                break
+            if not can_encode:
+                continue
+            result.append(to_decode)
+        # print 'CPT', cpt
+        # print "HEX", len(result), result
+        return self.decoded2bytes(result)
+
+    def encodefields(self, decoded):
+        """Pack the values of `decoded` fields into a string of bytes.
+
+        `decoded` is an iterable of (key, field) pairs. Each field is first
+        exposed on `self` under the attribute name `field.fname`; fields
+        whose `value` is None contribute no bits. The remaining values are
+        appended to a `bitobj` as `field.l`-bit quantities, in iteration
+        order, and the buffer is serialised once with `tostring()`.
+        """
+        bits = bitobj()
+        for _, f in decoded:
+            setattr(self, f.fname, f)
+
+            if f.value is None:
+                continue
+            bits.putbits(f.value, f.l)
+        # The original serialised the buffer twice, storing the first result
+        # in an unused local; a single call is enough.
+        return bits.tostring()
+
+    def decoded2bytes(self, result):
+        """Turn encoded field lists into a de-duplicated list of encodings.
+
+        Each entry of `result` is sorted in place, passed to
+        `self.encodefields`, and kept unless that call returns None.
+        Duplicates are removed through a set, so the order of the returned
+        list is not defined. An empty `result` yields an empty list.
+        """
+        if not result:
+            return []
+
+        encoded = []
+        for fields in result:
+            fields.sort()
+            # print [f.value for p, f in decoded]
+            raw = self.encodefields(fields)
+            if raw is not None:
+                encoded.append(raw)
+        return list(set(encoded))
+
+    def gen_args(self, args):
+        """Return the `str()` of every item in `args`, joined by ', '."""
+        return ', '.join(map(str, args))
+
+    def args2str(self):
+        """Return the `str()` of each entry of `self.args` as a list.
+
+        Raises ValueError('zarb arg type') for an entry that is neither an
+        `Expr` itself nor carries an `Expr` in its `expr` attribute.
+        """
+        rendered = []
+        for operand in self.args:
+            # XXX todo test
+            if (not isinstance(operand, Expr) and
+                    not isinstance(operand.expr, Expr)):
+                raise ValueError('zarb arg type')
+            rendered.append(str(operand))
+        return rendered
+
+    def __str__(self):
+        """Render as the name left-justified in 10 columns, then the operands.
+
+        Operands are converted with `str()` and formatted by
+        `self.gen_args`. Raises ValueError('zarb arg type') for an entry of
+        `self.args` that is neither an `Expr` nor holds one in `expr`.
+        """
+        prefix = "%-10s " % self.name
+        rendered = []
+        for operand in self.args:
+            # XXX todo test
+            if (not isinstance(operand, Expr) and
+                    not isinstance(operand.expr, Expr)):
+                raise ValueError('zarb arg type')
+            rendered.append(str(operand))
+
+        return prefix + self.gen_args(rendered)
+
+    def parse_prefix(self, v):
+        """Default prefix parser: ignores `v` and always returns 0.
+
+        NOTE(review): presumably overridden by architectures that have
+        instruction prefixes -- confirm against subclasses.
+        """
+        return 0
+
+    def set_dst_symbol(self, symbol_pool):
+        """Store a symbolic version of the flow destinations in `args_symb`.
+
+        Every `ExprInt` destination returned by `self.getdstflow` is swapped
+        for an `ExprId` of the same size, named after the label that
+        `symbol_pool.getby_offset_create` returns for that offset. Other
+        destinations are kept unchanged.
+        """
+        resolved = []
+        for target in self.getdstflow(symbol_pool):
+            if not isinstance(target, ExprInt):
+                resolved.append(target)
+                continue
+            label = symbol_pool.getby_offset_create(int(target.arg))
+            # print l
+            resolved.append(ExprId(label.name, target.size))
+        self.args_symb = resolved
+
+    def getdstflow(self, symbol_pool):
+        """Return a one-element list holding the expression of the first arg.
+
+        `symbol_pool` is not used by this default implementation.
+        """
+        return [self.args[0].expr]
+
+
+class imm_noarg(object):
+    """Immediate operand helper: converts between ints, exprs and field bits.
+
+    The methods rely on attributes that are not set here (`parser`, `lmask`,
+    `expr`, `value`) and must be provided by whatever uses this class.
+    """
+    # parser = str_int
+    intsize = 32
+    intmask = (1 << intsize) - 1
+    # expr2int = lambda self,x:int(self.expr.arg&self.lmask)
+
+    def int2expr(self, v):
+        """Wrap `v` as an `intsize`-bit expr, or None if bits fall outside
+        `intmask`."""
+        if (v & ~self.intmask) == 0:
+            return ExprInt_fromsize(self.intsize, v)
+        return None
+
+    def expr2int(self, e):
+        """Return the int held by ExprInt `e`, or None if `e` is not an
+        ExprInt or has bits outside `intmask`."""
+        if not isinstance(e, ExprInt):
+            return None
+        number = int(e.arg)
+        # print "testimm2", hex(v), hex(self.intmask)
+        if (number & ~self.intmask) == 0:
+            return number
+        return None
+
+    def fromstring(self, s, parser_result=None):
+        """Parse `s` (or reuse `parser_result`) and store the result in
+        `self.expr`.
+
+        Returns (start, stop) of the match, or (None, None) when nothing
+        could be parsed or converted.
+        """
+        if not parser_result:
+            try:
+                e, start, stop = self.parser.scanString(s).next()
+            except StopIteration:
+                return None, None
+        else:
+            e, start, stop = parser_result[self.parser]
+        if e is None:
+            return None, None
+        # print 'fromstring', hex(e), self.int2expr
+        assert(isinstance(e, Expr))
+        if isinstance(e, tuple):
+            self.expr = self.int2expr(e[1])
+        elif isinstance(e, Expr):
+            self.expr = e
+        else:
+            raise TypeError('zarb expr')
+        if self.expr is not None:
+            return start, stop
+        log.debug('cannot fromstring int %r' % s)
+        return None, None
+
+    def decodeval(self, v):
+        """Hook applied to a raw field value on decode; identity here."""
+        return v
+
+    def encodeval(self, v):
+        """Return `v` unchanged, or False when it exceeds `self.lmask`."""
+        return False if v > self.lmask else v
+
+    @staticmethod
+    def arg2str(e):
+        """Return `str(e)`."""
+        return str(e)
+
+    def decode(self, v):
+        """Build `self.expr` from raw field bits `v`; False on failure."""
+        decoded = self.decodeval(v & self.lmask)
+        e = self.int2expr(decoded)
+        if not e:
+            return False
+        self.expr = e
+        return True
+
+    def encode(self):
+        """Compute `self.value` from `self.expr`; False when it cannot be
+        encoded."""
+        number = self.expr2int(self.expr)
+        if number is None:
+            return False
+        number = self.encodeval(number)
+        if number is False:
+            return False
+        self.value = number
+        return True
+
+
+class imm08_noarg(object):
+    """Provides an `int2expr` that builds its result with `ExprInt08`."""
+
+    def int2expr(self, x):
+        return ExprInt08(x)
+
+
+class imm16_noarg(object):
+    """Provides an `int2expr` that builds its result with `ExprInt16`."""
+
+    def int2expr(self, x):
+        return ExprInt16(x)
+
+
+class imm32_noarg(object):
+    """Provides an `int2expr` that builds its result with `ExprInt32`."""
+
+    def int2expr(self, x):
+        return ExprInt32(x)
+
+
+class imm64_noarg(object):
+    """Provides an `int2expr` that builds its result with `ExprInt64`."""
+
+    def int2expr(self, x):
+        return ExprInt64(x)
+
+
+class int32_noarg(imm_noarg):
+    """Signed variant of `imm_noarg`: the `self.l`-bit field is sign-extended
+    to `intsize` bits on decode and checked for round-tripping on encode."""
+    intsize = 32
+    intmask = (1 << intsize) - 1
+
+    def decode(self, v):
+        """Sign-extend `v` from `self.l` to `intsize` bits and store the
+        resulting expr; always returns True."""
+        extended = sign_ext(v, self.l, self.intsize)
+        self.expr = self.int2expr(self.decodeval(extended))
+        return True
+
+    def encode(self):
+        """Set `self.value` to the low `self.l` bits of `self.expr`.
+
+        Returns False when `self.expr` is not an ExprInt or when sign
+        extending those low bits does not give back the original value.
+        """
+        if not isinstance(self.expr, ExprInt):
+            return False
+        full = int(self.expr.arg)
+        low = full & self.lmask
+        if sign_ext(low, self.l, self.intsize) != full:
+            return False
+        self.value = self.encodeval(low) & self.lmask
+        return True
+
+
+def swap_uint(size, i):
+    """Byte-swap the low `size` bits of `i`, as an unsigned integer.
+
+    `size` must be 8, 16, 32 or 64; any other value raises ValueError.
+    For 8 bits the result is simply `i & 0xff`.
+    """
+    if size == 8:
+        return i & 0xff
+    for bits, code in ((16, 'H'), (32, 'I'), (64, 'Q')):
+        if size == bits:
+            masked = i & ((1 << bits) - 1)
+            # pack big-endian, read back little-endian
+            return struct.unpack('<' + code, struct.pack('>' + code, masked))[0]
+    raise ValueError('unknown int len %r' % size)
+
+
+def swap_sint(size, i):
+    """Byte-swap the low `size` bits of `i` and read them as a signed integer.
+
+    `size` must be 8, 16, 32 or 64; any other value raises ValueError.
+    For 8 bits `i` is returned untouched (no masking, no sign conversion).
+    """
+    if size == 8:
+        return i
+    for bits, code in ((16, 'H'), (32, 'I'), (64, 'Q')):
+        if size == bits:
+            # pack unsigned big-endian, read back signed little-endian
+            packed = struct.pack('>' + code, i & ((1 << bits) - 1))
+            return struct.unpack('<' + code.lower(), packed)[0]
+    raise ValueError('unknown int len %r' % size)
+
+
+def sign_ext(v, s_in, s_out):
+    """Sign-extend the low `s_in` bits of `v` to `s_out` bits.
+
+    The result is returned as a non-negative integer: when bit `s_in - 1`
+    is set, bits `s_in` .. `s_out - 1` are filled with ones. Requires
+    `s_in <= s_out` (checked with an assert).
+    """
+    assert(s_in <= s_out)
+    in_mask = (1 << s_in) - 1
+    v &= in_mask
+    if v & (1 << (s_in - 1)):
+        # set every bit between the input width and the output width
+        v |= ((1 << s_out) - 1) ^ in_mask
+    return v
diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py
new file mode 100644
index 00000000..47047269
--- /dev/null
+++ b/miasm2/core/graph.py
@@ -0,0 +1,126 @@
+class DiGraph:
+    """Minimal directed graph with per-node incoming/outgoing edge lists.
+
+    Nodes are kept in a set (so they must be hashable); edges are (a, b)
+    tuples kept in insertion order. Parallel edges are allowed through
+    `add_edge`; use `add_uniq_edge` to avoid them.
+    """
+
+    def __init__(self):
+        self._nodes = set()
+        self._edges = []
+        # node -> list of edges leaving that node
+        self._nodes_to = {}
+        # node -> list of edges arriving at that node
+        self._nodes_from = {}
+
+    def __repr__(self):
+        out = [str(n) for n in self._nodes]
+        out.extend("%s -> %s" % (a, b) for a, b in self._edges)
+        return '\n'.join(out)
+
+    def nodes(self):
+        """Return the internal set of nodes (not a copy)."""
+        return self._nodes
+
+    def edges(self):
+        """Return the internal list of (a, b) edges (not a copy)."""
+        return self._edges
+
+    def add_node(self, n):
+        """Add node `n`; a no-op if it is already present."""
+        if n in self._nodes:
+            return
+        self._nodes.add(n)
+        self._nodes_to[n] = []
+        self._nodes_from[n] = []
+
+    def add_edge(self, a, b):
+        """Add edge a -> b, creating missing nodes. Duplicates are kept."""
+        if a not in self._nodes:
+            self.add_node(a)
+        if b not in self._nodes:
+            self.add_node(b)
+        self._edges.append((a, b))
+        self._nodes_to[a].append((a, b))
+        self._nodes_from[b].append((a, b))
+
+    def add_uniq_edge(self, a, b):
+        """Add edge a -> b unless it already exists."""
+        # Every edge leaving `a` is mirrored in _nodes_to[a], so looking
+        # there is equivalent to scanning the whole edge list.
+        if (a, b) in self._nodes_to.get(a, ()):
+            return
+        self.add_edge(a, b)
+
+    def del_edge(self, a, b):
+        """Remove one occurrence of edge a -> b.
+
+        Raises ValueError if the edge is not present.
+        """
+        self._edges.remove((a, b))
+        self._nodes_to[a].remove((a, b))
+        self._nodes_from[b].remove((a, b))
+
+    def predecessors_iter(self, n):
+        """Yield the source of every edge arriving at `n`.
+
+        Yields nothing for an unknown node.
+        """
+        if n not in self._nodes_from:
+            # A plain return ends the generator; raising StopIteration
+            # here turns into RuntimeError under PEP 479.
+            return
+        for a, _ in self._nodes_from[n]:
+            yield a
+
+    def predecessors(self, n):
+        """Return `predecessors_iter(n)` as a list."""
+        return list(self.predecessors_iter(n))
+
+    def successors_iter(self, n):
+        """Yield the destination of every edge leaving `n`.
+
+        Yields nothing for an unknown node.
+        """
+        if n not in self._nodes_to:
+            # See predecessors_iter: return instead of raise StopIteration.
+            return
+        for _, b in self._nodes_to[n]:
+            yield b
+
+    def successors(self, n):
+        """Return `successors_iter(n)` as a list."""
+        return list(self.successors_iter(n))
+
+    def leaves_iter(self):
+        """Yield the nodes that have no outgoing edge."""
+        for n in self._nodes:
+            if len(self._nodes_to[n]) == 0:
+                yield n
+
+    def leaves(self):
+        """Return `leaves_iter()` as a list."""
+        return list(self.leaves_iter())
+
+    def roots_iter(self):
+        """Yield the nodes that have no incoming edge."""
+        for n in self._nodes:
+            if len(self._nodes_from[n]) == 0:
+                yield n
+
+    def roots(self):
+        """Return `roots_iter()` as a list."""
+        return list(self.roots_iter())
+
+    def find_path(self, a, b, cycles_count=0, done=None):
+        """Return the paths from `a` to `b` as lists of nodes.
+
+        The search walks backwards from `b` through predecessors. `done`
+        counts how many times each node was expanded on the current
+        branch; a node expanded more than `cycles_count` times is not
+        expanded again, which bounds the number of loop traversals.
+        """
+        if done is None:
+            done = {}
+        if b in done and done[b] > cycles_count:
+            # dead end: the caller discards paths that do not start at `a`
+            return [[]]
+        if a == b:
+            return [[a]]
+        out = []
+        for n in self.predecessors(b):
+            done_n = dict(done)
+            done_n[b] = done_n.get(b, 0) + 1
+            for path in self.find_path(a, n, cycles_count, done_n):
+                if path and path[0] == a:
+                    out.append(path + [b])
+        return out
+
+    def node2str(self, n):
+        """Label used for node `n` in `dot()`; `str(n)` by default."""
+        return str(n)
+
+    def edge2str(self, a, b):
+        """Label used for edge a -> b in `dot()`; empty by default."""
+        return ""
+
+    def dot(self):
+        """Return the graph in Graphviz dot syntax.
+
+        Nodes are identified by their hash, masked to 64 bits.
+        """
+        out = ["""
+digraph asm_graph {
+graph [
+splines=polyline,
+];
+node [
+fontsize = "16",
+shape = "box"
+];
+"""]
+        for n in self.nodes():
+            out.append('%s [label="%s"];\n' % (
+                hash(n) & 0xFFFFFFFFFFFFFFFF, self.node2str(n)))
+
+        for a, b in self.edges():
+            out.append('%s -> %s [label="%s"]\n' % (
+                hash(a) & 0xFFFFFFFFFFFFFFFF,
+                hash(b) & 0xFFFFFFFFFFFFFFFF,
+                self.edge2str(a, b)))
+        out.append("}")
+        return ''.join(out)
diff --git a/miasm2/core/interval.py b/miasm2/core/interval.py
new file mode 100644
index 00000000..cd2a793e
--- /dev/null
+++ b/miasm2/core/interval.py
@@ -0,0 +1,238 @@
+# Result codes returned by cmp_interval(a, b), where a = (a1, a2) and
+# b = (b1, b2) are closed intervals.
+INT_EQ = 0  # a == b
+INT_B_IN_A = 1  # b lies inside a
+INT_A_IN_B = -1  # a lies inside b
+INT_DISJOIN = 2  # a and b neither overlap nor touch
+INT_JOIN = 3  # a and b partially overlap
+INT_JOIN_AB = 4  # b starts right after a ends (a2 + 1 == b1)
+INT_JOIN_BA = 5  # a starts right after b ends (b2 + 1 == a1)
+
+# 0  => equal
+# 1  => b in a
+# -1 => a in b
+# 2  => disjoint
+# 3  => overlapping
+# 4  => adjacent, a then b
+# 5  => adjacent, b then a
+
+
+def cmp_interval(a, b):
+    """Classify how closed intervals `a` and `b` relate to each other.
+
+    Both arguments are (low, high) pairs. Returns one of the INT_*
+    codes; the tests are applied in this order: equality, containment,
+    adjacency, disjointness, and finally partial overlap.
+    """
+    if a == b:
+        return INT_EQ
+    (a_lo, a_hi), (b_lo, b_hi) = a, b
+    if a_lo <= b_lo and a_hi >= b_hi:
+        relation = INT_B_IN_A
+    elif b_lo <= a_lo and b_hi >= a_hi:
+        relation = INT_A_IN_B
+    elif a_hi + 1 == b_lo:
+        relation = INT_JOIN_AB
+    elif b_hi + 1 == a_lo:
+        relation = INT_JOIN_BA
+    elif a_lo > b_hi + 1 or b_lo > a_hi + 1:
+        relation = INT_DISJOIN
+    else:
+        relation = INT_JOIN
+    return relation
+
+# interval is: [a, b]
+
+
+class interval:
+    """Set of integers stored as a sorted list of disjoint closed intervals.
+
+    `self.intervals` is a list of (low, high) pairs kept in canonical
+    form: ascending, non-empty, with overlapping or adjacent pairs merged.
+    """
+
+    def __init__(self, a=None):
+        """Build from a list of (low, high) pairs or from another interval."""
+        if a is None:
+            a = []
+        if isinstance(a, interval):
+            a = a.intervals
+        self.is_cannon = False
+        self.intervals = a
+        self.cannon()
+
+    def __iter__(self):
+        for x in self.intervals:
+            yield x
+
+    @classmethod
+    def cannon_list(cls, tmp):
+        """
+        Return a canonical list of intervals: pairs with low > high are
+        dropped, the rest is sorted and overlapping or adjacent pairs are
+        merged.
+        """
+        tmp = sorted([x for x in tmp if x[0] <= x[1]])
+        out = []
+        if not tmp:
+            return out
+        # Walk from the highest interval down; `out` is built in
+        # descending order and reversed at the end.
+        out.append(tmp.pop())
+        while tmp:
+            x = tmp.pop()
+            rez = cmp_interval(out[-1], x)
+            # print out[-1], x, rez
+            if rez == INT_EQ:
+                continue
+            elif rez == INT_DISJOIN:
+                out.append(x)
+            elif rez == INT_B_IN_A:
+                continue
+            elif rez in [INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]:
+                # absorb every already-emitted interval the merged span
+                # overlaps, touches or covers
+                u, v = x
+                while out and cmp_interval(out[-1], (u, v)) in [
+                    INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]:
+                    u = min(u, out[-1][0])
+                    v = max(v, out[-1][1])
+                    out.pop()
+                out.append((u, v))
+            else:
+                raise ValueError('unknown state', rez)
+        return out[::-1]
+
+    def cannon(self):
+        """Put `self.intervals` in canonical form (done at most once)."""
+        if self.is_cannon is True:
+            return
+        self.intervals = interval.cannon_list(self.intervals)
+        self.is_cannon = True
+
+    def __repr__(self):
+        if self.intervals:
+            o = " U ".join(["[0x%X 0x%X]" % (x[0], x[1])
+                           for x in self.intervals])
+        else:
+            o = "[]"
+        return o
+
+    def __contains__(self, i):
+        """Membership test.
+
+        For an `interval` argument, return True when every interval of
+        `i` lies inside a single interval of `self` (an empty `i` is
+        contained in anything). For any other argument, return True when
+        the value falls inside one of the intervals.
+        """
+        if isinstance(i, interval):
+            # Each piece of `i` must be covered by some piece of `self`.
+            # The loops used to be nested the other way round, which
+            # tested whether every piece of `self` contained a piece of `i`.
+            for y in i.intervals:
+                is_in = False
+                for x in self.intervals:
+                    if cmp_interval(x, y) in [INT_EQ, INT_B_IN_A]:
+                        is_in = True
+                        break
+                if not is_in:
+                    return False
+            return True
+        else:
+            for x in self.intervals:
+                if x[0] <= i <= x[1]:
+                    return True
+            return False
+
+    def __eq__(self, i):
+        return self.intervals == i.intervals
+
+    def __ne__(self, i):
+        # Python 2 does not derive != from __eq__
+        return not self.__eq__(i)
+
+    def __add__(self, i):
+        """Union with another interval or with a list of (low, high) pairs."""
+        if isinstance(i, interval):
+            i = i.intervals
+        i = interval(self.intervals + i)
+        return i
+
+    def __sub__(self, v):
+        """Return the elements of `self` that are not in interval `v`."""
+        to_test = self.intervals[:]
+        i = -1
+        to_del = v.intervals[:]
+        while i < len(to_test) - 1:
+            i += 1
+            x = to_test[i]
+            # splitting below can leave empty pieces; drop them
+            if x[0] > x[1]:
+                del(to_test[i])
+                i -= 1
+                continue
+
+            # intervals to remove that end before x cannot affect x or
+            # anything after it
+            while to_del and to_del[0][1] < x[0]:
+                del(to_del[0])
+
+            for y in to_del:
+                if y[0] > x[1]:
+                    break
+                rez = cmp_interval(x, y)
+                if rez == INT_DISJOIN:
+                    continue
+                elif rez == INT_EQ:
+                    del(to_test[i])
+                    i -= 1
+                    break
+                elif rez == INT_A_IN_B:
+                    del(to_test[i])
+                    i -= 1
+                    break
+                elif rez == INT_B_IN_A:
+                    # y punches a hole in x: keep both sides and re-examine
+                    # them on the next iterations
+                    del(to_test[i])
+                    i1 = (x[0], y[0] - 1)
+                    i2 = (y[1] + 1, x[1])
+                    to_test[i:i] = [i1, i2]
+                    i -= 1
+                    break
+                elif rez in [INT_JOIN_AB, INT_JOIN_BA]:
+                    continue
+                elif rez == INT_JOIN:
+                    # partial overlap: keep the side of x not covered by y
+                    del(to_test[i])
+                    if x[0] < y[0]:
+                        to_test[i:i] = [(x[0], y[0] - 1)]
+                    else:
+                        to_test[i:i] = [(y[1] + 1, x[1])]
+                    i -= 1
+                    break
+                else:
+                    raise ValueError('unknown state', rez)
+        return interval(to_test)
+
+    def __and__(self, v):
+        """Return the intersection of `self` and interval `v`."""
+        out = []
+        for x in self.intervals:
+            # print "x", x
+            if x[0] > x[1]:
+                continue
+            for y in v.intervals:
+                # print 'y', y
+                rez = cmp_interval(x, y)
+                # print x, y, rez
+                if rez == INT_DISJOIN:
+                    continue
+                elif rez == INT_EQ:
+                    out.append(x)
+                    continue
+                elif rez == INT_A_IN_B:
+                    out.append(x)
+                    continue
+                elif rez == INT_B_IN_A:
+                    out.append(y)
+                    continue
+                elif rez == INT_JOIN_AB:
+                    continue
+                elif rez == INT_JOIN_BA:
+                    continue
+                elif rez == INT_JOIN:
+                    if x[0] < y[0]:
+                        out.append((y[0], x[1]))
+                    else:
+                        out.append((x[0], y[1]))
+                    continue
+                else:
+                    raise ValueError('unknown state', rez)
+        return interval(out)
+
+    def hull(self):
+        """Return (lowest bound, highest bound), or (None, None) if empty."""
+        if not self.intervals:
+            return None, None
+        return self.intervals[0][0], self.intervals[-1][1]
+
+    def show(self, img_x=1350, img_y=20, dry_run=False):
+        """
+        Show an image representing the interval (requires PIL).
+
+        The hull is mapped onto `img_x` pixels and each interval is drawn
+        as a rectangle. With `dry_run` set, the image is built but not
+        displayed.
+        """
+        try:
+            import Image
+            import ImageDraw
+        except ImportError:
+            print('cannot import python PIL imaging')
+            return
+
+        img = Image.new('RGB', (img_x, img_y), (100, 100, 100))
+        draw = ImageDraw.Draw(img)
+        i_min, i_max = self.hull()
+
+        # NOTE: hull() is (None, None) for an empty interval, in which
+        # case hex() raises TypeError; a single-point hull makes addr2x
+        # divide by zero.
+        print("%s %s" % (hex(i_min), hex(i_max)))
+
+        def addr2x(addr):
+            return (addr - i_min) * img_x / (i_max - i_min)
+        for a, b in self.intervals:
+            draw.rectangle((addr2x(a), 0, addr2x(b), img_y), (200, 0, 0))
+
+        if dry_run is False:
+            img.show()
diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py
new file mode 100644
index 00000000..c8967dab
--- /dev/null
+++ b/miasm2/core/parse_asm.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+import re
+import struct
+import miasm2.expression.expression as m2_expr
+from miasm2.core.asmbloc import *
+
+# Data directive name -> struct format character used by parse_txt to pack
+# each value that follows the directive.
+# NOTE(review): 'zero' packs its operands as 32-bit values like 'long';
+# confirm this is the intended meaning of the directive.
+declarator = {'byte': 'B',
+              'word': 'H',
+              'dword': 'I',
+              'qword': 'Q',
+              'long': 'I', 'zero': 'I',
+              }
+
+
+def guess_next_new_label(symbol_pool, gen_label_index=0):
+    """Create and return a label with the first unused "loc_XXXXXXXX" name.
+
+    Names are tried from index 0 upwards until `symbol_pool.getby_name`
+    returns None; the label is then created with `symbol_pool.add_label`.
+    `gen_label_index` is accepted but not used.
+    """
+    index = 0
+    while symbol_pool.getby_name("loc_%.8X" % index) is not None:
+        index += 1
+    return symbol_pool.add_label("loc_%.8X" % index)
+
+
+def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
+    """Parse assembly text into blocs, grouped by section.
+
+    `txt` is split into lines; each line is classified as blank, comment
+    (starting with ';'), label, directive or instruction. Instructions are
+    parsed with `mnemo.fromstring(line, attrib)`. The items collected for
+    the .text, .data and .bss sections are then cut into `asm_bloc`
+    objects, each starting at a label (one is generated when missing).
+
+    Returns `(blocs_sections, symbol_pool)` where `blocs_sections` is a
+    list of three bloc lists, in the order text, data, bss. A new
+    `asm_symbol_pool` is created when `symbol_pool` is None.
+    `gen_label_index` is accepted but not used.
+
+    Raises ValueError on an unknown directive.
+    """
+    if symbol_pool is None:
+        symbol_pool = asm_symbol_pool()
+
+    lines_text = []
+    lines_data = []
+    lines_bss = []
+
+    # items go to the text section until a section directive is met
+    lines = lines_text
+    # parse each line
+    for line in txt.split('\n'):
+        # empty
+        if re.match(r'\s*$', line):
+            continue
+        # comment
+        if re.match(r'\s*;\S*', line):
+            continue
+        # labels to forget
+        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
+        if r:
+            continue
+        # label beginning with .L
+        r = re.match(r'\s*(\.L\S+)\s*:', line)
+        if r:
+            l = r.groups()[0]
+            l = symbol_pool.getby_name_create(l)
+            lines.append(l)
+            continue
+        # directive
+        if re.match(r'\s*\.', line):
+            r = re.match(r'\s*\.(\S+)', line)
+            directive = r.groups()[0]
+            if directive == 'text':
+                lines = lines_text
+                continue
+            if directive == 'data':
+                lines = lines_data
+                continue
+            if directive == 'bss':
+                lines = lines_bss
+                continue
+            if directive in ['string', 'ascii']:
+                # XXX HACK
+                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
+                # The payload lies between the first and the last double
+                # quote. The end used to be searched with a single quote,
+                # which only worked when the line stopped right at the
+                # closing '"' and cut the string at any apostrophe.
+                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
+                if directive == 'string':
+                    raw += "\x00"
+                lines.append(asm_raw(raw))
+                continue
+            if directive == 'ustring':
+                # XXX HACK
+                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
+                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
+                # widen every character by appending a NUL byte
+                raw = "".join(map(lambda x: x + '\x00', raw))
+                lines.append(asm_raw(raw))
+                continue
+            if directive in declarator:
+                data_raw = line[r.end():].split()
+                try:
+                    data_int = []
+                    for b in data_raw:
+                        if re.search(r'0x', b):
+                            data_int.append(int(b, 16))
+                        else:
+                            data_int.append(int(b) % (1 << 32))
+                    raw = reduce(lambda x, y: x + struct.pack(
+                        declarator[directive], y), data_int, "")
+                except ValueError:
+                    # operands that are not plain integers: keep the line
+                    raw = line
+                lines.append(asm_raw(raw))
+                continue
+            if directive == 'comm':
+                # TODO
+                continue
+            if directive == 'split':  # custom command
+                lines.append(asm_raw(line.strip()))
+                continue
+            if directive == 'dontsplit':  # custom command
+                lines.append(asm_raw(line.strip()))
+                continue
+            if directive in ['file', 'intel_syntax', 'globl', 'local',
+                             'type', 'size', 'align', 'ident', 'section']:
+                continue
+            if directive[0:4] == 'cfi_':
+                continue
+
+            raise ValueError("unknown directive %s" % str(directive))
+
+        # label
+        r = re.match(r'\s*(\S+)\s*:', line)
+        if r:
+            l = r.groups()[0]
+            l = symbol_pool.getby_name_create(l)
+            lines.append(l)
+            continue
+
+        # code
+        if ';' in line:
+            line = line[:line.find(';')]
+        line = line.strip(' ').strip('\t')
+        instr = mnemo.fromstring(line, attrib)
+        if instr.dstflow():
+            instr.dstflow2label(symbol_pool)
+        lines.append(instr)
+
+    log_asmbloc.info("___pre asm oki___")
+    # make blocs
+    # gen_label_index = 0
+
+    blocs_sections = []
+    bloc_num = 0
+    for lines in [lines_text, lines_data, lines_bss]:
+        # state 0: no current bloc; state 1: filling bloc `b`
+        state = 0
+        i = 0
+        blocs = []
+        blocs_sections.append(blocs)
+        bloc_to_nlink = None
+        block_may_link = False
+        while i < len(lines):
+            # print 'DEAL', lines[i], state
+            # no current bloc
+            if state == 0:
+                if not isinstance(lines[i], asm_label):
+                    # a bloc must start with a label: insert a generated
+                    # one and process it on the next iteration
+                    l = guess_next_new_label(symbol_pool)
+                    lines[i:i] = [l]
+                else:
+                    l = lines[i]
+                    b = asm_bloc(l)
+                    b.bloc_num = bloc_num
+                    bloc_num += 1
+                    blocs.append(b)
+                    state = 1
+                    i += 1
+                    if bloc_to_nlink:
+                        # print 'nlink!'
+                        bloc_to_nlink.addto(
+                            asm_constraint(b.label, asm_constraint.c_next))
+                        bloc_to_nlink = None
+
+            # in bloc
+            elif state == 1:
+                # asm_raw
+                if isinstance(lines[i], asm_raw):
+                    if lines[i].raw.startswith('.split'):
+                        state = 0
+                        block_may_link = False
+                        i += 1
+                    elif lines[i].raw.startswith('.dontsplit'):
+                        # print 'dontsplit'
+                        state = 1
+                        block_may_link = True
+                        i += 1
+                    else:
+                        b.addline(lines[i])
+                        i += 1
+                # asm_label
+                elif isinstance(lines[i], asm_label):
+                    if block_may_link:
+                        # print 'nlink!'
+                        b.addto(
+                            asm_constraint(lines[i], asm_constraint.c_next))
+                        block_may_link = False
+                    # the label itself is consumed in state 0 (i unchanged)
+                    state = 0
+                # instruction
+                else:
+                    b.addline(lines[i])
+                    if lines[i].dstflow():
+                        for x in lines[i].getdstflow(symbol_pool):
+                            if not isinstance(x, m2_expr.ExprId):
+                                continue
+                            if x in mnemo.regs.all_regs_ids:
+                                continue
+                            b.addto(asm_constraint(x, asm_constraint.c_to))
+
+                        # TODO XXX redo this really
+
+                        if not lines[i].breakflow() and i + 1 < len(lines):
+                            if isinstance(lines[i + 1], asm_label):
+                                l = lines[i + 1]
+                            else:
+                                l = guess_next_new_label(symbol_pool)
+                                lines[i + 1:i + 1] = [l]
+                        else:
+                            state = 0
+
+                        if lines[i].splitflow():
+                            bloc_to_nlink = b
+                    if not lines[i].breakflow() or lines[i].splitflow():
+                        block_may_link = True
+                    else:
+                        block_may_link = False
+
+                    i += 1
+
+    for b in blocs_sections[0]:
+        log_asmbloc.info(b)
+
+    return blocs_sections, symbol_pool
diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py
new file mode 100644
index 00000000..44d1e8e2
--- /dev/null
+++ b/miasm2/core/utils.py
@@ -0,0 +1,39 @@
+import struct
+
+# Shorthands for packing/unpacking a single unsigned integer of 8, 16, 32
+# or 64 bits. The format strings carry no byte-order prefix, so struct uses
+# the native byte order of the machine running the code. There is no pck8.
+upck8 = lambda x: struct.unpack('B', x)[0]
+upck16 = lambda x: struct.unpack('H', x)[0]
+upck32 = lambda x: struct.unpack('I', x)[0]
+upck64 = lambda x: struct.unpack('Q', x)[0]
+pck16 = lambda x: struct.pack('H', x)
+pck32 = lambda x: struct.pack('I', x)
+pck64 = lambda x: struct.pack('Q', x)
+
+
+class Disasm_Exception(Exception):
+    """Exception type exported by this module; adds nothing to Exception.
+
+    NOTE(review): nothing in this module raises it; presumably raised on
+    disassembly failures -- confirm against the modules that import it.
+    """
+    pass
+
+
+def hexdump(src, length=16):
+    """Print `src` as a hex dump, `length` bytes per row.
+
+    Each row shows the offset, the bytes in hex, then the bytes as text;
+    any byte above 127 or whose repr() is not a plain 3-character quoted
+    form is shown as '.'.
+    """
+    # lookup table: the character itself when printable, '.' otherwise
+    printable_map = ''.join(
+        [chr(code) if len(repr(chr(code))) == 3 else '.'
+         for code in range(256)])
+    rows = []
+    for offset in xrange(0, len(src), length):
+        chunk = src[offset:offset + length]
+        hex_part = ' '.join(["%02x" % ord(ch) for ch in chunk])
+        text_part = ''.join(
+            [printable_map[ord(ch)] if ord(ch) <= 127 else '.'
+             for ch in chunk])
+        rows.append(
+            "%04x  %-*s  %s\n" % (offset, length * 3, hex_part, text_part))
+    print ''.join(rows)
+
+# stackoverflow.com/questions/2912231
+
+import collections
+
+
+class keydefaultdict(collections.defaultdict):
+    """defaultdict whose factory is called with the missing key as argument."""
+
+    def __missing__(self, key):
+        """Create, store and return the value for `key`.
+
+        Raises KeyError when no default factory is set.
+        """
+        if self.default_factory is not None:
+            created = self.default_factory(key)
+            self[key] = created
+            return created
+        raise KeyError(key)