about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorserpilliere <serpilliere@users.noreply.github.com>2015-01-26 19:52:07 +0100
committerserpilliere <serpilliere@users.noreply.github.com>2015-01-26 19:52:07 +0100
commita3900fe1a46b585771a576478ce47bc94e35537f (patch)
treee06802c76e800e8c57aa8d1a903fd90eb440854b
parent9ce67b459cc8946c81181ab22030786450b8b0c5 (diff)
parentcc48c135915bd71ba23bd895fe6f2d57e61a0995 (diff)
downloadmiasm-a3900fe1a46b585771a576478ce47bc94e35537f.tar.gz
miasm-a3900fe1a46b585771a576478ce47bc94e35537f.zip
Merge pull request #49 from commial/refactor-asmbloc
Refactor asmbloc
-rw-r--r--example/asm/simple.py2
-rw-r--r--miasm2/core/asmbloc.py178
-rw-r--r--miasm2/jitter/jitcore.py3
-rw-r--r--test/arch/x86/sem.py2
4 files changed, 41 insertions, 144 deletions
diff --git a/example/asm/simple.py b/example/asm/simple.py
index 1ae3ae05..8664672d 100644
--- a/example/asm/simple.py
+++ b/example/asm/simple.py
@@ -30,7 +30,7 @@ symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0)
 resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool)
 
 # Show resolved blocs
-for bloc, dummy in resolved_b:
+for bloc in resolved_b:
     print bloc
 
 # Print offset -> bytes
diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py
index b80dd1e3..ee93cba7 100644
--- a/miasm2/core/asmbloc.py
+++ b/miasm2/core/asmbloc.py
@@ -2,15 +2,15 @@
 #-*- coding:utf-8 -*-
 
 import logging
+import inspect
+
+
 import miasm2.expression.expression as m2_expr
 from miasm2.expression.simplifications import expr_simp
-
 from miasm2.expression.modint import moduint, modint
-from miasm2.core.graph import DiGraph
 from miasm2.core.utils import Disasm_Exception, pck
 from miasm2.core.graph import DiGraph
 
-import inspect
 
 log_asmbloc = logging.getLogger("asmbloc")
 console_handler = logging.StreamHandler()
@@ -29,23 +29,18 @@ def is_int(a):
 
 
 def expr_is_label(e):
-    if isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label):
-        return True
-    return False
+    return isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label)
 
 
 def expr_is_int_or_label(e):
-    if isinstance(e, m2_expr.ExprInt):
-        return True
-    if isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label):
-        return True
-    return False
+    return isinstance(e, m2_expr.ExprInt) or \
+        (isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label))
 
 
 class asm_label:
+    "Stand for an assembly label"
 
     def __init__(self, name="", offset=None):
-        # print whoami()
         self.fixedblocs = False
         if is_int(name):
             name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF)
@@ -373,8 +368,7 @@ class asm_symbol_pool:
 
 def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool,
              dont_dis=[], split_dis=[
-             ], follow_call=False, patch_instr_symb=True,
-             dontdis_retcall=False, lines_wd=None,
+             ], follow_call=False, dontdis_retcall=False, lines_wd=None,
              dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
              attrib={}):
     # pool_bin.offset = offset
@@ -476,8 +470,6 @@ def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool,
 
 def split_bloc(mnemo, attrib, pool_bin, blocs,
     symbol_pool, more_ref=None, dis_bloc_callback=None):
-    i = -1
-    err = False
     if not more_ref:
         more_ref = []
 
@@ -507,7 +499,6 @@ def split_bloc(mnemo, attrib, pool_bin, blocs,
             log_asmbloc.debug("split bloc %x" % off)
             if new_b is None:
                 log_asmbloc.error("cannot split %x!!" % off)
-                err = True
                 break
             if dis_bloc_callback:
                 offsets_to_dis = set(
@@ -519,16 +510,11 @@ def split_bloc(mnemo, attrib, pool_bin, blocs,
             blocs.append(new_b)
             a, b = cb.get_range()
 
-        """
-        if err:
-            break
-        """
     return blocs
 
 
 def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[],
-                 split_dis=[], follow_call=False, patch_instr_symb=True,
-                 dontdis_retcall=False,
+                 split_dis=[], follow_call=False, dontdis_retcall=False,
                  blocs_wd=None, lines_wd=None, blocs=None,
                  dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
                  attrib={}):
@@ -565,8 +551,7 @@ def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[],
         l = symbol_pool.getby_offset_create(n)
         cur_bloc = asm_bloc(l)
         todo += dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool,
-                         dont_dis, split_dis, follow_call, patch_instr_symb,
-                         dontdis_retcall,
+                         dont_dis, split_dis, follow_call, dontdis_retcall,
                          dis_bloc_callback=dis_bloc_callback,
                          lines_wd=lines_wd,
                          dont_dis_nulstart_bloc=dont_dis_nulstart_bloc,
@@ -647,7 +632,7 @@ def fix_expr_val(e, symbols):
     return e
 
 
-def guess_blocs_size(mnemo, blocs, symbols):
+def guess_blocs_size(mnemo, blocs):
     """
     Asm and compute max bloc length
     """
@@ -778,7 +763,6 @@ def gen_non_free_mapping(group_bloc, dont_erase=[]):
     non_free_mapping = {}
     # calculate free space for bloc placing
     for g in group_bloc:
-        rest_len = 0
         g.fixedblocs = False
         # if a label in the group is fixed
         diff_offset = 0
@@ -808,7 +792,6 @@ def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF
     log_asmbloc.info(str(dont_erase))
     bloc_list = []
     unr_bloc = reduce(lambda x, y: x + group_bloc[y], group_bloc, [])
-    ending_ad = []
 
     non_free_mapping = gen_non_free_mapping(group_bloc, dont_erase)
     free_interval = gen_free_space_intervals(non_free_mapping, max_offset)
@@ -885,7 +868,7 @@ def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF
                 continue
             resolving = True
             log_asmbloc.info("bloc %s resolved" % unr_bloc[i].label)
-            bloc_list.append((unr_bloc[i], 0))
+            bloc_list.append(unr_bloc[i])
             g_found = None
             for g in g_tab:
                 if unr_bloc[i] in group_bloc[g]:
@@ -911,11 +894,11 @@ def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF
             continue
 
         for g in g_tab:
-            print g
+            log_asmbloc.debug(g)
             if g.fixedblocs:
-                print "fixed"
+                log_asmbloc.debug("fixed")
             else:
-                print "not fixed"
+                log_asmbloc.debug("not fixed")
         raise ValueError('enable to fix bloc')
     return bloc_list
 
@@ -973,12 +956,11 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative =
             symbols.add_label(s, v.offset_g)
         # print symbols
         # test if bad encoded relative
-        for b, t in blocs:
+        for bloc in blocs:
 
             offset_i = 0
-            blen = 0
-            my_symb_reloc_off[b.label] = []
-            for instr in b.lines:
+            my_symb_reloc_off[bloc.label] = []
+            for instr in bloc.lines:
                 if isinstance(instr, asm_raw):
                     if isinstance(instr.raw, list):
                         # fix special asm_raw
@@ -991,7 +973,7 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative =
                     offset_i += instr.l
                     continue
                 sav_a = instr.args[:]
-                instr.offset = b.label.offset_g + offset_i
+                instr.offset = bloc.label.offset_g + offset_i
                 args_e = instr.resolve_args_with_symbols(symbols)
                 for i, e in enumerate(args_e):
                     instr.args[i] = e
@@ -1010,17 +992,14 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative =
 
                 if len(c) != instr.l:
                     # good len, bad offset...XXX
-                    b.blen = b.blen - old_l + len(c)
+                    bloc.blen = bloc.blen - old_l + len(c)
                     instr.data = c
                     instr.l = len(c)
                     fini = False
                     continue
                 found = False
                 for cpos, c in enumerate(candidates):
-                    # if len(c) == len(instr.data):
                     if len(c) == instr.l:
-                        # print 'UPDD', repr(instr.data), repr(c)
-                        # b.blen = b.blen-old_l+len(c)
                         instr.data = c
                         instr.l = len(c)
 
@@ -1035,9 +1014,8 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative =
                     my_s = None
 
                 if my_s is not None:
-                    my_symb_reloc_off[b.label].append(offset_i + my_s)
+                    my_symb_reloc_off[bloc.label].append(offset_i + my_s)
                 offset_i += instr.l
-                blen += instr.l
                 assert(len(instr.data) == instr.l)
     # we have fixed all relative values
     # recompute good offsets
@@ -1051,11 +1029,10 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative =
 
 
 def asm_resolve_final(mnemo, blocs, symbol_pool, dont_erase=[],
-                      max_offset=0xFFFFFFFF,
-                      symb_reloc_off=None, constrain_pos=False):
+                      max_offset=0xFFFFFFFF, symb_reloc_off=None):
     if symb_reloc_off is None:
         symb_reloc_off = {}
-    guess_blocs_size(mnemo, blocs, symbol_pool)
+    guess_blocs_size(mnemo, blocs)
     bloc_g = group_blocs(blocs)
 
     resolved_b = resolve_symbol(bloc_g, symbol_pool, dont_erase=dont_erase,
@@ -1064,19 +1041,18 @@ def asm_resolve_final(mnemo, blocs, symbol_pool, dont_erase=[],
     asmbloc_final(mnemo, resolved_b, symbol_pool, symb_reloc_off)
     written_bytes = {}
     patches = {}
-    for b, t in resolved_b:
-        offset = b.label.offset
-        for i in b.lines:
-            assert(i.data is not None)
-            patches[offset] = i.data
-            for c in range(i.l):
-                if offset + c in written_bytes:
+    for bloc in resolved_b:
+        offset = bloc.label.offset
+        for line in bloc.lines:
+            assert(line.data is not None)
+            patches[offset] = line.data
+            for cur_pos in xrange(line.l):
+                if offset + cur_pos in written_bytes:
                     raise ValueError(
                         "overlapping bytes in asssembly %X" % int(offset))
-                written_bytes[offset + c] = 1
-            i.offset = offset
-            i.l = i.l
-            offset += i.l
+                written_bytes[offset + cur_pos] = 1
+            line.offset = offset
+            offset += line.l
 
     return resolved_b, patches
 
@@ -1220,7 +1196,7 @@ def bloc_find_path_next(blocs, blocby_label, a, b, path=None):
         if x.c_t != asm_constraint.c_next:
             continue
         if not x.label in blocby_label:
-            print 'XXX unknown label'
+            log_asmbloc.error('XXX unknown label')
             continue
         x = blocby_label[x.label]
         all_path += bloc_find_path_next(blocs, blocby_label, x, b, path + [a])
@@ -1230,87 +1206,13 @@ def bloc_find_path_next(blocs, blocby_label, a, b, path=None):
     return all_path
 
 
-def bloc_merge(blocs, symbol_pool, dont_merge=[]):
-    i = -1
-    """
-    # TODO XXXX implement find all path for digraph
-
-    g = blist2graph(blocs)
-    g.lbl2node = dict([(b.label, b) for b in blocs])
-
-    while i<len(blocs)-1:
-        i+=1
-        b = blocs[i]
-        if b.label in dont_merge:
-            continue
-
-        successors = [x for x in g.successors(b.label)]
-        predecessors = [x for x in g.predecessors(b.label)]
-        # if bloc doesn't self ref
-        if b.label in successors:
-            continue
-        # and bloc has only one parent
-        if len(predecessors) != 1:
-            continue
-        # may merge
-        bpl = predecessors[0]
-
-        # and parent has only one son
-        p_s = [x for x in g.successors(bpl)]
-        if len(p_s)!=1:
-            continue
-
-        bp = g.lbl2node[bpl]
-        # and parent has not a next constraint yet
-        found = False
-        for gpl in g.predecessors(bpl):
-            gp = g.lbl2node[gpl]
-            for x in gp.bto:
-                if x.c_t != asm_constraint.c_next:
-                    continue
-                if x.label == bpl:
-                    found = True
-                    break
-            if found:
-                break
-        if found:
-            continue
-        if bp.lines:
-            l = bp.lines[-1]
-            #jmp opt; jcc opt
-            if l.is_subcall():
-                continue
-            if l.breakflow() and l.dstflow():
-                bp.lines.pop()
-        #merge
-        #sons = b.bto[:]
-
-        # update parents
-        for s in b.bto:
-            if not isinstance(s.label, asm_label): continue
-            if s.label.name == None:
-                continue
-            if not s.label in g.lbl2node:
-                print "unknown parent XXX"
-                continue
-            bs = g.lbl2node[s.label]
-            for p in g.predecessors(bs.label):
-                if p == b.label:
-                    bs.parents.discard(p)
-                    bs.parents.add(bp.label)
-        bp.lines+=b.lines
-        bp.bto = b.bto
-        #symbol_pool.remove(b.label)
-        del(blocs[i])
-        i = -1
-
-    return
-    """
+def bloc_merge(blocs, dont_merge=[]):
     blocby_label = {}
     for b in blocs:
         blocby_label[b.label] = b
         b.parents = find_parents(blocs, b.label)
 
+    i = -1
     while i < len(blocs) - 1:
         i += 1
         b = blocs[i]
@@ -1354,7 +1256,7 @@ def bloc_merge(blocs, symbol_pool, dont_merge=[]):
             if s.label.name == None:
                 continue
             if not s.label in blocby_label:
-                print "unknown parent XXX"
+                log_asmbloc.error("unknown parent XXX")
                 continue
             bs = blocby_label[s.label]
             for p in list(bs.parents):
@@ -1363,7 +1265,7 @@ def bloc_merge(blocs, symbol_pool, dont_merge=[]):
                     bs.parents.add(bp.label)
         bp.lines += b.lines
         bp.bto = b.bto
-        # symbol_pool.remove(b.label)
+
         del(blocs[i])
         i = -1
 
@@ -1378,7 +1280,6 @@ class disasmEngine(object):
         self.dont_dis = []
         self.split_dis = []
         self.follow_call = False
-        self.patch_instr_symb = True
         self.dontdis_retcall = False
         self.lines_wd = None
         self.blocs_wd = None
@@ -1388,14 +1289,12 @@ class disasmEngine(object):
         self.__dict__.update(kwargs)
 
     def dis_bloc(self, offset):
-        job_done = set()
         l = self.symbol_pool.getby_offset_create(offset)
         current_bloc = asm_bloc(l)
         dis_bloc(self.arch, self.bs, current_bloc, offset, self.job_done,
                  self.symbol_pool,
                  dont_dis=self.dont_dis, split_dis=self.split_dis,
                  follow_call=self.follow_call,
-                 patch_instr_symb=self.patch_instr_symb,
                  dontdis_retcall=self.dontdis_retcall,
                  lines_wd=self.lines_wd,
                  dis_bloc_callback=self.dis_bloc_callback,
@@ -1408,7 +1307,6 @@ class disasmEngine(object):
                              self.symbol_pool,
                              dont_dis=self.dont_dis, split_dis=self.split_dis,
                              follow_call=self.follow_call,
-                             patch_instr_symb=self.patch_instr_symb,
                              dontdis_retcall=self.dontdis_retcall,
                              blocs_wd=self.blocs_wd,
                              lines_wd=self.lines_wd,
diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py
index 5ebc615e..c025c956 100644
--- a/miasm2/jitter/jitcore.py
+++ b/miasm2/jitter/jitcore.py
@@ -116,8 +116,7 @@ class JitCore(object):
         try:
             asmbloc.dis_bloc(self.ir_arch.arch, self.bs, cur_bloc, addr,
                              set(), self.ir_arch.symbol_pool, [],
-                             follow_call=False, patch_instr_symb=True,
-                             dontdis_retcall=False,
+                             follow_call=False, dontdis_retcall=False,
                              lines_wd=self.options["jit_maxline"],
                              # max 10 asm lines
                              attrib=self.ir_arch.attrib,
diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py
index de2d14f3..d2c998c8 100644
--- a/test/arch/x86/sem.py
+++ b/test/arch/x86/sem.py
@@ -51,7 +51,7 @@ def compute_txt(ir, mode, txt, inputstate={}, debug=False):
         mn, blocs[0], symbol_pool)
     interm = ir(symbol_pool)
     for bbl in resolved_b:
-        interm.add_bloc(bbl[0])
+        interm.add_bloc(bbl)
     return symb_exec(interm, inputstate, debug)
 
 op_add = lambda a, b: a+b