about summary refs log tree commit diff stats
path: root/miasm2/core
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2/core')
-rw-r--r-- miasm2/core/asmblock.py 104
-rw-r--r-- miasm2/core/cpu.py 59
-rw-r--r-- miasm2/core/parse_asm.py 4
-rw-r--r-- miasm2/core/sembuilder.py 28
4 files changed, 118 insertions, 77 deletions
diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py
index f763d85f..9764590a 100644
--- a/miasm2/core/asmblock.py
+++ b/miasm2/core/asmblock.py
@@ -1,11 +1,13 @@
 #-*- coding:utf-8 -*-
 
 import logging
-import inspect
 import warnings
 from collections import namedtuple
 
-import miasm2.expression.expression as m2_expr
+from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \
+    get_expr_labels
+from miasm2.core.asmblock import AsmSymbolPool
+from miasm2.expression.expression import LocKey
 from miasm2.expression.simplifications import expr_simp
 from miasm2.expression.modint import moduint, modint
 from miasm2.core.utils import Disasm_Exception, pck
@@ -25,20 +27,13 @@ def is_int(a):
         isinstance(a, moduint) or isinstance(a, modint)
 
 
-def expr_is_label(e):
-    return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)
-
-
-def expr_is_int_or_label(e):
-    return isinstance(e, m2_expr.ExprInt) or \
-        (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel))
-
-
 class AsmLabel(object):
 
     "Stand for an assembly label"
 
-    def __init__(self, name="", offset=None):
+    def __init__(self, loc_key, name="", offset=None):
+        assert isinstance(loc_key, LocKey)
+        self.loc_key = loc_key
         self.fixedblocs = False
         if is_int(name):
             name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF)
@@ -77,6 +72,9 @@ class AsmRaw(object):
     def __str__(self):
         return repr(self.raw)
 
+    def to_string(self, symbol_pool):
+        return str(self)
+
 
 class asm_raw(AsmRaw):
 
@@ -363,6 +361,11 @@ class AsmSymbolPool(object):
         self._name2label = {}
         self._offset2label = {}
         self._label_num = 0
+        self._loc_key_to_label = {}
+
+    def loc_key_to_label(self, label_index):
+        assert isinstance(label_index, LocKey)
+        return self._loc_key_to_label.get(label_index.key, None)
 
     def add_label(self, name, offset=None):
         """
@@ -370,7 +373,7 @@ class AsmSymbolPool(object):
         @name: label's name
         @offset: (optional) label's offset
         """
-        label = AsmLabel(name, offset)
+        label = AsmLabel(LocKey(self._label_num), name, offset)
 
         # Test for collisions
         if (label.offset in self._offset2label and
@@ -383,6 +386,9 @@ class AsmSymbolPool(object):
                              (label, self._name2label[label.name]))
 
         self._labels.add(label)
+        self._label_num += 1
+        self._loc_key_to_label[label.loc_key.key] = label
+
         if label.offset is not None:
             self._offset2label[label.offset] = label
         if label.name != "":
@@ -480,6 +486,19 @@ class AsmSymbolPool(object):
         self._name2label.update(symbol_pool._name2label)
         self._offset2label.update(symbol_pool._offset2label)
 
+    def canonize_to_exprloc(self, expr):
+        """
+        Map an integer expression to the ExprLoc of the label returned by
+        getby_offset_create for that offset; hand back any other expr as is
+
+        @expr: Expr instance
+        """
+        if not expr.is_int():
+            return expr
+        # Integer case: the ExprLoc keeps the size of the original expression
+        label = self.getby_offset_create(int(expr))
+        return ExprLoc(label.loc_key, expr.size)
+
     def gen_label(self):
         """Generate a new unpinned label"""
         label = self.add_label("lbl_gen_%.8X" % (self._label_num))
@@ -511,7 +530,9 @@ class AsmCFG(DiGraph):
     AsmCFGPending = namedtuple("AsmCFGPending",
                                ["waiter", "constraint"])
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, symbol_pool=None, *args, **kwargs):
+        if symbol_pool is None:
+            raise DeprecationWarning("AsmCFG needs a non empty symbol_pool")
         super(AsmCFG, self).__init__(*args, **kwargs)
         # Edges -> constraint
         self.edges2constraint = {}
@@ -519,6 +540,15 @@ class AsmCFG(DiGraph):
         self._pendings = {}
         # Label2block built on the fly
         self._label2block = {}
+        # symbol_pool
+        self.symbol_pool = symbol_pool
+
+
+    def copy(self):
+        """Return a duplicate of this graph built on the same symbol_pool"""
+        duplicate = self.__class__(self.symbol_pool)
+        return duplicate + self
+
 
     # Compatibility with old list API
     def append(self, *args, **kwargs):
@@ -639,9 +669,9 @@ class AsmCFG(DiGraph):
             if self._dot_offset:
                 yield [self.DotCellDescription(text="%.8X" % line.offset,
                                                attr={}),
-                       self.DotCellDescription(text=str(line), attr={})]
+                       self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})]
             else:
-                yield self.DotCellDescription(text=str(line), attr={})
+                yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})
 
     def node_attr(self, node):
         if isinstance(node, AsmBlockBad):
@@ -784,7 +814,7 @@ class AsmCFG(DiGraph):
             if len(pred_next) > 1:
                 raise RuntimeError("Too many next constraints for bloc %r"
                                    "(%s)" % (block.label,
-                                             map(lambda x: x.label, pred_next)))
+                                             [x.label for x in pred_next]))
 
     def guess_blocks_size(self, mnemo):
         """Asm and compute max block size
@@ -971,13 +1001,13 @@ def conservative_asm(mnemo, instr, symbols, conservative):
 def fix_expr_val(expr, symbols):
     """Resolve an expression @expr using @symbols"""
     def expr_calc(e):
-        if isinstance(e, m2_expr.ExprId):
+        if isinstance(e, ExprId):
             s = symbols._name2label[e.name]
-            e = m2_expr.ExprInt(s.offset, e.size)
+            e = ExprInt(s.offset, e.size)
         return e
     result = expr.visit(expr_calc)
     result = expr_simp(result)
-    if not isinstance(result, m2_expr.ExprInt):
+    if not isinstance(result, ExprInt):
         raise RuntimeError('Cannot resolve symbol %s' % expr)
     return result
 
@@ -1222,12 +1252,11 @@ def get_block_labels(block):
         if isinstance(instr, AsmRaw):
             if isinstance(instr.raw, list):
                 for expr in instr.raw:
-                    symbols.update(m2_expr.get_expr_ids(expr))
+                    symbols.update(get_expr_labels(expr))
         else:
             for arg in instr.args:
-                symbols.update(m2_expr.get_expr_ids(arg))
-    labels = filter_exprid_label(symbols)
-    return labels
+                symbols.update(get_expr_labels(arg))
+    return symbols
 
 
 def assemble_block(mnemo, block, symbol_pool, conservative=False):
@@ -1285,7 +1314,8 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False):
     lbl2block = {block.label: block for block in blocks}
     blocks_using_label = {}
     for block in blocks:
-        labels = get_block_labels(block)
+        exprlocs = get_block_labels(block)
+        labels = set(symbol_pool.loc_key_to_label(expr.loc_key) for expr in exprlocs)
         for label in labels:
             blocks_using_label.setdefault(label, set()).add(block)
 
@@ -1544,21 +1574,19 @@ class disasmEngine(object):
             # test split
             if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall):
                 add_next_offset = True
-                pass
             if instr.dstflow():
                 instr.dstflow2label(self.symbol_pool)
-                dst = instr.getdstflow(self.symbol_pool)
-                dstn = []
-                for d in dst:
-                    if isinstance(d, m2_expr.ExprId) and \
-                            isinstance(d.name, AsmLabel):
-                        dstn.append(d.name)
-                        if d.name.offset in self.dont_dis_retcall_funcs:
-                            add_next_offset = False
-                dst = dstn
+                destinations = instr.getdstflow(self.symbol_pool)
+                known_dsts = []
+                for dst in destinations:
+                    if not dst.is_label():
+                        continue
+                    label = self.symbol_pool.loc_key_to_label(dst.loc_key)
+                    known_dsts.append(label)
+                    if label.offset in self.dont_dis_retcall_funcs:
+                        add_next_offset = False
                 if (not instr.is_subcall()) or self.follow_call:
-                    cur_block.bto.update(
-                        [AsmConstraint(x, AsmConstraint.c_to) for x in dst])
+                    cur_block.bto.update([AsmConstraint(label, AsmConstraint.c_to) for label in known_dsts])
 
             # get in delayslot mode
             in_delayslot = True
@@ -1608,7 +1636,7 @@ class disasmEngine(object):
         log_asmblock.info("dis bloc all")
         job_done = set()
         if blocks is None:
-            blocks = AsmCFG()
+            blocks = AsmCFG(self.symbol_pool)
         todo = [offset]
 
         bloc_cpt = 0
diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py
index d75b99cf..8ea96e22 100644
--- a/miasm2/core/cpu.py
+++ b/miasm2/core/cpu.py
@@ -8,13 +8,12 @@ from collections import defaultdict
 import pyparsing
 
 import miasm2.expression.expression as m2_expr
-from miasm2.core import asmblock
 from miasm2.core.bin_stream import bin_stream, bin_stream_str
 from miasm2.core.utils import Disasm_Exception
 from miasm2.expression.simplifications import expr_simp
 
 
-from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp
+from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp
 
 log = logging.getLogger("cpuhelper")
 console_handler = logging.StreamHandler()
@@ -985,18 +984,24 @@ class instruction(object):
         self.mode = mode
         self.args = args
         self.additional_info = additional_info
+        self.offset = None
+        self.l = None
+        self.b = None
 
     def gen_args(self, args):
         out = ', '.join([str(x) for x in args])
         return out
 
     def __str__(self):
+        return self.to_string()
+
+    def to_string(self, symbol_pool=None):
         o = "%-10s " % self.name
         args = []
         for i, arg in enumerate(self.args):
             if not isinstance(arg, m2_expr.Expr):
                 raise ValueError('zarb arg type')
-            x = self.arg2str(arg, pos = i)
+            x = self.arg2str(arg, i, symbol_pool)
             args.append(x)
         o += self.gen_args(args)
         return o
@@ -1011,40 +1016,38 @@ class instruction(object):
         if symbols is None:
             symbols = {}
         args_out = []
-        for a in self.args:
-            e = a
+        for expr in self.args:
             # try to resolve symbols using symbols (0 for default value)
-            ids = m2_expr.get_expr_ids(e)
-            fixed_ids = {}
-            for x in ids:
-                if isinstance(x.name, asmblock.AsmLabel):
-                    name = x.name.name
-                    # special symbol $
-                    if name == '$':
-                        fixed_ids[x] = self.get_asm_offset(x)
-                        continue
-                    if name == '_':
-                        fixed_ids[x] = self.get_asm_next_offset(x)
-                        continue
-                    if not name in symbols:
-                        raise ValueError('unresolved symbol! %r' % x)
-                else:
-                    name = x.name
-                if not name in symbols:
+            labels = m2_expr.get_expr_labels(expr)
+            fixed_expr = {}
+            for exprloc in labels:
+                label = symbols.loc_key_to_label(exprloc.loc_key)
+                name = label.name
+                # special symbols
+                if name == '$':
+                    fixed_expr[exprloc] = self.get_asm_offset(exprloc)
+                    continue
+                if name == '_':
+                    fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                     continue
+                if not name in symbols:
+                    raise ValueError('Unresolved symbol: %r' % exprloc)
+
                 if symbols[name].offset is None:
                     raise ValueError('The offset of label "%s" cannot be '
                                      'determined' % name)
                 else:
-                    size = x.size
+                    # Fix symbol with its offset
+                    size = exprloc.size
                     if size is None:
-                        default_size = self.get_symbol_size(x, symbols)
+                        default_size = self.get_symbol_size(exprloc, symbols)
                         size = default_size
                     value = m2_expr.ExprInt(symbols[name].offset, size)
-                fixed_ids[x] = value
-            e = e.replace_expr(fixed_ids)
-            e = expr_simp(e)
-            args_out.append(e)
+                fixed_expr[exprloc] = value
+
+            expr = expr.replace_expr(fixed_expr)
+            expr = expr_simp(expr)
+            args_out.append(expr)
         return args_out
 
     def get_info(self, c):
diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py
index df419680..07155fd9 100644
--- a/miasm2/core/parse_asm.py
+++ b/miasm2/core/parse_asm.py
@@ -4,7 +4,7 @@ import re
 import miasm2.expression.expression as m2_expr
 import miasm2.core.asmblock as asmblock
 from miasm2.core.cpu import instruction, base_expr
-from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp
+from miasm2.core.asm_ast import AstInt, AstId, AstOp
 
 declarator = {'byte': 8,
               'word': 16,
@@ -237,7 +237,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None):
     cur_block = None
     state = STATE_NO_BLOC
     i = 0
-    blocks = asmblock.AsmCFG()
+    blocks = asmblock.AsmCFG(symbol_pool)
     block_to_nlink = None
     delayslot = 0
     while i < len(lines):
diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py
index 8d6d3e07..68b4439f 100644
--- a/miasm2/core/sembuilder.py
+++ b/miasm2/core/sembuilder.py
@@ -142,11 +142,19 @@ class SemBuilder(object):
     def _create_labels(lbl_else=False):
         """Return the AST standing for label creations
         @lbl_else (optional): if set, create a label 'lbl_else'"""
-        lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)"
+        lbl_end = "lbl_end = ir.get_next_label(instr)"
+        lbl_end_expr = "lbl_end_expr = ExprLoc(lbl_end.loc_key, ir.IRDst.size)"
         out = ast.parse(lbl_end).body
-        out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body
+        out += ast.parse(lbl_end_expr).body
+        lbl_if = "lbl_if = ir.gen_label()"
+        lbl_if_expr = "lbl_if_expr = ExprLoc(lbl_if.loc_key, ir.IRDst.size)"
+        out += ast.parse(lbl_if).body
+        out += ast.parse(lbl_if_expr).body
         if lbl_else:
-            out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body
+            lbl_else_stmt = "lbl_else = ir.gen_label()"  # source text; leaves the bool argument intact
+            lbl_else_expr = "lbl_else_expr = ExprLoc(lbl_else.loc_key, ir.IRDst.size)"
+            out += ast.parse(lbl_else_stmt).body
+            out += ast.parse(lbl_else_expr).body
         return out
 
     def _parse_body(self, body, argument_names):
@@ -200,9 +208,9 @@ class SemBuilder(object):
                 cond = statement.test
                 real_body += self._create_labels(lbl_else=True)
 
-                lbl_end = ast.Name(id='lbl_end', ctx=ast.Load())
-                lbl_if = ast.Name(id='lbl_if', ctx=ast.Load())
-                lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \
+                lbl_end = ast.Name(id='lbl_end_expr', ctx=ast.Load())
+                lbl_if = ast.Name(id='lbl_if_expr', ctx=ast.Load())
+                lbl_else = ast.Name(id='lbl_else_expr', ctx=ast.Load()) \
                            if statement.orelse else lbl_end
                 dst = ast.Call(func=ast.Name(id='ExprCond',
                                              ctx=ast.Load()),
@@ -261,9 +269,11 @@ class SemBuilder(object):
 
 
                     ## Replace the block with a call to 'IRBlock'
-                    lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name,
-                                                               ctx=ast.Load()),
-                                                attr='name', ctx=ast.Load())
+                    lbl_if_name = ast.Attribute(
+                        value=ast.Name(id=lbl_name, ctx=ast.Load()),
+                        attr="loc_key",  # AST for "<lbl_name>.loc_key"
+                        ctx=ast.Load()
+                    )
 
                     assignblks = ast.List(elts=[assignblk],
                                           ctx=ast.Load())