about summary refs log tree commit diff stats
path: root/miasm2
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2')
-rw-r--r--miasm2/analysis/data_analysis.py6
-rw-r--r--miasm2/analysis/depgraph.py6
-rw-r--r--miasm2/arch/x86/ira.py5
-rw-r--r--miasm2/ir/analysis.py124
-rw-r--r--miasm2/ir/ir.py128
5 files changed, 142 insertions, 127 deletions
diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py
index 8462f150..9451a407 100644
--- a/miasm2/analysis/data_analysis.py
+++ b/miasm2/analysis/data_analysis.py
@@ -150,7 +150,7 @@ def inter_bloc_flow_link(ir_arch, flow_graph, todo, link_exec_to_data):
     x_nodes = tuple(sorted(list(irb.dst.get_r())))
 
     todo = set()
-    for lbl_dst in ir_arch.g.successors(irb.label):
+    for lbl_dst in ir_arch.graph.successors(irb.label):
         todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes))
 
     # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()]))
@@ -166,7 +166,7 @@ def create_implicit_flow(ir_arch, flow_graph):
     while todo:
         lbl = todo.pop()
         irb = ir_arch.blocs[lbl]
-        for lbl_son in ir_arch.g.successors(irb.label):
+        for lbl_son in ir_arch.graph.successors(irb.label):
             if not lbl_son in ir_arch.blocs:
                 print "cannot find bloc!!", lbl
                 continue
@@ -189,7 +189,7 @@ def create_implicit_flow(ir_arch, flow_graph):
                     irb.in_nodes[n_r] = irb.label, 0, n_r
                 node_n_r = irb.in_nodes[n_r]
                 # print "###", node_n_r
-                for lbl_p in ir_arch.g.predecessors(irb.label):
+                for lbl_p in ir_arch.graph.predecessors(irb.label):
                     todo.add(lbl_p)
 
                 flow_graph.add_uniq_edge(node_n_r, node_n_w)
diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py
index 838183bf..0a5d38aa 100644
--- a/miasm2/analysis/depgraph.py
+++ b/miasm2/analysis/depgraph.py
@@ -686,7 +686,6 @@ class DependencyGraph(object):
     def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True,
                  follow_call=True):
         """Create a DependencyGraph linked to @ira
-        The IRA graph must have been computed
 
         @ira: IRAnalysis instance
         @implicit: (optional) Imply implicit dependencies
@@ -702,9 +701,6 @@ class DependencyGraph(object):
         self._step_counter = itertools.count()
         self._current_step = next(self._step_counter)
 
-        # The IRA graph must be computed
-        assert hasattr(self._ira, 'g')
-
         # Create callback filters. The order is relevant.
         self._cb_follow = []
         if apply_simp:
@@ -892,7 +888,7 @@ class DependencyGraph(object):
     def _get_previousblocks(self, label):
         """Return an iterator on predecessors blocks of @label, with their
         lengths"""
-        preds = self._ira.g.predecessors_iter(label)
+        preds = self._ira.graph.predecessors_iter(label)
         for pred_label in preds:
             length = len(self._get_irs(pred_label))
             yield (pred_label, length)
diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py
index 5bc4761f..b7a1f19f 100644
--- a/miasm2/arch/x86/ira.py
+++ b/miasm2/arch/x86/ira.py
@@ -3,6 +3,7 @@
 
 from miasm2.expression.expression import ExprAff, ExprOp, ExprId
 from miasm2.core.graph import DiGraph
+from miasm2.core.asmbloc import expr_is_label
 from miasm2.ir.ir import ir, irbloc
 from miasm2.ir.analysis import ira
 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64
@@ -46,7 +47,7 @@ class ir_a_x86_16(ir_x86_16, ira):
         if not l.is_subcall():
             return
         sub_call_dst = l.args[0]
-        if self.ExprIsLabel(sub_call_dst):
+        if expr_is_label(sub_call_dst):
             sub_call_dst = sub_call_dst.name
         for b in ir_blocs:
             l = b.lines[-1]
@@ -54,7 +55,7 @@ class ir_a_x86_16(ir_x86_16, ira):
             if not l.is_subcall():
                 continue
             sub_call_dst = l.args[0]
-            if self.ExprIsLabel(sub_call_dst):
+            if expr_is_label(sub_call_dst):
                 sub_call_dst = sub_call_dst.name
             lbl = bloc.get_next()
             new_lbl = self.gen_label()
diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py
index 31f6294c..40a3bf64 100644
--- a/miasm2/ir/analysis.py
+++ b/miasm2/ir/analysis.py
@@ -4,7 +4,7 @@
 import logging
 
 from miasm2.ir.symbexec import symbexec
-from miasm2.core.graph import DiGraph
+from miasm2.ir.ir import ir
 from miasm2.expression.expression \
     import ExprAff, ExprCond, ExprId, ExprInt, ExprMem
 
@@ -14,109 +14,21 @@ console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
 log.addHandler(console_handler)
 log.setLevel(logging.WARNING)
 
-class ira:
+
+class ira(ir):
+    """IR Analysis
+    This class provides higher level manipulations on IR, such as dead
+    instruction removals.
+
+    This class can be used as a common parent with `miasm2.ir.ir::ir` class.
+    For instance:
+        class ira_x86_16(ir_x86_16, ira)
+    """
 
     def ira_regs_ids(self):
         """Returns ids of all registers used in the IR"""
         return self.arch.regs.all_regs_ids + [self.IRDst]
 
-    def sort_dst(self, todo, done):
-        out = set()
-        while todo:
-            dst = todo.pop()
-            if self.ExprIsLabel(dst):
-                done.add(dst)
-            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
-                done.add(dst)
-            elif isinstance(dst, ExprCond):
-                todo.add(dst.src1)
-                todo.add(dst.src2)
-            elif isinstance(dst, ExprId):
-                out.add(dst)
-            else:
-                done.add(dst)
-        return out
-
-    def dst_trackback(self, b):
-        dst = b.dst
-        todo = set([dst])
-        done = set()
-
-        for irs in reversed(b.irs):
-            if len(todo) == 0:
-                break
-            out = self.sort_dst(todo, done)
-            found = set()
-            follow = set()
-            for i in irs:
-                if not out:
-                    break
-                for o in out:
-                    if i.dst == o:
-                        follow.add(i.src)
-                        found.add(o)
-                for o in found:
-                    out.remove(o)
-
-            for o in out:
-                if o not in found:
-                    follow.add(o)
-            todo = follow
-
-        return done
-
-    def gen_graph(self, link_all = True):
-        """
-        Gen irbloc digraph
-        @link_all: also gen edges to non present irblocs
-        """
-        self.g = DiGraph()
-        for lbl, b in self.blocs.items():
-            # print 'add', lbl
-            self.g.add_node(lbl)
-            # dst = self.get_bloc_dst(b)
-            dst = self.dst_trackback(b)
-            # print "\tdst", dst
-            for d in dst:
-                if isinstance(d, ExprInt):
-                    d = ExprId(
-                        self.symbol_pool.getby_offset_create(int(d.arg)))
-                if self.ExprIsLabel(d):
-                    if d.name in self.blocs or link_all is True:
-                        self.g.add_edge(lbl, d.name)
-
-    def graph(self):
-        """Output the graphviz script"""
-        out = """
-    digraph asm_graph {
-    size="80,50";
-    node [
-    fontsize = "16",
-    shape = "box"
-    ];
-        """
-        all_lbls = {}
-        for lbl in self.g.nodes():
-            if lbl not in self.blocs:
-                continue
-            irb = self.blocs[lbl]
-            ir_txt = [str(lbl)]
-            for irs in irb.irs:
-                for l in irs:
-                    ir_txt.append(str(l))
-                ir_txt.append("")
-            ir_txt.append("")
-            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
-        for l, v in all_lbls.items():
-            # print l, v
-            out += '%s [label="%s"];\n' % (l, v)
-
-        for a, b in self.g.edges():
-            # print 'edge', a, b, hash(a), hash(b)
-            out += '%s -> %s;\n' % (hash(a), hash(b))
-        out += '}'
-        return out
-
     def remove_dead_instr(self, irb, useful):
         """Remove dead affectations using previous reaches analysis
         @irb: irbloc instance
@@ -149,12 +61,12 @@ class ira:
 
         useful = set()
 
-        for node in self.g.nodes():
+        for node in self.graph.nodes():
             if node not in self.blocs:
                 continue
 
             block = self.blocs[node]
-            successors = self.g.successors(node)
+            successors = self.graph.successors(node)
             has_son = bool(successors)
             for p_son in successors:
                 if p_son not in self.blocs:
@@ -274,7 +186,7 @@ class ira:
                       for key, value in irb.cur_reach[0].iteritems()}
 
         # Compute reach from predecessors
-        for n_pred in self.g.predecessors(irb.label):
+        for n_pred in self.graph.predecessors(irb.label):
             p_block = self.blocs[n_pred]
 
             # Handle each register definition
@@ -313,7 +225,7 @@ class ira:
         analysis"""
 
         fixed = True
-        for node in self.g.nodes():
+        for node in self.graph.nodes():
             if node in self.blocs:
                 irb = self.blocs[node]
                 if (irb.cur_reach != irb.prev_reach or
@@ -329,13 +241,11 @@ class ira:
 
         Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
         IBM Thomas J. Watson Research Division, page 43
-
-        PRE: gen_graph()
         """
         fixed_point = False
         log.debug('iteration...')
         while not fixed_point:
-            for node in self.g.nodes():
+            for node in self.graph.nodes():
                 if node in self.blocs:
                     self.compute_reach_block(self.blocs[node])
             fixed_point = self._test_kill_reach_fix()
@@ -347,8 +257,6 @@ class ira:
 
         Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
         IBM Thomas J. Watson Research Division, page 43
-
-        PRE: gen_graph()
         """
         # Update r/w variables for all irblocs
         self.get_rw(self.ira_regs_ids())
diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py
index e051dc8c..f957fcab 100644
--- a/miasm2/ir/ir.py
+++ b/miasm2/ir/ir.py
@@ -22,15 +22,16 @@
 
 import miasm2.expression.expression as m2_expr
 from miasm2.expression.expression_helper import get_missing_interval
-from miasm2.core import asmbloc
 from miasm2.expression.simplifications import expr_simp
-from miasm2.core.asmbloc import asm_symbol_pool
+from miasm2.core.asmbloc import asm_symbol_pool, expr_is_label, asm_label, \
+    asm_bloc
+from miasm2.core.graph import DiGraph
 
 
 class irbloc(object):
 
     def __init__(self, label, irs, lines = []):
-        assert(isinstance(label, asmbloc.asm_label))
+        assert(isinstance(label, asm_label))
         self.label = label
         self.irs = irs
         self.lines = lines
@@ -119,6 +120,47 @@ class irbloc(object):
         return "\n".join(o)
 
 
+class DiGraphIR(DiGraph):
+    """DiGraph for IR instances"""
+
+    def __init__(self, blocks, *args, **kwargs):
+        """Instantiate a DiGraphIR
+        @blocks: IR blocks
+        """
+        self._blocks = blocks
+        super(DiGraphIR, self).__init__(*args, **kwargs)
+
+    def dot(self):
+        """Output the graphviz script"""
+        out = """
+    digraph asm_graph {
+    size="80,50";
+    node [
+    fontsize = "16",
+    shape = "box"
+    ];
+        """
+        all_lbls = {}
+        for lbl in self.nodes():
+            if lbl not in self._blocks:
+                continue
+            irb = self._blocks[lbl]
+            ir_txt = [str(lbl)]
+            for irs in irb.irs:
+                for l in irs:
+                    ir_txt.append(str(l))
+                ir_txt.append("")
+            ir_txt.append("")
+            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
+        for l, v in all_lbls.items():
+            out += '%s [label="%s"];\n' % (l, v)
+
+        for a, b in self.edges():
+            out += '%s -> %s;\n' % (hash(a), hash(b))
+        out += '}'
+        return out
+
+
 class ir(object):
 
     def __init__(self, arch, attrib, symbol_pool=None):
@@ -130,6 +172,8 @@ class ir(object):
         self.sp = arch.getsp(attrib)
         self.arch = arch
         self.attrib = attrib
+        # Lazy structure
+        self._graph = None
 
     def instr2ir(self, l):
         ir_bloc_cur, ir_blocs_extra = self.get_ir(l)
@@ -140,13 +184,13 @@ class ir(object):
         @ad: an ExprId/ExprInt/label/int"""
 
         if (isinstance(ad, m2_expr.ExprId) and
-            isinstance(ad.name, asmbloc.asm_label)):
+            isinstance(ad.name, asm_label)):
             ad = ad.name
         if isinstance(ad, m2_expr.ExprInt):
             ad = int(ad.arg)
         if type(ad) in [int, long]:
             ad = self.symbol_pool.getby_offset_create(ad)
-        elif isinstance(ad, asmbloc.asm_label):
+        elif isinstance(ad, asm_label):
             ad = self.symbol_pool.getby_name_create(ad.name)
         return ad
 
@@ -158,7 +202,7 @@ class ir(object):
         return self.blocs.get(label, None)
 
     def add_instr(self, l, ad=0, gen_pc_updt = False):
-        b = asmbloc.asm_bloc(l)
+        b = asm_bloc(l)
         b.lines = [l]
         self.add_bloc(b, gen_pc_updt)
 
@@ -299,6 +343,8 @@ class ir(object):
 
             self.blocs[irb.label] = irb
 
+        # Forget graph if any
+        self._graph = None
 
     def get_instr_label(self, instr):
         """Returns the label associated to an instruction
@@ -334,6 +380,70 @@ class ir(object):
         for b in self.blocs.values():
             b.get_rw(regs_ids)
 
-    def ExprIsLabel(self, l):
-        return isinstance(l, m2_expr.ExprId) and isinstance(l.name,
-                                                            asmbloc.asm_label)
+    def sort_dst(self, todo, done):
+        out = set()
+        while todo:
+            dst = todo.pop()
+            if expr_is_label(dst):
+                done.add(dst)
+            elif isinstance(dst, m2_expr.ExprMem) or isinstance(dst, m2_expr.ExprInt):
+                done.add(dst)
+            elif isinstance(dst, m2_expr.ExprCond):
+                todo.add(dst.src1)
+                todo.add(dst.src2)
+            elif isinstance(dst, m2_expr.ExprId):
+                out.add(dst)
+            else:
+                done.add(dst)
+        return out
+
+    def dst_trackback(self, b):
+        dst = b.dst
+        todo = set([dst])
+        done = set()
+
+        for irs in reversed(b.irs):
+            if len(todo) == 0:
+                break
+            out = self.sort_dst(todo, done)
+            found = set()
+            follow = set()
+            for i in irs:
+                if not out:
+                    break
+                for o in out:
+                    if i.dst == o:
+                        follow.add(i.src)
+                        found.add(o)
+                for o in found:
+                    out.remove(o)
+
+            for o in out:
+                if o not in found:
+                    follow.add(o)
+            todo = follow
+
+        return done
+
+    def _gen_graph(self):
+        """
+        Gen irbloc digraph
+        """
+        self._graph = DiGraphIR(self.blocs)
+        for lbl, b in self.blocs.iteritems():
+            self._graph.add_node(lbl)
+            dst = self.dst_trackback(b)
+            for d in dst:
+                if isinstance(d, m2_expr.ExprInt):
+                    d = m2_expr.ExprId(
+                        self.symbol_pool.getby_offset_create(int(d.arg)))
+                if expr_is_label(d):
+                    self._graph.add_edge(lbl, d.name)
+
+    @property
+    def graph(self):
+        """Get a DiGraph representation of current IR instance.
+        Lazy property, building the graph on-demand"""
+        if self._graph is None:
+            self._gen_graph()
+        return self._graph