about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCamille Mougey <commial@gmail.com>2015-01-09 15:40:28 +0100
committerCamille Mougey <commial@gmail.com>2015-01-09 15:40:28 +0100
commitae9004eec4ef264013208e1ce50f6e884a4da23f (patch)
treee09503cc60f0ce9576a830a2bdc78b7f303961f2
parent0d6454f6b507c3c05c45507ffb56ac9f12eca536 (diff)
parent48581006f578d7de1a5944c31952faa5a4e947b4 (diff)
downloadmiasm-ae9004eec4ef264013208e1ce50f6e884a4da23f.tar.gz
miasm-ae9004eec4ef264013208e1ce50f6e884a4da23f.zip
Merge pull request #32 from serpilliere/improve_liveness_analysis
Improve liveness analysis
-rw-r--r--miasm2/arch/arm/ira.py4
-rw-r--r--miasm2/core/graph.py25
-rw-r--r--miasm2/ir/analysis.py180
-rw-r--r--miasm2/ir/ir.py12
4 files changed, 179 insertions, 42 deletions
diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py
index 8cfe2da0..74548f86 100644
--- a/miasm2/arch/arm/ira.py
+++ b/miasm2/arch/arm/ira.py
@@ -67,9 +67,9 @@ class ir_a_arml(ir_a_arml_base):
             irs = self.call_effects(pc_val)
             irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))])
             nbloc = irbloc(new_lbl, irs)
-            nbloc.lines = [l]
+            nbloc.lines = [l]*len(irs)
             self.blocs[new_lbl] = nbloc
-            irb.dst = ExprId(new_lbl, size=self.pc.size)
+            irb.set_dst(ExprId(new_lbl, size=self.pc.size))
 
         """
         if not bloc.lines:
diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py
index 47047269..d29517cc 100644
--- a/miasm2/core/graph.py
+++ b/miasm2/core/graph.py
@@ -27,6 +27,17 @@ class DiGraph:
         self._nodes_to[n] = []
         self._nodes_from[n] = []
 
+    def del_node(self, node):
+        """Delete the @node of the graph; Also delete every edge to/from this
+        @node"""
+
+        if node in self._nodes:
+            self._nodes.remove(node)
+        for pred in self.predecessors(node):
+            self.del_edge(pred, node)
+        for succ in self.successors(node):
+            self.del_edge(node, succ)
+
     def add_edge(self, a, b):
         if not a in self._nodes:
             self.add_node(a)
@@ -97,6 +108,20 @@ class DiGraph:
                     out.append(path + [b])
         return out
 
+    def get_all_parents(self, node):
+        """Return every parents nodes for a given @node"""
+
+        todo = set([node])
+        done = set()
+        while todo:
+            node = todo.pop()
+            if node in done:
+                continue
+            done.add(node)
+            for parent in self.predecessors(node):
+                todo.add(parent)
+        return done
+
     def node2str(self, n):
         return str(n)
 
diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py
index eaa5f6c8..790a790e 100644
--- a/miasm2/ir/analysis.py
+++ b/miasm2/ir/analysis.py
@@ -1,10 +1,17 @@
 #!/usr/bin/env python
 #-*- coding:utf-8 -*-
 
+import logging
+
 from miasm2.ir.symbexec import symbexec
 from miasm2.core.graph import DiGraph
 from miasm2.expression.expression import *
 
+log = logging.getLogger("analysis")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log.addHandler(console_handler)
+log.setLevel(logging.WARNING)
 
 class ira:
 
@@ -55,7 +62,7 @@ class ira:
 
         return done
 
-    def gen_graph(self, link_all = False):
+    def gen_graph(self, link_all = True):
         """
         Gen irbloc digraph
         @link_all: also gen edges to non present irblocs
@@ -76,6 +83,7 @@ class ira:
                         self.g.add_edge(lbl, d.name)
 
     def graph(self):
+        """Output the graphviz script"""
         out = """
     digraph asm_graph {
     size="80,50";
@@ -83,46 +91,72 @@ class ira:
     fontsize = "16",
     shape = "box"
     ];
-    """
+        """
         all_lbls = {}
         for lbl in self.g.nodes():
             if not lbl in self.blocs:
                 continue
-            b = self.blocs[lbl]
+            irb = self.blocs[lbl]
             ir_txt = [str(lbl)]
-            for irs in b.irs:
+            for irs in irb.irs:
                 for l in irs:
                     ir_txt.append(str(l))
                 ir_txt.append("")
             ir_txt.append("")
-            all_lbls[id(lbl)] = "\l\\\n".join(ir_txt)
+            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
         for l, v in all_lbls.items():
+            # print l, v
             out += '%s [label="%s"];\n' % (l, v)
 
         for a, b in self.g.edges():
-            out += '%s -> %s;\n' % (id(a), id(b))
+            # print 'edge', a, b, hash(a), hash(b)
+            out += '%s -> %s;\n' % (hash(a), hash(b))
         out += '}'
         return out
 
-    def remove_dead(self, b):
-        for ir, _, c_out in zip(b.irs, b.c_in, b.c_out):
+    def remove_dead(self, irb):
+        """Remove dead affectations using previous liveness analysis
+        @irb: irbloc instance
+        Return True iff the bloc state has changed
+        PRE: compute_in_out(@irb)
+        """
+
+        # print 'state1'
+        # self.dump_bloc_state(irb)
+
+        modified = False
+        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
             j = 0
             while j < len(ir):
                 i_cur = ir[j]
                 if not isinstance(i_cur.dst, ExprId):
                     pass
+                elif i_cur.dst == self.IRDst:
+                    # never delete irdst
+                    pass
                 elif (isinstance(i_cur.src, ExprOp) and
                     i_cur.src.op.startswith('call')):
                     # /!\ never remove ir calls
                     pass
                 elif not i_cur.dst in c_out:
                     del(ir[j])
+                    modified = True
                     continue
                 j += 1
 
+        # print 'state2'
+        # self.dump_bloc_state(irb)
+
+        return modified
+
     def remove_blocs_dead(self):
+        """Call remove_dead on each irbloc
+        Return True iff one of the bloc state has changed
+        """
+        modified = False
         for b in self.blocs.values():
-            self.remove_dead(b)
+            modified |= self.remove_dead(b)
+        return modified
 
     # for test XXX TODO
     def set_dead_regs(self, b):
@@ -131,77 +165,143 @@ class ira:
     def add_unused_regs(self):
         pass
 
-    def compute_in_out(self, b):
+    def dump_bloc_state(self, irb):
+        print '*'*80
+        for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in, irb.c_out)):
+            print 'ir'
+            for x in ir:
+                print '\t', x
+            print 'R', [str(x) for x in irb.r[i]]#c_in]
+            print 'W', [str(x) for x in irb.w[i]]#c_out]
+            print 'IN', [str(x) for x in c_in]
+            print 'OUT', [str(x) for x in c_out]
+
+        print 'OUT final', [str(x) for x in irb.c_out[-1]]
+
+    def compute_in_out(self, irb):
+        """Liveness computation for a single bloc
+        @irb: irbloc instance
+        Return True iff bloc state has changed
+        """
         # get out/in from bloc sons
         modified = False
         # set b in
-        if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))):
+        if irb.c_in[-1] != set(irb.r[-1].union(irb.c_out[-1].difference(irb.w[-1]))):
             modified = True
-        b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))
+        irb.c_in[-1] = set(irb.r[-1].union(irb.c_out[-1].difference(irb.w[-1])))
 
         # set b out
         c_out = set()
         has_son = False
-        for n_son in self.g.successors(b.label):
-            # print n_me, n_son
+        for n_son in self.g.successors(irb.label):
             has_son = True
             if not n_son in self.blocs:
-                print "leaf has lost her sons!"
-                continue
-            b_son = self.blocs[n_son]
-            c_out.update(b_son.c_in[0])
+                # If the son is not defined, we will propagate our current out
+                # nodes to the in nodes's son
+                son_c_in = irb.c_out_missing
+            else:
+                son_c_in = self.blocs[n_son].c_in[0]
+            c_out.update(son_c_in)
         if not has_son:
             # special case: leaf nodes architecture dependant
-            c_out = self.get_out_regs(b)
-        if b.c_out[-1] != set(c_out):
+            c_out = self.get_out_regs(irb)
+        if irb.c_out[-1] != set(c_out):
             modified = True
-        b.c_out[-1] = set(c_out)
+        irb.c_out[-1] = set(c_out)
 
         # get out/in for bloc
-        for i in reversed(xrange(len(b.irs))):
-            if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))):
+        for i in reversed(xrange(len(irb.irs))):
+            if irb.c_in[i] != set(irb.r[i].union(irb.c_out[i].difference(irb.w[i]))):
                 modified = True
-            b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
-            if b.c_out[i] != set(b.c_in[i + 1]):
+            irb.c_in[i] = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
+            if irb.c_out[i] != set(irb.c_in[i + 1]):
                 modified = True
-            b.c_out[i] = set(b.c_in[i + 1])
+            irb.c_out[i] = set(irb.c_in[i + 1])
+
         return modified
 
     def test_in_out_fix(self):
+        """Return True iff a fixed point has been reached during liveness
+        analysis"""
+
         fixed = True
-        for n in self.g.nodes():
-            if not n in self.blocs:
+        for node in self.g.nodes():
+            if not node in self.blocs:
                 # leaf has lost her son
                 continue
-            b = self.blocs[n]
-            if b.c_in != b.l_in or b.c_out != b.l_out:
+            irb = self.blocs[node]
+            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                 fixed = False
-            b.l_in = [set(x) for x in b.c_in]
-            b.l_out = [set(x) for x in b.c_out]
+            irb.l_in = [set(x) for x in irb.c_in]
+            irb.l_out = [set(x) for x in irb.c_out]
         return fixed
 
+    def fill_missing_son_c_in(self):
+        """Find nodes with missing sons in graph, and add virtual link to all
+        written variables of all parents.
+        PRE: gen_graph() and get_rw()"""
+
+        for node in self.g.nodes():
+            if not node in self.blocs:
+                continue
+            self.blocs[node].c_out_missing = set()
+            has_all_son = True
+            for node_son in self.g.successors(node):
+                if not node_son in self.blocs:
+                    has_all_son = False
+                    break
+            if has_all_son:
+                continue
+            parents = self.g.get_all_parents(node)
+            for parent in parents:
+                irb = self.blocs[parent]
+                for var_w in irb.w:
+                    self.blocs[node].c_out_missing.update(var_w)
+
     def compute_dead(self):
-        self.get_rw()
+        """Iterate liveness analysis until a fixed point is reached.
+        PRE: gen_graph()
+        """
 
         it = 0
         fixed_point = False
-        print 'iteration...',
+        log.debug('iteration...')
         while not fixed_point:
-            print it,
+            log.debug(it)
             it += 1
             for n in self.g.nodes():
                 if not n in self.blocs:
                     # leaf has lost her son
                     continue
-                b = self.blocs[n]
-                self.compute_in_out(b)
+                irb = self.blocs[n]
+                self.compute_in_out(irb)
 
             fixed_point = self.test_in_out_fix()
-        print
 
     def dead_simp(self):
-        self.compute_dead()
-        self.remove_blocs_dead()
+        """This function is used to analyse relation of a * complete function *
+        This mean the blocs under study represent a solid full function graph.
+
+        Ref: CS 5470 Compiler Techniques and Principles (Liveness
+        analysis/Dataflow equations)
+
+        PRE: call to gen_graph
+        """
+
+        modified = True
+        while modified:
+            log.debug('dead_simp step')
+
+            # Update r/w variables for all irblocs
+            self.get_rw()
+            # Fill c_in for missing sons
+            self.fill_missing_son_c_in()
+
+            # Liveness step
+            self.compute_dead()
+            modified = self.remove_blocs_dead()
+
+        # Simplify expressions
         self.simplify_blocs()
 
     def gen_equations(self):
diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py
index 9db6d696..b955bac7 100644
--- a/miasm2/ir/ir.py
+++ b/miasm2/ir/ir.py
@@ -52,6 +52,18 @@ class irbloc:
         self._dst = dst
         return dst
 
+    def set_dst(self, value):
+        """Find and replace the IRDst affectation's source by @value"""
+        dst = None
+        for ir in self.irs:
+            for i in ir:
+                if isinstance(i.dst, m2_expr.ExprId) and i.dst.name == "IRDst":
+                    if dst is not None:
+                        raise ValueError('Multiple destinations!')
+                    dst = value
+                    i.src = value
+        self._dst = value
+
     dst = property(get_dst)
 
     def get_rw(self):