1 files changed, 202 insertions, 148 deletions
diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py
index ad37a1df..87c53d44 100644
--- a/miasm2/ir/analysis.py
+++ b/miasm2/ir/analysis.py
@@ -6,7 +6,7 @@ import logging
 from miasm2.ir.symbexec import symbexec
 from miasm2.core.graph import DiGraph
 from miasm2.expression.expression \
-    import ExprAff, ExprCond, ExprId, ExprInt, ExprMem, ExprOp
+    import ExprAff, ExprCond, ExprId, ExprInt, ExprMem
 
 log = logging.getLogger("analysis")
 console_handler = logging.StreamHandler()
@@ -16,6 +16,10 @@ log.setLevel(logging.WARNING)
 
 class ira:
 
+    def ira_regs_ids(self):
+        """Returns ids of all registers used in the IR"""
+        return self.arch.regs.all_regs_ids + [self.IRDst]
+
     def sort_dst(self, todo, done):
         out = set()
         while todo:
@@ -115,194 +119,244 @@ class ira:
         out += '}'
         return out
 
-    def remove_dead(self, irb):
-        """Remove dead affectations using previous liveness analysis
+    def remove_dead_instr(self, irb, useful):
+        """Remove dead affectations using previous reaches analysis
         @irb: irbloc instance
-        Return True iff the bloc state has changed
-        PRE: compute_in_out(@irb)
+        @useful: useful statements from previous reach analysis
+        Return True iff the block state has changed
+        PRE: compute_reach(self)
         """
-
-        # print 'state1'
-        # self.dump_bloc_state(irb)
-
         modified = False
-        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
+        for k, ir in enumerate(irb.irs):
             j = 0
             while j < len(ir):
-                i_cur = ir[j]
-                if not isinstance(i_cur.dst, ExprId):
-                    pass
-                elif i_cur.dst == self.IRDst:
-                    # never delete irdst
-                    pass
-                elif (isinstance(i_cur.src, ExprOp) and
-                    i_cur.src.op.startswith('call')):
-                    # /!\ never remove ir calls
-                    pass
-                elif i_cur.dst not in c_out:
-                    del(ir[j])
+                cur_instr = ir[j]
+                if (isinstance(cur_instr.dst, ExprId)
+                    and (irb.label, k, cur_instr) not in useful):
+                    del ir[j]
                     modified = True
-                    continue
-                j += 1
+                else:
+                    j += 1
+        return modified
 
-        # print 'state2'
-        # self.dump_bloc_state(irb)
+    def init_useful_instr(self):
+        """Computes a set of triples (block, instruction number, instruction)
+        containing initially useful instructions :
+          - Instructions affecting final value of return registers
+          - Instructions affecting IRDst register
+          - Instructions writing in memory
+          - Function call instructions
+        Return set of intial useful instructions
+        """
 
-        return modified
+        useful = set()
+
+        for node in self.g.nodes():
+            if node not in self.blocs:
+                continue
+
+            block = self.blocs[node]
+            successors = self.g.successors(node)
+            has_son = bool(successors)
+            for p_son in successors:
+                if p_son not in self.blocs:
+                    # Leaf has lost its son: don't remove anything
+                    # reaching this block
+                    for r in self.ira_regs_ids():
+                        useful.update(block.cur_reach[-1][r].union(
+                                block.defout[-1][r]))
+
+            # Function call, memory write or IRDst affectation
+            for k, ir in enumerate(block.irs):
+                for i_cur in ir:
+                    if i_cur.is_function_call():
+                        # /!\ never remove ir calls
+                        useful.add((block.label, k, i_cur))
+                    if isinstance(i_cur.dst, ExprMem):
+                        useful.add((block.label, k, i_cur))
+                    useful.update(block.defout[k][self.IRDst])
+
+            # Affecting return registers
+            if not has_son:
+                for r in self.get_out_regs(block):
+                    useful.update(block.defout[-1][r]
+                                  if block.defout[-1][r] else
+                                  block.cur_reach[-1][r])
+
+        return useful
+
+    def _mark_useful_code(self):
+        """Mark useful statements using previous reach analysis
+
+        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
+        IBM Thomas J. Watson Research Division,  Algorithm MK
+
+        Return a set of triplets (block, instruction number, instruction) of
+        useful instructions
+        PRE: compute_reach(self)
 
-    def remove_blocs_dead(self):
-        """Call remove_dead on each irbloc
-        Return True iff one of the bloc state has changed
         """
+
+        useful = self.init_useful_instr()
+        worklist = useful.copy()
+        while worklist:
+            elem = worklist.pop()
+            useful.add(elem)
+            irb, irs_ind, ins = elem
+
+            block = self.blocs[irb]
+            instr_defout = block.defout[irs_ind]
+            cur_kill = block.cur_kill[irs_ind]
+            cur_reach = block.cur_reach[irs_ind]
+
+            # Handle dependencies of used variables in ins
+            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
+                worklist.update(
+                    cur_reach[reg].difference(useful).difference(
+                        cur_kill[reg]
+                        if not instr_defout[reg] else
+                        set()))
+                for _, _, i in instr_defout[reg]:
+                    # Loop case (i in defout of current block)
+                    if i == ins:
+                        worklist.update(cur_reach[reg].difference(useful))
+        return useful
+
+    def remove_dead_code(self):
+        """Remove dead instructions in each block of the graph using the reach
+        analysis .
+        Returns True if a block has been modified
+        PRE : compute_reach(self)
+        """
+        useful = self._mark_useful_code()
         modified = False
-        for b in self.blocs.values():
-            modified |= self.remove_dead(b)
+        for block in self.blocs.values():
+            modified |= self.remove_dead_instr(block, useful)
         return modified
 
-    # for test XXX TODO
     def set_dead_regs(self, b):
         pass
 
     def add_unused_regs(self):
         pass
 
+    @staticmethod
+    def print_set(v_set):
+        """Print each triplet contained in a set
+        @v_set: set containing triplets elements
+        """
+        for p in v_set:
+            print '    (%s, %s, %s)' % p
+
     def dump_bloc_state(self, irb):
         print '*'*80
-        for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in, irb.c_out)):
-            print 'ir'
-            for x in ir:
-                print '\t', x
-            print 'R', [str(x) for x in irb.r[i]]#c_in]
-            print 'W', [str(x) for x in irb.w[i]]#c_out]
-            print 'IN', [str(x) for x in c_in]
-            print 'OUT', [str(x) for x in c_out]
-
-
-    def compute_in_out(self, irb):
-        """Liveness computation for a single bloc
+        for k, irs in enumerate(irb.irs):
+            for i in xrange(len(irs)):
+                print 5*"-"
+                print 'instr', k, irs[i]
+                print 5*"-"
+                for v in self.ira_regs_ids():
+                    if irb.cur_reach[k][v]:
+                        print 'REACH[%d][%s]' % (k, v)
+                        self.print_set(irb.cur_reach[k][v])
+                    if irb.cur_kill[k][v]:
+                        print 'KILL[%d][%s]' % (k, v)
+                        self.print_set(irb.cur_kill[k][v])
+                    if irb.defout[k][v]:
+                        print 'DEFOUT[%d][%s]' % (k, v)
+                        self.print_set(irb.defout[k][v])
+
+    def compute_reach_block(self, irb):
+        """Variable influence computation for a single block
         @irb: irbloc instance
-        Return True iff bloc state has changed
+        PRE: init_reach()
         """
-        modified = False
-
-        # Compute OUT for last irb entry
-        c_out = set()
-        has_son = False
-        for n_son in self.g.successors(irb.label):
-            has_son = True
-            if n_son not in self.blocs:
-                # If the son is not defined, we will propagate our current out
-                # nodes to the in nodes's son
-                son_c_in = irb.c_out_missing
-            else:
-                son_c_in = self.blocs[n_son].c_in[0]
-            c_out.update(son_c_in)
-        if not has_son:
-            # Special case: leaf nodes architecture dependant
-            c_out = self.get_out_regs(irb)
-
-        if irb.c_out[-1] != c_out:
-            irb.c_out[-1] = c_out
-            modified = True
-
-        # Compute out/in intra bloc
-        for i in reversed(xrange(len(irb.irs))):
-            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
-            if irb.c_in[i] != new_in:
-                irb.c_in[i] = new_in
-                modified = True
-
-            if i >= len(irb.irs) - 1:
-                # Last out has been previously updated
-                continue
-            new_out = set(irb.c_in[i + 1])
-            if irb.c_out[i] != new_out:
-                irb.c_out[i] = new_out
-                modified = True
 
-        return modified
-
-    def test_in_out_fix(self):
-        """Return True iff a fixed point has been reached during liveness
+        reach_block = {key: value.copy()
+                      for key, value in irb.cur_reach[0].iteritems()}
+
+        # Compute reach from predecessors
+        for n_pred in self.g.predecessors(irb.label):
+            p_block = self.blocs[n_pred]
+
+            # Handle each register definition
+            for c_reg in self.ira_regs_ids():
+                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
+                pred_through = p_block.defout[-1][c_reg].union(
+                    p_block.cur_reach[-1][c_reg].difference(
+                        p_block.cur_kill[-1][c_reg]))
+                reach_block[c_reg].update(pred_through)
+
+        # If a predecessor has changed
+        if reach_block != irb.cur_reach[0]:
+            irb.cur_reach[0] = reach_block
+            for c_reg in self.ira_regs_ids():
+                if irb.defout[0][c_reg]:
+                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
+                    irb.cur_kill[0][c_reg].update(
+                        reach_block[c_reg].difference(irb.defout[0][c_reg]))
+
+        # Compute reach and kill for block's instructions
+        for i in xrange(1, len(irb.irs)):
+            for c_reg in self.ira_regs_ids():
+                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
+                pred_through = irb.defout[i - 1][c_reg].union(
+                    irb.cur_reach[i - 1][c_reg].difference(
+                        irb.cur_kill[i - 1][c_reg]))
+                irb.cur_reach[i][c_reg].update(pred_through)
+                if irb.defout[i][c_reg]:
+                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
+                    irb.cur_kill[i][c_reg].update(
+                        irb.cur_reach[i][c_reg].difference(
+                            irb.defout[i][c_reg]))
+
+    def _test_kill_reach_fix(self):
+        """Return True iff a fixed point has been reached during reach
         analysis"""
 
         fixed = True
         for node in self.g.nodes():
-            if node not in self.blocs:
-                # leaf has lost her son
-                continue
-            irb = self.blocs[node]
-            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
-                fixed = False
-            irb.l_in = [set(x) for x in irb.c_in]
-            irb.l_out = [set(x) for x in irb.c_out]
+            if node in self.blocs:
+                irb = self.blocs[node]
+                if (irb.cur_reach != irb.prev_reach or
+                    irb.cur_kill != irb.prev_kill):
+                    fixed = False
+                    irb.prev_reach = irb.cur_reach[:]
+                    irb.prev_kill = irb.cur_kill[:]
         return fixed
 
-    def fill_missing_son_c_in(self):
-        """Find nodes with missing sons in graph, and add virtual link to all
-        written variables of all parents.
-        PRE: gen_graph() and get_rw()"""
+    def compute_reach(self):
+        """
+        Compute reach, defout and kill sets until a fixed point is reached.
+
+        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
+        IBM Thomas J. Watson Research Division, page 43
 
-        for node in self.g.nodes():
-            if node not in self.blocs:
-                continue
-            self.blocs[node].c_out_missing = set()
-            has_all_son = True
-            for node_son in self.g.successors(node):
-                if node_son not in self.blocs:
-                    has_all_son = False
-                    break
-            if has_all_son:
-                continue
-            parents = self.g.reachable_parents(node)
-            for parent in parents:
-                irb = self.blocs[parent]
-                for var_w in irb.w:
-                    self.blocs[node].c_out_missing.update(var_w)
-
-    def compute_dead(self):
-        """Iterate liveness analysis until a fixed point is reached.
         PRE: gen_graph()
         """
-
-        it = 0
         fixed_point = False
         log.debug('iteration...')
         while not fixed_point:
-            log.debug(it)
-            it += 1
-            for n in self.g.nodes():
-                if n not in self.blocs:
-                    # leaf has lost her son
-                    continue
-                irb = self.blocs[n]
-                self.compute_in_out(irb)
-
-            fixed_point = self.test_in_out_fix()
+            for node in self.g.nodes():
+                if node in self.blocs:
+                    self.compute_reach_block(self.blocs[node])
+            fixed_point = self._test_kill_reach_fix()
 
     def dead_simp(self):
-        """This function is used to analyse relation of a * complete function *
-        This mean the blocs under study represent a solid full function graph.
-
-        Ref: CS 5470 Compiler Techniques and Principles (Liveness
-        analysis/Dataflow equations)
-
-        PRE: call to gen_graph
         """
+        This function is used to analyse relation of a * complete function *
+        This means the blocks under study represent a solid full function graph.
 
-        modified = True
-        while modified:
-            log.debug('dead_simp step')
-
-            # Update r/w variables for all irblocs
-            self.get_rw()
-            # Fill c_in for missing sons
-            self.fill_missing_son_c_in()
-
-            # Liveness step
-            self.compute_dead()
-            modified = self.remove_blocs_dead()
+        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
+        IBM Thomas J. Watson Research Division, page 43
 
+        PRE: gen_graph()
+        """
+        # Update r/w variables for all irblocs
+        self.get_rw(self.ira_regs_ids())
+        # Liveness step
+        self.compute_reach()
+        self.remove_dead_code()
         # Simplify expressions
         self.simplify_blocs()