about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--miasm2/analysis/ssa.py207
1 files changed, 127 insertions, 80 deletions
diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py
index e4a6684f..b68eef7c 100644
--- a/miasm2/analysis/ssa.py
+++ b/miasm2/analysis/ssa.py
@@ -629,7 +629,7 @@ def irblock_has_phi(irblock):
     return False
 
 
-class VarInfos(object):
+class Varinfo(object):
     """Store liveness information for a variable"""
     __slots__ = ["live_index", "loc_key", "index"]
 
@@ -695,14 +695,25 @@ class UnSSADiGraph(object):
         self.ssa = ssa
         self.head = head
 
+        # Set of created variables
         self.copy_vars = set()
-        self.parallel_copy_block_at_end = {}
-        self.parallel_copy_block_at_phi = {}
+        # Virtual parallel copies
+
+        # On loc_key's Phi node dst -> set((parent, src))
+        self.phi_parent_sources = {}
+        # On loc_key's Phi node, loc_key -> set(Phi dsts)
+        self.phi_destinations = {}
+        # Phi's dst -> new var
         self.phi_new_var = {}
-        self.end_new_var = {}
+        # For a new_var representing dst:
+        # new_var -> set(parents of Phi's src in dst = Phi(src,...))
+        self.new_var_to_srcs_parents = {}
+        # new_var -> set(variables to be coalesced with, named "merge_set")
+        self.merge_state = {}
 
+        # Launch the algorithm in several steps
         self.isolate_phi_nodes_block()
-        self.coalesce_phi_nodes_block()
+        self.init_phis_merge_state()
         self.order_ssa_var_dom()
         self.aggressive_coalesce_block()
         self.insert_parallel_copy()
@@ -711,16 +722,21 @@ class UnSSADiGraph(object):
         remove_empty_assignblks(self.ssa.graph)
 
     def insert_parallel_copy(self):
-        """Insert generated parallel copies"""
+        """
+        Naive Out-of-SSA from CSSA (without coalescing for now)
+        - Replace Phi
+        - Create room for parallel copies in Phi's parents
+        """
         ircfg = self.ssa.graph
 
-        # Run coalesce on the post phi parallel copy
         for irblock in ircfg.blocks.values():
             if not irblock_has_phi(irblock):
                 continue
+
+            # Replace Phi with Phi's dst = new_var
             parallel_copies = {}
-            for dst in self.parallel_copy_block_at_phi[irblock.loc_key]:
-                new_var = self.phi_new_var[(irblock.loc_key, dst)]
+            for dst in self.phi_destinations[irblock.loc_key]:
+                new_var = self.phi_new_var[dst]
                 parallel_copies[dst] = new_var
 
             assignblks = list(irblock)
@@ -728,12 +744,12 @@ class UnSSADiGraph(object):
             new_irblock = IRBlock(irblock.loc_key, assignblks)
             ircfg.blocks[irblock.loc_key] = new_irblock
 
-
+            # Insert new_var = src in each Phi's parent, at the end of the block
             parent_to_parallel_copies = {}
             parallel_copies = {}
             for dst in irblock[0]:
-                new_var = self.phi_new_var[(irblock.loc_key, dst)]
-                for parent, src in self.parallel_copy_block_at_end[(irblock.loc_key, dst)]:
+                new_var = self.phi_new_var[dst]
+                for parent, src in self.phi_parent_sources[dst]:
                     parent_to_parallel_copies.setdefault(parent, {})[new_var] = src
 
             for parent, parallel_copies in parent_to_parallel_copies.iteritems():
@@ -745,49 +761,48 @@ class UnSSADiGraph(object):
 
     def create_copy_var(self, var):
         """
-        Generate a new var in @phivar2var replacing @var
-        @phivar2var: dictionary associating variable with their replacing one.
+        Generate a new var standing for @var
         @var: variable to replace
         """
-        new_phi_var = ExprId('var%d' % len(self.copy_vars), var.size)
-        self.copy_vars.add(new_phi_var)
-        return new_phi_var
+        new_var = ExprId('var%d' % len(self.copy_vars), var.size)
+        self.copy_vars.add(new_var)
+        return new_var
 
     def isolate_phi_nodes_block(self):
-        """Insert parallel copy before/after each phi node"""
-
+        """
+        Init structures and virtually insert parallel copy before/after each phi
+        node
+        """
         ircfg = self.ssa.graph
-        for irblock in ircfg.blocks.values():
+        for irblock in ircfg.blocks.itervalues():
             if not irblock_has_phi(irblock):
                 continue
-            all_dst = set()
-            for dst, sources in irblock[0].items():
-                all_dst.add(dst)
+            for dst, sources in irblock[0].iteritems():
                 assert sources.is_op('Phi')
                 new_var = self.create_copy_var(dst)
-                self.phi_new_var[(irblock.loc_key, dst)] = new_var
+                self.phi_new_var[dst] = new_var
 
                 var_to_parents = get_phi_sources_parent_block(
                     self.ssa.graph,
                     irblock.loc_key,
                     sources.args
                 )
+
                 for src in sources.args:
                     parents = var_to_parents[src]
+                    self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents)
+                    parent_dsts = set((parent, src) for parent in parents)
                     for parent in parents:
-                        self.parallel_copy_block_at_end.setdefault((irblock.loc_key, dst), set()).add((parent, src))
-                        self.end_new_var.setdefault(new_var, set()).add(parent)
+                        self.phi_parent_sources.setdefault(dst, set()).add((parent, src))
 
-            self.parallel_copy_block_at_phi.setdefault(irblock.loc_key, set()).update(all_dst)
+            self.phi_destinations[irblock.loc_key] = set(irblock[0])
 
-    def coalesce_phi_nodes_block(self):
+    def init_phis_merge_state(self):
         """
-        Generate trivial coalescing of phi variable and its destination
+        Generate trivial coalescing of phi variable and itself
         """
-        merge_state = {}
         for phi_new_var in self.phi_new_var.itervalues():
-            merge_state.setdefault(phi_new_var, set([phi_new_var]))
-        self.merge_state = merge_state
+            self.merge_state.setdefault(phi_new_var, set([phi_new_var]))
 
     def order_ssa_var_dom(self):
         """Compute dominance order of each ssa variable"""
@@ -796,19 +811,23 @@ class UnSSADiGraph(object):
         # compute dominator tree
         dominator_tree = ircfg.compute_dominator_tree(self.head)
 
-        self.var_to_infos = {}
+        # variable -> Varinfo
+        self.var_to_varinfo = {}
+        # live_index can later be used to compare dominance of AssignBlocks
         live_index = 0
+
         # walk in DFS over the dominator tree
         for loc_key in dominator_tree.walk_depth_first_forward(self.head):
             irblock = ircfg.blocks[loc_key]
 
             # Create live index for phi new vars
+            # They do not exist in the graph yet, so index is set to None
             if irblock_has_phi(irblock):
                 for dst in irblock[0]:
                     if not dst.is_id():
                         continue
-                    new_var = self.phi_new_var[(irblock.loc_key, dst)]
-                    self.var_to_infos[new_var] = VarInfos(live_index, loc_key, None)
+                    new_var = self.phi_new_var[dst]
+                    self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None)
 
                 live_index += 1
 
@@ -819,10 +838,12 @@ class UnSSADiGraph(object):
                     if not dst.is_id():
                         continue
                     if dst in self.ssa.immutable_ids:
+                        # Will not be considered by the current algo, ignore it
+                        # (for instance, IRDst)
                         continue
 
-                    assert dst not in self.var_to_infos
-                    self.var_to_infos[dst] = VarInfos(live_index, loc_key, index)
+                    assert dst not in self.var_to_varinfo
+                    self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index)
                     used = True
                 if used:
                     live_index += 1
@@ -831,69 +852,87 @@ class UnSSADiGraph(object):
     def ssa_def_dominates(self, node_a, node_b):
         """
         Return living index order of @node_a and @node_b
-        @node_a: VarInfos instance
-        @node_b: VarInfos instance
+        @node_a: Varinfo instance
+        @node_b: Varinfo instance
         """
-        ret = self.var_to_infos[node_a].live_index <= self.var_to_infos[node_b].live_index
+        ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index
         return ret
 
     def merge_set_sort(self, merge_set):
         """
-        Sort the list of nodes @merge_set in dominance order
-        @node_a: VarInfos instance
-        @node_b: VarInfos instance
+        Return a sorted list of (live_index, var) from @merge_set in dominance
+        order
+        @merge_set: set of coalescing variables
         """
-        out = [(self.var_to_infos[var].live_index, var) for var in merge_set]
-        out.sort()
-        return out
+        return sorted(
+            (self.var_to_varinfo[var].live_index, var)
+            for var in merge_set
+        )
 
     def ssa_def_is_live_at(self, node_a, node_b, parent):
         """
-        Return true if @node_a is live during @node_b definition
-        if @parent is None, this is a liveness test for a post phi variable;
+        Return True if @node_a is live during @node_b definition
+        If @parent is None, this is a liveness test for a post phi variable;
         Else, it is a liveness test for a variable source of the phi node
 
-        @node_a: VarInfos instance
-        @node_b: VarInfos instance
+        @node_a: Varinfo instance
+        @node_b: Varinfo instance
         @parent: Optional parent location of the phi source
         """
-        loc_key, index = self.var_to_infos[node_b].loc_key, self.var_to_infos[node_b].index
-        if parent and index is None:
-            index = 0
-        if node_a not in self.end_new_var:
-            info = self.cfg_liveness.blocks[loc_key].infos[index]
-            ret = node_a in info.var_out
-            return ret
+        loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index
+        if parent and index_b is None:
+            index_b = 0
+        if node_a not in self.new_var_to_srcs_parents:
+            # node_a is not a new var (it is a "classic" var)
+            # -> use a basic liveness test
+            liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b]
+            return node_a in liveness_b.var_out
+
+        for def_loc_key in self.new_var_to_srcs_parents[node_a]:
+            # Consider node_a as defined at the end of its parents blocks
+            # and compute liveness check accordingly
 
-        for def_loc_key in self.end_new_var[node_a]:
             if def_loc_key == parent:
+                # Same path as node_a definition, so SSA ensure b cannot be live
+                # on this path (otherwise, a Phi would already happen earlier)
                 continue
-            info = self.cfg_liveness.blocks[def_loc_key].infos[-1]
-            if node_b in info.var_out:
+            liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1]
+            if node_b in liveness_end_block.var_out:
                 return True
         return False
 
     def merge_nodes_interfere(self, node_a, node_b, parent):
         """
         Return True if @node_a and @node_b interfere
-        @node_a: VarInfos instance
-        @node_b: VarInfos instance
+        @node_a: variable
+        @node_b: variable
         @parent: Optional parent location of the phi source for liveness tests
+
+        Interference check is: is x live at y definition (or reverse)
+        TODO: add Value-based interference improvement
         """
-        if self.var_to_infos[node_a].live_index == self.var_to_infos[node_b].live_index:
+        if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index:
+            # Defined in the same AssignBlock -> interfere
             return True
 
-        if self.var_to_infos[node_a].live_index < self.var_to_infos[node_b].live_index:
+        if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index:
             return self.ssa_def_is_live_at(node_a, node_b, parent)
         return self.ssa_def_is_live_at(node_b, node_a, parent)
 
     def merge_sets_interfere(self, merge_a, merge_b, parent):
         """
         Return True if no variable in @merge_a and @merge_b interferes.
+
+        Implementation of "Algorithm 2: Check intersection in a set of variables"
+
         @merge_a: a dom ordered list of equivalent variables
         @merge_b: a dom ordered list of equivalent variables
         @parent: Optional parent location of the phi source for liveness tests
         """
+        if merge_a == merge_b:
+            # No need to consider interference if equal
+            return False
+
         merge_a_list = self.merge_set_sort(merge_a)
         merge_b_list = self.merge_set_sort(merge_b)
         dom = []
@@ -903,6 +942,7 @@ class UnSSADiGraph(object):
             elif not merge_b_list:
                 _, current = merge_a_list.pop(0)
             else:
+                # compare live_indexes (standing for dominance)
                 if merge_a_list[-1] < merge_b_list[-1]:
                     _, current = merge_a_list.pop(0)
                 else:
@@ -911,10 +951,16 @@ class UnSSADiGraph(object):
                 dom.pop()
 
             # Don't test node in same merge_set
-            if (dom and
-                not (dom[-1] in merge_a and current in merge_a) and
-                not (dom[-1] in merge_b and current in merge_b)):
-                if dom and self.merge_nodes_interfere(current, dom[-1], parent):
+            if (
+                    # Is stack not empty?
+                    dom and
+                    # Trivial non-interference if dom.top() and current come
+                    # from the same merge set
+                    not (dom[-1] in merge_a and current in merge_a) and
+                    not (dom[-1] in merge_b and current in merge_b) and
+                    # Actually test for interference
+                    self.merge_nodes_interfere(current, dom[-1], parent)
+            ):
                     return True
             dom.append(current)
         return False
@@ -931,8 +977,6 @@ class UnSSADiGraph(object):
         for dst, src in parallel_copies.iteritems():
             dst_merge = self.merge_state.setdefault(dst, set([dst]))
             src_merge = self.merge_state.setdefault(src, set([src]))
-            if dst_merge == src_merge:
-                continue
             if not self.merge_sets_interfere(dst_merge, src_merge, parent):
                 dst_merge.update(src_merge)
                 for node in dst_merge:
@@ -948,18 +992,19 @@ class UnSSADiGraph(object):
             if not irblock_has_phi(irblock):
                 continue
             parallel_copies = {}
-            for dst in self.parallel_copy_block_at_phi[irblock.loc_key]:
-                parallel_copies[dst] = self.phi_new_var[(irblock.loc_key, dst)]
+            for dst in self.phi_destinations[irblock.loc_key]:
+                parallel_copies[dst] = self.phi_new_var[dst]
             self.aggressive_coalesce_parallel_copy(parallel_copies, None)
 
             # Run coalesce on the pre phi parallel copy
+
+            # Stand for the virtual parallel copies at the end of Phi's block
+            # parents
             parent_to_parallel_copies = {}
-            parallel_copies = {}
             for dst in irblock[0]:
-                new_var = self.phi_new_var[(irblock.loc_key, dst)]
-                for parent, src in self.parallel_copy_block_at_end[(irblock.loc_key, dst)]:
+                new_var = self.phi_new_var[dst]
+                for parent, src in self.phi_parent_sources[dst]:
                     parent_to_parallel_copies.setdefault(parent, {})[new_var] = src
-                    #self.end_new_var.setdefault(new_var, set()).add(parent)
 
             for parent, parallel_copies in parent_to_parallel_copies.iteritems():
                 self.aggressive_coalesce_parallel_copy(parallel_copies, parent)
@@ -981,10 +1026,13 @@ class UnSSADiGraph(object):
 
     def replace_merge_sets(self):
         """
-        Replace all variables in a merge state by a representative variable
+        In the graph, replace all variables from merge state by their
+        representative variable
         """
         replace = {}
         merge_sets = set()
+
+        # Elect representative for merge sets
         merge_set_to_name = {}
         for merge_set in self.merge_state.itervalues():
             frozen_merge_set = frozenset(merge_set)
@@ -992,6 +1040,7 @@ class UnSSADiGraph(object):
             var_name = self.get_best_merge_set_name(merge_set)
             merge_set_to_name[frozen_merge_set] = var_name
 
+        # Generate replacement of variable by their representative
         for merge_set in merge_sets:
             var_name = merge_set_to_name[merge_set]
             merge_set = list(merge_set)
@@ -1019,10 +1068,8 @@ class UnSSADiGraph(object):
 
     def remove_assign_eq(self):
         """
-        Remove trivial expressions (a=a) in @ifcfg
-        @ircfg: IRDiGraph instance
+        Remove trivial expressions (a=a) in the current graph
         """
-
         for irblock in self.ssa.graph.blocks.values():
             assignblks = list(irblock)
             for i, assignblk in enumerate(assignblks):