diff options
Diffstat (limited to 'miasm2/analysis/ssa.py')
| -rw-r--r-- | miasm2/analysis/ssa.py | 634 |
1 files changed, 533 insertions, 101 deletions
diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py index c22aae59..b68eef7c 100644 --- a/miasm2/analysis/ssa.py +++ b/miasm2/analysis/ssa.py @@ -1,10 +1,10 @@ from collections import deque +from miasm2.analysis.data_flow import DiGraphLivenessIRA, remove_empty_assignblks from miasm2.expression.expression import ExprId, ExprAssign, ExprOp, get_expr_ids from miasm2.ir.ir import AssignBlock, IRBlock - class SSA(object): """ Generic class for static single assignment (SSA) transformation @@ -44,7 +44,7 @@ class SSA(object): # stack for LHS self._stack_lhs = {} - self._ssa_variable_to_expr = {} + self.ssa_variable_to_expr = {} # dict of SSA expressions self.expressions = {} @@ -78,7 +78,7 @@ class SSA(object): :param ssa_var: ExprId, variable in SSA form :return: ExprId, variable in non-SSA form """ - expr = self._ssa_variable_to_expr.get(ssa_var, ssa_var) + expr = self.ssa_variable_to_expr.get(ssa_var, ssa_var) return expr def reset(self): @@ -99,7 +99,7 @@ class SSA(object): index = stack[expr] name = "%s.%d" % (expr.name, index) ssa_var = ExprId(name, expr.size) - self._ssa_variable_to_expr[ssa_var] = expr + self.ssa_variable_to_expr[ssa_var] = expr return ssa_var @@ -250,7 +250,7 @@ class SSA(object): # transform LHS for expr in instructions: - if expr.dst in self.immutable_ids: + if expr.dst in self.immutable_ids or expr.dst in self.ssa_variable_to_expr: dst_ssa = expr.dst else: dst_ssa = self._transform_expression_lhs(expr.dst) @@ -372,6 +372,7 @@ class SSADiGraph(SSA): self._rename(head) self._insert_phi() self._convert_phi() + self._fix_no_def_var(head) def reset(self): """Resets SSA transformation""" @@ -400,7 +401,7 @@ class SSADiGraph(SSA): # enforce ExprId if dst.is_id(): # exclude immutable ids - if dst in self.immutable_ids: + if dst in self.immutable_ids or dst in self.ssa_variable_to_expr: continue # map variable definition to blocks self.defs.setdefault(dst, set()).add(irblock.loc_key) @@ -438,13 +439,11 @@ class SSADiGraph(SSA): for node in frontier.get(loc_key, []): if node in done: continue - # place empty phi functions for a variable empty_phi = self._gen_empty_phi(variable) # add empty phi node for variable in node self._phinodes.setdefault(node, {})[variable] = empty_phi.src - done.add(node) if node not in intodo: @@ -575,120 +574,553 @@ class SSADiGraph(SSA): new_irs = IRBlock(loc_key, [assignblk] + list(irblock.assignblks)) self.ircfg.blocks[loc_key] = new_irs + def _fix_no_def_var(self, head): + """ + Replace phi source variables which are not ssa vars by ssa vars. + @head: loc_key of the graph head + """ + var_to_insert = set() + for loc_key in self._phinodes: + for dst, sources in self._phinodes[loc_key].iteritems(): + for src in sources.args: + if src in self.ssa_variable_to_expr: + continue + var_to_insert.add(src) + var_to_newname = {} + newname_to_var = {} + for var in var_to_insert: + new_var = self._transform_var_lhs(var) + var_to_newname[var] = new_var + newname_to_var[new_var] = var + + # Replace non modified node used in phi with new variable + self.ircfg.simplify(lambda expr:expr.replace_expr(var_to_newname)) + + irblock = self.ircfg.blocks[head] + assignblks = list(irblock) + assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] + self.ircfg.blocks[head] = IRBlock(head, assignblks) + + # Updt structure + for loc_key in self._phinodes: + for dst, sources in self._phinodes[loc_key].items(): + self._phinodes[loc_key][dst] = sources.replace_expr(var_to_newname) + + for var, (loc_key, index) in self.ssa_to_location.items(): + if loc_key == head: + self.ssa_to_location[var] = loc_key, index + 1 + + for newname, var in newname_to_var.iteritems(): + self.ssa_to_location[newname] = head, 0 + self.ssa_variable_to_expr[newname] = var + self.immutable_ids.add(newname) + self.expressions[newname] = var -def get_assignblk(graph, loc, index): +def irblock_has_phi(irblock): """ - Return the dictionary of the AssignBlock from @graph at location @loc at - @index - @graph: IRCFG instance - @loc: Location instance - @index: assignblock index + Return True if @irblock has Phi assignments + @irblock: IRBlock instance """ + if not irblock.assignblks: + return False + for src in irblock[0].itervalues(): + return src.is_op('Phi') + return False - irblock = graph.blocks[loc] - assignblks = irblock.assignblks - assignblk = assignblks[index] - assignblk_dct = dict(assignblk) - return assignblk_dct +class Varinfo(object): + """Store liveness information for a variable""" + __slots__ = ["live_index", "loc_key", "index"] + + def __init__(self, live_index, loc_key, index): + self.live_index = live_index + self.loc_key = loc_key + self.index = index -def set_assignblk(graph, loc, index, assignblk_dct): - """ - Set the Assignblock in @graph at location @loc at @index using dictionary - @assignblk_dct - @graph: IRCFG instance - @loc: Location instance - @index: assignblock index - @assignblk_dct: dictionary representing the AssignBlock +def get_var_assignment_src(ircfg, node, variables): + """ + Return the variable of @variables which is written by the irblock at @node + @node: Location + @variables: a set of variable to test """ + irblock = ircfg.blocks[node] + for assignblk in irblock: + result = set(assignblk).intersection(variables) + if not result: + continue + assert len(result) == 1 + return list(result)[0] + return None - irblock = graph.blocks[loc] - assignblks = list(irblock.assignblks) - assignblk = assignblks[index] - assignblks[index] = AssignBlock( - assignblk_dct, - assignblk.instr - ) - new_irblock = IRBlock(loc, assignblks) - return new_irblock +def get_phi_sources_parent_block(ircfg, loc_key, sources): + """ + Return a dictionary linking a variable to it's direct parent label + which belong to a path which affects the node. + @loc_key: the starting node + @sources: set of variables to resolve + """ + source_to_parent = {} + for parent in ircfg.predecessors(loc_key): + done = set() + todo = set([parent]) + found = False + while todo: + node = todo.pop() + if node in done: + continue + done.add(node) + ret = get_var_assignment_src(ircfg, node, sources) + if ret: + source_to_parent.setdefault(ret, set()).add(parent) + found = True + break + for pred in ircfg.predecessors(node): + todo.add(pred) + assert found + return source_to_parent -def remove_phi(ssa, head): +class UnSSADiGraph(object): """ - Remove Phi using naive algorithm - Note: The _ssa_variable_to_expr must be up to date - - @ssa: a SSADiGraph instance - @head: the loc_key of the graph head + Implements unssa algorithm + Revisiting Out-of-SSA Translation for Correctness, Code Quality, and + Efficiency """ - phivar2var = {} + def __init__(self, ssa, head, cfg_liveness): + self.cfg_liveness = cfg_liveness + self.ssa = ssa + self.head = head + + # Set of created variables + self.copy_vars = set() + # Virtual parallel copies + + # On loc_key's Phi node dst -> set((parent, src)) + self.phi_parent_sources = {} + # On loc_key's Phi node, loc_key -> set(Phi dsts) + self.phi_destinations = {} + # Phi's dst -> new var + self.phi_new_var = {} + # For a new_var representing dst: + # new_var -> set(parents of Phi's src in dst = Phi(src,...)) + self.new_var_to_srcs_parents = {} + # new_var -> set(variables to be coalesced with, named "merge_set") + self.merge_state = {} + + # Launch the algorithm in several steps + self.isolate_phi_nodes_block() + self.init_phis_merge_state() + self.order_ssa_var_dom() + self.aggressive_coalesce_block() + self.insert_parallel_copy() + self.replace_merge_sets() + self.remove_assign_eq() + remove_empty_assignblks(self.ssa.graph) + + def insert_parallel_copy(self): + """ + Naive Out-of-SSA from CSSA (without coalescing for now) + - Replace Phi + - Create room for parallel copies in Phi's parents + """ + ircfg = self.ssa.graph - all_ssa_vars = ssa._ssa_variable_to_expr + for irblock in ircfg.blocks.values(): + if not irblock_has_phi(irblock): + continue - # Retrieve Phi nodes - phi_nodes = [] - for irblock in ssa.graph.blocks.itervalues(): - for index, assignblk in enumerate(irblock): - for dst, src in assignblk.iteritems(): - if src.is_op('Phi'): - phi_nodes.append((irblock.loc_key, index)) + # Replace Phi with Phi's dst = new_var + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + new_var = self.phi_new_var[dst] + parallel_copies[dst] = new_var + + assignblks = list(irblock) + assignblks[0] = AssignBlock(parallel_copies, irblock[0].instr) + new_irblock = IRBlock(irblock.loc_key, assignblks) + ircfg.blocks[irblock.loc_key] = new_irblock + + # Insert new_var = src in each Phi's parent, at the end of the block + parent_to_parallel_copies = {} + parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in parent_to_parallel_copies.iteritems(): + parent = ircfg.blocks[parent] + assignblks = list(parent) + assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) + new_irblock = IRBlock(parent.loc_key, assignblks) + ircfg.blocks[parent.loc_key] = new_irblock + + def create_copy_var(self, var): + """ + Generate a new var standing for @var + @var: variable to replace + """ + new_var = ExprId('var%d' % len(self.copy_vars), var.size) + self.copy_vars.add(new_var) + return new_var + def isolate_phi_nodes_block(self): + """ + Init structures and virtually insert parallel copy before/after each phi + node + """ + ircfg = self.ssa.graph + for irblock in ircfg.blocks.itervalues(): + if not irblock_has_phi(irblock): + continue + for dst, sources in irblock[0].iteritems(): + assert sources.is_op('Phi') + new_var = self.create_copy_var(dst) + self.phi_new_var[dst] = new_var + + var_to_parents = get_phi_sources_parent_block( + self.ssa.graph, + irblock.loc_key, + sources.args + ) + + for src in sources.args: + parents = var_to_parents[src] + self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) + parent_dsts = set((parent, src) for parent in parents) + for parent in parents: + self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) + + self.phi_destinations[irblock.loc_key] = set(irblock[0]) + + def init_phis_merge_state(self): + """ + Generate trivial coalescing of phi variable and itself + """ + for phi_new_var in self.phi_new_var.itervalues(): + self.merge_state.setdefault(phi_new_var, set([phi_new_var])) - for phi_loc, phi_index in phi_nodes: - assignblk_dct = get_assignblk(ssa.graph, phi_loc, phi_index) - for dst, src in assignblk_dct.iteritems(): - if src.is_op('Phi'): - break - else: - raise RuntimeError('Cannot find phi?') - node_src = src - var = dst - - # Create new variable - new_var = ExprId('var%d' % len(phivar2var), var.size) - phivar2var[var] = new_var - phi_sources = set(node_src.args) - - # Place var init for non ssa variables - to_remove = set() - for phi_source in list(phi_sources): - if phi_source not in all_ssa_vars.union(phivar2var.values()): - assignblk_dct = get_assignblk(ssa.graph, head, 0) - assignblk_dct[new_var] = phi_source - new_irblock = set_assignblk(ssa.graph, head, 0, assignblk_dct) - ssa.graph.blocks[head] = new_irblock - to_remove.add(phi_source) - phi_sources.difference_update(to_remove) - - var_to_replace = set([var]) - var_to_replace.update(phi_sources) - - - - - # Replace variables - to_replace_dct = {x:new_var for x in var_to_replace} - for loc in ssa.graph.blocks: - irblock = ssa.graph.blocks[loc] - assignblks = [] - for assignblk in irblock: - assignblk_dct = {} + def order_ssa_var_dom(self): + """Compute dominance order of each ssa variable""" + ircfg = self.ssa.graph + + # compute dominator tree + dominator_tree = ircfg.compute_dominator_tree(self.head) + + # variable -> Varinfo + self.var_to_varinfo = {} + # live_index can later be used to compare dominance of AssignBlocks + live_index = 0 + + # walk in DFS over the dominator tree + for loc_key in dominator_tree.walk_depth_first_forward(self.head): + irblock = ircfg.blocks[loc_key] + + # Create live index for phi new vars + # They do not exist in the graph yet, so index is set to None + if irblock_has_phi(irblock): + for dst in irblock[0]: + if not dst.is_id(): + continue + new_var = self.phi_new_var[dst] + self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) + + live_index += 1 + + # Create live index for remaining assignments + for index, assignblk in enumerate(irblock): + used = False + for dst in assignblk: + if not dst.is_id(): + continue + if dst in self.ssa.immutable_ids: + # Will not be considered by the current algo, ignore it + # (for instance, IRDst) + continue + + assert dst not in self.var_to_varinfo + self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) + used = True + if used: + live_index += 1 + + + def ssa_def_dominates(self, node_a, node_b): + """ + Return living index order of @node_a and @node_b + @node_a: Varinfo instance + @node_b: Varinfo instance + """ + ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index + return ret + + def merge_set_sort(self, merge_set): + """ + Return a sorted list of (live_index, var) from @merge_set in dominance + order + @merge_set: set of coalescing variables + """ + return sorted( + (self.var_to_varinfo[var].live_index, var) + for var in merge_set + ) + + def ssa_def_is_live_at(self, node_a, node_b, parent): + """ + Return True if @node_a is live during @node_b definition + If @parent is None, this is a liveness test for a post phi variable; + Else, it is a liveness test for a variable source of the phi node + + @node_a: Varinfo instance + @node_b: Varinfo instance + @parent: Optional parent location of the phi source + """ + loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index + if parent and index_b is None: + index_b = 0 + if node_a not in self.new_var_to_srcs_parents: + # node_a is not a new var (it is a "classic" var) + # -> use a basic liveness test + liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] + return node_a in liveness_b.var_out + + for def_loc_key in self.new_var_to_srcs_parents[node_a]: + # Consider node_a as defined at the end of its parents blocks + # and compute liveness check accordingly + + if def_loc_key == parent: + # Same path as node_a definition, so SSA ensure b cannot be live + # on this path (otherwise, a Phi would already happen earlier) + continue + liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] + if node_b in liveness_end_block.var_out: + return True + return False + + def merge_nodes_interfere(self, node_a, node_b, parent): + """ + Return True if @node_a and @node_b interfere + @node_a: variable + @node_b: variable + @parent: Optional parent location of the phi source for liveness tests + + Interference check is: is x live at y definition (or reverse) + TODO: add Value-based interference improvement + """ + if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: + # Defined in the same AssignBlock -> interfere + return True + + if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: + return self.ssa_def_is_live_at(node_a, node_b, parent) + return self.ssa_def_is_live_at(node_b, node_a, parent) + + def merge_sets_interfere(self, merge_a, merge_b, parent): + """ + Return True if no variable in @merge_a and @merge_b interferes. + + Implementation of "Algorithm 2: Check intersection in a set of variables" + + @merge_a: a dom ordered list of equivalent variables + @merge_b: a dom ordered list of equivalent variables + @parent: Optional parent location of the phi source for liveness tests + """ + if merge_a == merge_b: + # No need to consider interference if equal + return False + + merge_a_list = self.merge_set_sort(merge_a) + merge_b_list = self.merge_set_sort(merge_b) + dom = [] + while merge_a_list or merge_b_list: + if not merge_a_list: + _, current = merge_b_list.pop(0) + elif not merge_b_list: + _, current = merge_a_list.pop(0) + else: + # compare live_indexes (standing for dominance) + if merge_a_list[-1] < merge_b_list[-1]: + _, current = merge_a_list.pop(0) + else: + _, current = merge_b_list.pop(0) + while dom and not self.ssa_def_dominates(dom[-1], current): + dom.pop() + + # Don't test node in same merge_set + if ( + # Is stack not empty? + dom and + # Trivial non-interference if dom.top() and current come + # from the same merge set + not (dom[-1] in merge_a and current in merge_a) and + not (dom[-1] in merge_b and current in merge_b) and + # Actually test for interference + self.merge_nodes_interfere(current, dom[-1], parent) + ): + return True + dom.append(current) + return False + + def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): + """ + Try to coalesce variables each dst/src couple together from + @parallel_copies + + @parallel_copies: a dictionary representing dst/src parallel + assignments. + @parent: Optional parent location of the phi source for liveness tests + """ + for dst, src in parallel_copies.iteritems(): + dst_merge = self.merge_state.setdefault(dst, set([dst])) + src_merge = self.merge_state.setdefault(src, set([src])) + if not self.merge_sets_interfere(dst_merge, src_merge, parent): + dst_merge.update(src_merge) + for node in dst_merge: + self.merge_state[node] = dst_merge + + def aggressive_coalesce_block(self): + """Try to coalesce phi var with their pre/post variables""" + + ircfg = self.ssa.graph + + # Run coalesce on the post phi parallel copy + for irblock in ircfg.blocks.values(): + if not irblock_has_phi(irblock): + continue + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + parallel_copies[dst] = self.phi_new_var[dst] + self.aggressive_coalesce_parallel_copy(parallel_copies, None) + + # Run coalesce on the pre phi parallel copy + + # Stand for the virtual parallel copies at the end of Phi's block + # parents + parent_to_parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in parent_to_parallel_copies.iteritems(): + self.aggressive_coalesce_parallel_copy(parallel_copies, parent) + + def get_best_merge_set_name(self, merge_set): + """ + For a given @merge_set, prefer an original SSA variable instead of a + created copy. In other case, take a random name. + @merge_set: set of equivalent expressions + """ + if not merge_set: + raise RuntimeError("Merge set should not be empty") + for var in merge_set: + if var not in self.copy_vars: + return var + # Get random name + return var + + + def replace_merge_sets(self): + """ + In the graph, replace all variables from merge state by their + representative variable + """ + replace = {} + merge_sets = set() + + # Elect representative for merge sets + merge_set_to_name = {} + for merge_set in self.merge_state.itervalues(): + frozen_merge_set = frozenset(merge_set) + merge_sets.add(frozen_merge_set) + var_name = self.get_best_merge_set_name(merge_set) + merge_set_to_name[frozen_merge_set] = var_name + + # Generate replacement of variable by their representative + for merge_set in merge_sets: + var_name = merge_set_to_name[merge_set] + merge_set = list(merge_set) + for var in merge_set: + replace[var] = var_name + + self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) + + def remove_phi(self): + """ + Remove phi operators in @ifcfg + @ircfg: IRDiGraph instance + """ + + for irblock in self.ssa.graph.blocks.values(): + assignblks = list(irblock) + out = {} + for dst, src in assignblks[0].iteritems(): + if src.is_op('Phi'): + assert set([dst]) == set(src.args) + continue + out[dst] = src + assignblks[0] = AssignBlock(out, assignblks[0].instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) + + def remove_assign_eq(self): + """ + Remove trivial expressions (a=a) in the current graph + """ + for irblock in self.ssa.graph.blocks.values(): + assignblks = list(irblock) + for i, assignblk in enumerate(assignblks): + out = {} for dst, src in assignblk.iteritems(): - dst = dst.replace_expr(to_replace_dct) - src = src.replace_expr(to_replace_dct) - assignblk_dct[dst] = src - assignblks.append(AssignBlock(assignblk_dct, assignblk.instr)) - new_irblock = IRBlock(loc, assignblks) - ssa.graph.blocks[loc] = new_irblock + if dst == src: + continue + out[dst] = src + assignblks[i] = AssignBlock(out, assignblk.instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) + + +class DiGraphLivenessSSA(DiGraphLivenessIRA): + """ + DiGraph representing variable liveness is a SSA graph + """ + def __init__(self, ircfg): + super(DiGraphLivenessSSA, self).__init__(ircfg) + + self.loc_key_to_phi_parents = {} + for irblock in self.blocks.values(): + if not irblock_has_phi(irblock): + continue + out = {} + for sources in irblock[0].itervalues(): + var_to_parents = get_phi_sources_parent_block(self, irblock.loc_key, sources.args) + for var, var_parents in var_to_parents.iteritems(): + out.setdefault(var, set()).update(var_parents) + self.loc_key_to_phi_parents[irblock.loc_key] = out + + def back_propagate_to_parent(self, todo, node, parent): + parent_block = self.blocks[parent] + cur_block = self.blocks[node] + irblock = self.ircfg.blocks[node] + if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: + return + var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) + + if irblock_has_phi(irblock): + # Remove phi special case + out = set() + phi_sources = self.loc_key_to_phi_parents[irblock.loc_key] + for var in var_info: + if var not in phi_sources: + out.add(var) + continue + if parent in phi_sources[var]: + out.add(var) + var_info = out - # Remove phi - assignblk_dct = get_assignblk(ssa.graph, phi_loc, phi_index) - del assignblk_dct[new_var] + parent_block.infos[-1].var_out = var_info + todo.add(parent) - new_irblock = set_assignblk(ssa.graph, phi_loc, phi_index, assignblk_dct) - ssa.graph.blocks[phi_loc] = new_irblock |