diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2025-10-14 09:09:29 +0000 |
| commit | 579cf1d03fb932083e6317967d1613d5c2587fb6 (patch) | |
| tree | 629f039935382a2a7391bce9253f6c9968159049 /src/miasm/analysis/outofssa.py | |
| parent | 51c15d3ea2e16d4fc5f0f01a3b9befc66b1f982e (diff) | |
| download | focaccia-miasm-579cf1d03fb932083e6317967d1613d5c2587fb6.tar.gz focaccia-miasm-579cf1d03fb932083e6317967d1613d5c2587fb6.zip | |
Convert to src-layout ta/nix
Diffstat (limited to 'src/miasm/analysis/outofssa.py')
| -rw-r--r-- | src/miasm/analysis/outofssa.py | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/src/miasm/analysis/outofssa.py b/src/miasm/analysis/outofssa.py new file mode 100644 index 00000000..2f2b185c --- /dev/null +++ b/src/miasm/analysis/outofssa.py @@ -0,0 +1,415 @@ +from future.utils import viewitems, viewvalues + +from miasm.expression.expression import ExprId +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.analysis.ssa import get_phi_sources_parent_block, \ + irblock_has_phi + + +class Varinfo(object): + """Store liveness information for a variable""" + __slots__ = ["live_index", "loc_key", "index"] + + def __init__(self, live_index, loc_key, index): + self.live_index = live_index + self.loc_key = loc_key + self.index = index + + +class UnSSADiGraph(object): + """ + Implements unssa algorithm + Revisiting Out-of-SSA Translation for Correctness, Code Quality, and + Efficiency + """ + + def __init__(self, ssa, head, cfg_liveness): + self.cfg_liveness = cfg_liveness + self.ssa = ssa + self.head = head + + # Set of created variables + self.copy_vars = set() + # Virtual parallel copies + + # On loc_key's Phi node dst -> set((parent, src)) + self.phi_parent_sources = {} + # On loc_key's Phi node, loc_key -> set(Phi dsts) + self.phi_destinations = {} + # Phi's dst -> new var + self.phi_new_var = {} + # For a new_var representing dst: + # new_var -> set(parents of Phi's src in dst = Phi(src,...)) + self.new_var_to_srcs_parents = {} + # new_var -> set(variables to be coalesced with, named "merge_set") + self.merge_state = {} + + # Launch the algorithm in several steps + self.isolate_phi_nodes_block() + self.init_phis_merge_state() + self.order_ssa_var_dom() + self.aggressive_coalesce_block() + self.insert_parallel_copy() + self.replace_merge_sets() + self.remove_assign_eq() + + def insert_parallel_copy(self): + """ + Naive Out-of-SSA from CSSA (without coalescing for now) + - Replace Phi + - Create room for parallel copies in Phi's parents + """ + ircfg = self.ssa.graph + + for irblock in list(viewvalues(ircfg.blocks)): + if not irblock_has_phi(irblock): + continue + + # Replace Phi with Phi's dst = new_var + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + new_var = self.phi_new_var[dst] + parallel_copies[dst] = new_var + + assignblks = list(irblock) + assignblks[0] = AssignBlock(parallel_copies, irblock[0].instr) + new_irblock = IRBlock(irblock.loc_db, irblock.loc_key, assignblks) + ircfg.blocks[irblock.loc_key] = new_irblock + + # Insert new_var = src in each Phi's parent, at the end of the block + parent_to_parallel_copies = {} + parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in viewitems(parent_to_parallel_copies): + parent = ircfg.blocks[parent] + assignblks = list(parent) + assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) + new_irblock = IRBlock(parent.loc_db, parent.loc_key, assignblks) + ircfg.blocks[parent.loc_key] = new_irblock + + def create_copy_var(self, var): + """ + Generate a new var standing for @var + @var: variable to replace + """ + new_var = ExprId('var%d' % len(self.copy_vars), var.size) + self.copy_vars.add(new_var) + return new_var + + def isolate_phi_nodes_block(self): + """ + Init structures and virtually insert parallel copy before/after each phi + node + """ + ircfg = self.ssa.graph + for irblock in viewvalues(ircfg.blocks): + if not irblock_has_phi(irblock): + continue + for dst, sources in viewitems(irblock[0]): + assert sources.is_op('Phi') + new_var = self.create_copy_var(dst) + self.phi_new_var[dst] = new_var + + var_to_parents = get_phi_sources_parent_block( + self.ssa.graph, + irblock.loc_key, + sources.args + ) + + for src in sources.args: + parents = var_to_parents[src] + self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) + for parent in parents: + self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) + + self.phi_destinations[irblock.loc_key] = set(irblock[0]) + + def init_phis_merge_state(self): + """ + Generate trivial coalescing of phi variable and itself + """ + for phi_new_var in viewvalues(self.phi_new_var): + self.merge_state.setdefault(phi_new_var, set([phi_new_var])) + + def order_ssa_var_dom(self): + """Compute dominance order of each ssa variable""" + ircfg = self.ssa.graph + + # compute dominator tree + dominator_tree = ircfg.compute_dominator_tree(self.head) + + # variable -> Varinfo + self.var_to_varinfo = {} + # live_index can later be used to compare dominance of AssignBlocks + live_index = 0 + + # walk in DFS over the dominator tree + for loc_key in dominator_tree.walk_depth_first_forward(self.head): + irblock = ircfg.blocks.get(loc_key, None) + if irblock is None: + continue + + # Create live index for phi new vars + # They do not exist in the graph yet, so index is set to None + if irblock_has_phi(irblock): + for dst in irblock[0]: + if not dst.is_id(): + continue + new_var = self.phi_new_var[dst] + self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) + + live_index += 1 + + # Create live index for remaining assignments + for index, assignblk in enumerate(irblock): + used = False + for dst in assignblk: + if not dst.is_id(): + continue + if dst in self.ssa.immutable_ids: + # Will not be considered by the current algo, ignore it + # (for instance, IRDst) + continue + + assert dst not in self.var_to_varinfo + self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) + used = True + if used: + live_index += 1 + + + def ssa_def_dominates(self, node_a, node_b): + """ + Return living index order of @node_a and @node_b + @node_a: Varinfo instance + @node_b: Varinfo instance + """ + ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index + return ret + + def merge_set_sort(self, merge_set): + """ + Return a sorted list of (live_index, var) from @merge_set in dominance + order + @merge_set: set of coalescing variables + """ + return sorted( + (self.var_to_varinfo[var].live_index, var) + for var in merge_set + ) + + def ssa_def_is_live_at(self, node_a, node_b, parent): + """ + Return True if @node_a is live during @node_b definition + If @parent is None, this is a liveness test for a post phi variable; + Else, it is a liveness test for a variable source of the phi node + + @node_a: Varinfo instance + @node_b: Varinfo instance + @parent: Optional parent location of the phi source + """ + loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index + if parent and index_b is None: + index_b = 0 + if node_a not in self.new_var_to_srcs_parents: + # node_a is not a new var (it is a "classic" var) + # -> use a basic liveness test + liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] + return node_a in liveness_b.var_out + + for def_loc_key in self.new_var_to_srcs_parents[node_a]: + # Consider node_a as defined at the end of its parents blocks + # and compute liveness check accordingly + + if def_loc_key == parent: + # Same path as node_a definition, so SSA ensure b cannot be live + # on this path (otherwise, a Phi would already happen earlier) + continue + liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] + if node_b in liveness_end_block.var_out: + return True + return False + + def merge_nodes_interfere(self, node_a, node_b, parent): + """ + Return True if @node_a and @node_b interfere + @node_a: variable + @node_b: variable + @parent: Optional parent location of the phi source for liveness tests + + Interference check is: is x live at y definition (or reverse) + TODO: add Value-based interference improvement + """ + if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: + # Defined in the same AssignBlock -> interfere + return True + + if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: + return self.ssa_def_is_live_at(node_a, node_b, parent) + return self.ssa_def_is_live_at(node_b, node_a, parent) + + def merge_sets_interfere(self, merge_a, merge_b, parent): + """ + Return True if no variable in @merge_a and @merge_b interferes. + + Implementation of "Algorithm 2: Check intersection in a set of variables" + + @merge_a: a dom ordered list of equivalent variables + @merge_b: a dom ordered list of equivalent variables + @parent: Optional parent location of the phi source for liveness tests + """ + if merge_a == merge_b: + # No need to consider interference if equal + return False + + merge_a_list = self.merge_set_sort(merge_a) + merge_b_list = self.merge_set_sort(merge_b) + dom = [] + while merge_a_list or merge_b_list: + if not merge_a_list: + _, current = merge_b_list.pop(0) + elif not merge_b_list: + _, current = merge_a_list.pop(0) + else: + # compare live_indexes (standing for dominance) + if merge_a_list[-1] < merge_b_list[-1]: + _, current = merge_a_list.pop(0) + else: + _, current = merge_b_list.pop(0) + while dom and not self.ssa_def_dominates(dom[-1], current): + dom.pop() + + # Don't test node in same merge_set + if ( + # Is stack not empty? + dom and + # Trivial non-interference if dom.top() and current come + # from the same merge set + not (dom[-1] in merge_a and current in merge_a) and + not (dom[-1] in merge_b and current in merge_b) and + # Actually test for interference + self.merge_nodes_interfere(current, dom[-1], parent) + ): + return True + dom.append(current) + return False + + def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): + """ + Try to coalesce variables each dst/src couple together from + @parallel_copies + + @parallel_copies: a dictionary representing dst/src parallel + assignments. + @parent: Optional parent location of the phi source for liveness tests + """ + for dst, src in viewitems(parallel_copies): + dst_merge = self.merge_state.setdefault(dst, set([dst])) + src_merge = self.merge_state.setdefault(src, set([src])) + if not self.merge_sets_interfere(dst_merge, src_merge, parent): + dst_merge.update(src_merge) + for node in dst_merge: + self.merge_state[node] = dst_merge + + def aggressive_coalesce_block(self): + """Try to coalesce phi var with their pre/post variables""" + + ircfg = self.ssa.graph + + # Run coalesce on the post phi parallel copy + for irblock in viewvalues(ircfg.blocks): + if not irblock_has_phi(irblock): + continue + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + parallel_copies[dst] = self.phi_new_var[dst] + self.aggressive_coalesce_parallel_copy(parallel_copies, None) + + # Run coalesce on the pre phi parallel copy + + # Stand for the virtual parallel copies at the end of Phi's block + # parents + parent_to_parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in viewitems(parent_to_parallel_copies): + self.aggressive_coalesce_parallel_copy(parallel_copies, parent) + + def get_best_merge_set_name(self, merge_set): + """ + For a given @merge_set, prefer an original SSA variable instead of a + created copy. In other case, take a random name. + @merge_set: set of equivalent expressions + """ + if not merge_set: + raise RuntimeError("Merge set should not be empty") + for var in merge_set: + if var not in self.copy_vars: + return var + # Get random name + return var + + + def replace_merge_sets(self): + """ + In the graph, replace all variables from merge state by their + representative variable + """ + replace = {} + merge_sets = set() + + # Elect representative for merge sets + merge_set_to_name = {} + for merge_set in viewvalues(self.merge_state): + frozen_merge_set = frozenset(merge_set) + merge_sets.add(frozen_merge_set) + var_name = self.get_best_merge_set_name(merge_set) + merge_set_to_name[frozen_merge_set] = var_name + + # Generate replacement of variable by their representative + for merge_set in merge_sets: + var_name = merge_set_to_name[merge_set] + merge_set = list(merge_set) + for var in merge_set: + replace[var] = var_name + + self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) + + def remove_phi(self): + """ + Remove phi operators in @ifcfg + @ircfg: IRDiGraph instance + """ + + for irblock in list(viewvalues(self.ssa.graph.blocks)): + assignblks = list(irblock) + out = {} + for dst, src in viewitems(assignblks[0]): + if src.is_op('Phi'): + assert set([dst]) == set(src.args) + continue + out[dst] = src + assignblks[0] = AssignBlock(out, assignblks[0].instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_db, irblock.loc_key, assignblks) + + def remove_assign_eq(self): + """ + Remove trivial expressions (a=a) in the current graph + """ + for irblock in list(viewvalues(self.ssa.graph.blocks)): + assignblks = list(irblock) + for i, assignblk in enumerate(assignblks): + out = {} + for dst, src in viewitems(assignblk): + if dst == src: + continue + out[dst] = src + assignblks[i] = AssignBlock(out, assignblk.instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_db, irblock.loc_key, assignblks) |