diff options
Diffstat (limited to 'miasm2/analysis')
| -rw-r--r-- | miasm2/analysis/data_flow.py | 156 | ||||
| -rw-r--r-- | miasm2/analysis/simplifier.py | 303 | ||||
| -rw-r--r-- | miasm2/analysis/ssa.py | 9 |
3 files changed, 444 insertions, 24 deletions
diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index 53033d7e..fb09a6cb 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -11,6 +11,7 @@ from miasm2.expression.expression_helper import possible_values from miasm2.analysis.ssa import get_phi_sources_parent_block, \ irblock_has_phi + class ReachingDefinitions(dict): """ Computes for each assignblock the set of reaching definitions. @@ -510,17 +511,19 @@ def remove_empty_assignblks(ircfg): modified = False for loc_key, block in ircfg.blocks.iteritems(): irs = [] + block_modified = False for assignblk in block: if len(assignblk): irs.append(assignblk) else: - modified = True - ircfg.blocks[loc_key] = IRBlock(loc_key, irs) - + block_modified = True + if block_modified: + new_irblock = IRBlock(loc_key, irs) + ircfg.blocks[loc_key] = new_irblock + modified = True return modified - class SSADefUse(DiGraph): """ Generate DefUse information from SSA transformation @@ -635,17 +638,16 @@ def expr_has_call(expr): return expr_test_visit(expr, expr_has_call_test) -class PropagateExpr(object): - - def assignblk_is_propagation_barrier(self, assignblk): - for dst, src in assignblk.iteritems(): - if expr_has_call(src): - return True - if dst.is_mem(): - return True - return False +class PropagateThroughExprId(object): + """ + Propagate expressions though ExprId + """ def has_propagation_barrier(self, assignblks): + """ + Return True if propagation cannot cross the @assignblks + @assignblks: list of AssignBlock to check + """ for assignblk in assignblks: for dst, src in assignblk.iteritems(): if expr_has_call(src): @@ -655,13 +657,19 @@ class PropagateExpr(object): return False def is_mem_written(self, ssa, node, successor): + """ + Return True if memory is written at least once between @node and + @successor + + @node: Location of the block to start with + @successor: Location of last block + """ loc_a, index_a, reg_a = node loc_b, index_b, reg_b = successor block_b = ssa.graph.blocks[loc_b] nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, loc_a, loc_b) - if loc_a == loc_b: # src is dst assert nodes_to_do == set([loc_a]) @@ -703,7 +711,7 @@ class PropagateExpr(object): return False return True - def propagate(self, ssa, head): + def get_candidates(self, ssa, head, max_expr_depth): defuse = SSADefUse.from_ssa(ssa) to_replace = {} node_to_reg = {} @@ -717,9 +725,20 @@ class PropagateExpr(object): continue if node.var.is_mem(): continue + if max_expr_depth is not None and len(str(src)) > max_expr_depth: + continue to_replace[node.var] = src node_to_reg[node] = node.var + return node_to_reg, to_replace, defuse + def propagate(self, ssa, head, max_expr_depth=None): + """ + Do expression propagation + @ssa: SSADiGraph instance + @head: the head location of the graph + @max_expr_depth: the maximum allowed depth of an expression + """ + node_to_reg, to_replace, defuse = self.get_candidates(ssa, head, max_expr_depth) modified = False for node, reg in node_to_reg.iteritems(): for successor in defuse.successors(node): @@ -741,8 +760,7 @@ class PropagateExpr(object): continue if src.is_mem(): - ptr = src.ptr - ptr = ptr.replace_expr(replace) + ptr = src.ptr.replace_expr(replace) new_src = ExprMem(ptr, src.size) else: new_src = src.replace_expr(replace) @@ -750,8 +768,7 @@ class PropagateExpr(object): if dst.is_id(): new_dst = dst elif dst.is_mem(): - ptr = dst.ptr - ptr = ptr.replace_expr(replace) + ptr = dst.ptr.replace_expr(replace) new_dst = ExprMem(ptr, dst.size) else: new_dst = dst.replace_expr(replace) @@ -764,6 +781,105 @@ class PropagateExpr(object): assignblks[node_b.index] = out new_block = IRBlock(block.loc_key, assignblks) ssa.graph.blocks[block.loc_key] = new_block + + return modified + + + +class PropagateExprIntThroughExprId(PropagateThroughExprId): + """ + Propagate ExprInt though ExprId: classic constant propagation + This is a sub family of PropagateThroughExprId. + It reduces leaves in expressions of a program. + """ + + def get_candidates(self, ssa, head, max_expr_depth): + defuse = SSADefUse.from_ssa(ssa) + + to_replace = {} + node_to_reg = {} + for node in defuse.nodes(): + src = defuse.get_node_target(node) + if not src.is_int(): + continue + if expr_has_call(src): + continue + if node.var.is_mem(): + continue + to_replace[node.var] = src + node_to_reg[node] = node.var + return node_to_reg, to_replace, defuse + + def propagation_allowed(self, ssa, to_replace, node_a, node_b): + """ + Propagating ExprInt is always ok + """ + return True + + +class PropagateThroughExprMem(object): + """ + Propagate through ExprMem in very simple cases: + - if no memory write between source and target + - if source does not contain any memory reference + """ + + def propagate(self, ssa, head, max_expr_depth=None): + ircfg = ssa.graph + todo = set() + modified = False + for block in ircfg.blocks.itervalues(): + for i, assignblk in enumerate(block): + for dst, src in assignblk.iteritems(): + if not dst.is_mem(): + continue + if expr_has_mem(src): + continue + todo.add((block.loc_key, i + 1, dst, src)) + ptr = dst.ptr + for size in xrange(8, dst.size, 8): + todo.add((block.loc_key, i + 1, ExprMem(ptr, size), src[:size])) + + while todo: + loc_key, index, mem_dst, mem_src = todo.pop() + block = ircfg.blocks[loc_key] + assignblks = list(block) + block_modified = False + for i in xrange(index, len(block)): + assignblk = block[i] + write_mem = False + assignblk_modified = False + out = dict(assignblk) + out_new = {} + for dst, src in out.iteritems(): + if dst.is_mem(): + write_mem = True + if dst != mem_dst and mem_dst in dst: + dst = dst.replace_expr({mem_dst:mem_src}) + if mem_dst in src: + src = src.replace_expr({mem_dst:mem_src}) + out_new[dst] = src + if out != out_new: + assignblk_modified = True + + if assignblk_modified: + assignblks[i] = AssignBlock(out_new, assignblk.instr) + block_modified = True + if write_mem: + break + else: + # If no memory written, we may propagate to sons + # if son has only parent + for successor in ircfg.successors(loc_key): + predecessors = ircfg.predecessors(successor) + if len(predecessors) != 1: + continue + todo.add((successor, 0, mem_dst, mem_src)) + + if block_modified: + modified = True + new_block = IRBlock(block.loc_key, assignblks) + ircfg.blocks[block.loc_key] = new_block return modified @@ -971,7 +1087,7 @@ def load_from_int(ir_arch, bs, is_addr_ro_variable): """ modified = False - for label, block in ir_arch.blocks.iteritems(): + for block in ir_arch.blocks.itervalues(): assignblks = list() for assignblk in block: out = {} diff --git a/miasm2/analysis/simplifier.py b/miasm2/analysis/simplifier.py new file mode 100644 index 00000000..ca8e74fb --- /dev/null +++ b/miasm2/analysis/simplifier.py @@ -0,0 +1,303 @@ +""" +Apply simplification passes to an IR cfg +""" + +import logging +from functools import wraps +from miasm2.analysis.ssa import SSADiGraph +from miasm2.analysis.outofssa import UnSSADiGraph +from miasm2.analysis.data_flow import DiGraphLivenessSSA +from miasm2.expression.simplifications import expr_simp +from miasm2.analysis.data_flow import dead_simp, \ + merge_blocks, remove_empty_assignblks, \ + PropagateExprIntThroughExprId, PropagateThroughExprId, \ + PropagateThroughExprMem, del_unused_edges + + +log = logging.getLogger("simplifier") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARNING) + + +def fix_point(func): + @wraps(func) + def ret_func(self, ircfg, head): + log.debug('[%s]: start', func.func_name) + has_been_modified = False + modified = True + while modified: + modified = func(self, ircfg, head) + has_been_modified |= modified + log.debug( + '[%s]: stop %r', + func.func_name, + has_been_modified + ) + return has_been_modified + return ret_func + + +class IRCFGSimplifier(object): + """ + Simplify an IRCFG + This class applies passes until reaching a fix point + """ + + def __init__(self, ir_arch): + self.ir_arch = ir_arch + self.init_passes() + + def init_passes(self): + """ + Init the array of simplification passes + """ + self.passes = [] + + @fix_point + def simplify(self, ircfg, head): + """ + Apply passes until reaching a fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = False + for simplify_pass in self.passes: + modified |= simplify_pass(ircfg, head) + return modified + + def __call__(self, ircfg, head): + return self.simplify(ircfg, head) + + +class IRCFGSimplifierCommon(IRCFGSimplifier): + """ + Simplify an IRCFG + This class applies following passes until reaching a fix point: + - simplify_ircfg + - do_dead_simp_ircfg + """ + def __init__(self, ir_arch, expr_simp=expr_simp): + self.expr_simp = expr_simp + super(IRCFGSimplifierCommon, self).__init__(ir_arch) + + def init_passes(self): + self.passes = [ + self.simplify_ircfg, + self.do_dead_simp_ircfg, + ] + + @fix_point + def simplify_ircfg(self, ircfg, _head): + """ + Apply self.expr_simp on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + """ + modified = ircfg.simplify(self.expr_simp) + return modified + + @fix_point + def do_dead_simp_ircfg(self, ircfg, head): + """ + Apply: + - dead_simp + - remove_empty_assignblks + - merge_blocks + on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = dead_simp(self.ir_arch, ircfg) + modified |= remove_empty_assignblks(ircfg) + modified |= merge_blocks(ircfg, set([head])) + return modified + + +class IRCFGSimplifierSSA(IRCFGSimplifierCommon): + """ + Simplify an IRCFG. + The IRCF is first transformed in SSA, then apply transformations passes + and apply out-of-ssa. Final passes of IRcfgSimplifier are applied + + This class apply following pass until reaching a fix point: + - do_propagate_int + - do_propagate_mem + - do_propagate_expr + - do_dead_simp_ssa + """ + + def __init__(self, ir_arch, expr_simp=expr_simp): + super(IRCFGSimplifierSSA, self).__init__(ir_arch, expr_simp) + + self.ir_arch.ssa_var = {} + self.all_ssa_vars = {} + + self.ssa_forbidden_regs = self.get_forbidden_regs() + + self.propag_int = PropagateExprIntThroughExprId() + self.propag_expr = PropagateThroughExprId() + self.propag_mem = PropagateThroughExprMem() + + def get_forbidden_regs(self): + """ + Return a set of immutable register during SSA transformation + """ + regs = set( + [ + self.ir_arch.pc, + self.ir_arch.IRDst, + self.ir_arch.arch.regs.exception_flags + ] + ) + return regs + + def init_passes(self): + """ + Init the array of simplification passes + """ + self.passes = [ + self.simplify_ssa, + self.do_propagate_int, + self.do_propagate_mem, + self.do_propagate_expr, + self.do_dead_simp_ssa, + ] + + def ircfg_to_ssa(self, ircfg, head): + """ + Apply the SSA transformation to @ircfg using it's @head + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + ssa = SSADiGraph(ircfg) + ssa.immutable_ids.update(self.ssa_forbidden_regs) + ssa.ssa_variable_to_expr.update(self.all_ssa_vars) + ssa.transform(head) + self.all_ssa_vars.update(ssa.ssa_variable_to_expr) + self.ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) + return ssa + + def ssa_to_unssa(self, ssa, head): + """ + Apply the out-of-ssa transformation to @ssa using it's @head + + @ssa: SSADiGraph instance + @head: Location instance of the graph head + """ + cfg_liveness = DiGraphLivenessSSA(ssa.graph) + cfg_liveness.init_var_info(self.ir_arch) + cfg_liveness.compute_liveness() + + UnSSADiGraph(ssa, head, cfg_liveness) + return ssa.graph + + @fix_point + def simplify_ssa(self, ssa, _head): + """ + Apply self.expr_simp on the @ssa.graph until reaching fix point + Return True if the graph has been modified + + @ssa: SSADiGraph instance + """ + modified = ssa.graph.simplify(self.expr_simp) + return modified + + @fix_point + def do_propagate_int(self, ssa, head): + """ + Constant propagation in the @ssa graph + @head: Location instance of the graph head + """ + modified = self.propag_int.propagate(ssa, head) + modified |= ssa.graph.simplify(self.expr_simp) + modified |= del_unused_edges(ssa.graph, set([head])) + return modified + + @fix_point + def do_propagate_mem(self, ssa, head): + """ + Propagation of expression based on ExprInt/ExprId in the @ssa graph + @head: Location instance of the graph head + """ + modified = self.propag_mem.propagate(ssa, head) + modified |= ssa.graph.simplify(self.expr_simp) + modified |= del_unused_edges(ssa.graph, set([head])) + return modified + + @fix_point + def do_propagate_expr(self, ssa, head): + """ + Expressions propagation through ExprId in the @ssa graph + @head: Location instance of the graph head + """ + modified = self.propag_expr.propagate(ssa, head) + modified |= ssa.graph.simplify(self.expr_simp) + modified |= del_unused_edges(ssa.graph, set([head])) + return modified + + @fix_point + def do_dead_simp_ssa(self, ssa, head): + """ + Apply: + - dead_simp + - remove_empty_assignblks + - del_unused_edges + - merge_blocks + on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = dead_simp(self.ir_arch, ssa.graph) + modified |= remove_empty_assignblks(ssa.graph) + modified |= del_unused_edges(ssa.graph, set([head])) + modified |= merge_blocks(ssa.graph, set([head])) + return modified + + def do_simplify(self, ssa, head): + """ + Apply passes until reaching a fix point + Return True if the graph has been modified + """ + return super(IRCFGSimplifierSSA, self).simplify(ssa, head) + + def do_simplify_loop(self, ssa, head): + """ + Apply do_simplify until reaching a fix point + SSA is updated between each do_simplify + Return True if the graph has been modified + """ + modified = True + while modified: + modified = self.do_simplify(ssa, head) + # Update ssa structs + ssa = self.ircfg_to_ssa(ssa.graph, head) + return ssa + + def simplify(self, ircfg, head): + """ + Apply SSA transformation to @ircfg + Apply passes until reaching a fix point + Apply out-of-ssa transformation + Apply post simplification passes + + Updated simplified IRCFG instance and return it + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + ssa = self.ircfg_to_ssa(ircfg, head) + ssa = self.do_simplify_loop(ssa, head) + ircfg = self.ssa_to_unssa(ssa, head) + ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.simplify(ircfg, head) + return ircfg diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py index 5e1a872b..036d92ca 100644 --- a/miasm2/analysis/ssa.py +++ b/miasm2/analysis/ssa.py @@ -595,10 +595,11 @@ class SSADiGraph(SSA): # Replace non modified node used in phi with new variable self.ircfg.simplify(lambda expr:expr.replace_expr(var_to_newname)) - irblock = self.ircfg.blocks[head] - assignblks = list(irblock) - assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] - self.ircfg.blocks[head] = IRBlock(head, assignblks) + if newname_to_var: + irblock = self.ircfg.blocks[head] + assignblks = list(irblock) + assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] + self.ircfg.blocks[head] = IRBlock(head, assignblks) # Updt structure for loc_key in self._phinodes: |