diff options
| author | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2019-03-18 09:06:32 +0100 |
|---|---|---|
| committer | serpilliere <serpilliere@droids-corp.org> | 2020-02-14 16:41:23 +0100 |
| commit | 215c5ebfe9d0beed56f9391cb517ccbb7fa4f4f8 (patch) | |
| tree | 37fef0c8bf6d0daed22ad73bcf1dfa4295280ab5 | |
| parent | fc6bb3ce49ea44012a762b207a39301825e9648a (diff) | |
| download | miasm-215c5ebfe9d0beed56f9391cb517ccbb7fa4f4f8.tar.gz miasm-215c5ebfe9d0beed56f9391cb517ccbb7fa4f4f8.zip | |
Analysis: dead simp to class
| -rw-r--r-- | example/disasm/full.py | 37 | ||||
| -rw-r--r-- | example/expression/asm_to_ir.py | 6 | ||||
| -rw-r--r-- | example/expression/constant_propagation.py | 5 | ||||
| -rw-r--r-- | example/expression/graph_dataflow.py | 6 | ||||
| -rw-r--r-- | miasm/analysis/data_flow.py | 238 | ||||
| -rw-r--r-- | miasm/analysis/simplifier.py | 16 | ||||
| -rw-r--r-- | miasm/ir/analysis.py | 6 | ||||
| -rw-r--r-- | test/analysis/data_flow.py | 5 |
8 files changed, 203 insertions, 116 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index a28d548e..d4fae867 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -9,7 +9,7 @@ from miasm.analysis.binary import Container from miasm.core.asmblock import log_asmblock, AsmCFG from miasm.core.interval import interval from miasm.analysis.machine import Machine -from miasm.analysis.data_flow import dead_simp, \ +from miasm.analysis.data_flow import \ DiGraphDefUse, ReachingDefinitions, \ replace_stack_vars, load_from_int, del_unused_edges from miasm.expression.simplifications import expr_simp @@ -213,7 +213,6 @@ if args.propagexpr: class IRADelModCallStack(ira): - def call_effects(self, addr, instr): assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr) if not args.calldontmodstack: @@ -283,34 +282,6 @@ if args.gen_ir: if args.propagexpr: - class IRAOutRegs(ira): - def get_out_regs(self, block): - regs_todo = super(self.__class__, self).get_out_regs(block) - out = {} - for assignblk in block: - for dst in assignblk: - reg = self.ssa_var.get(dst, None) - if reg is None: - continue - if reg in regs_todo: - out[reg] = dst - return set(viewvalues(out)) - - # Add dummy dependency to uncover out regs assignment - for loc in ircfg_a.leaves(): - irblock = ircfg_a.blocks.get(loc) - if irblock is None: - continue - regs = {} - for reg in ir_arch_a.get_out_regs(irblock): - regs[reg] = reg - assignblks = list(irblock) - new_assiblk = AssignBlock(regs, assignblks[-1].instr) - assignblks.append(new_assiblk) - new_irblock = IRBlock(irblock.loc_key, assignblks) - ircfg_a.blocks[loc] = new_irblock - - def is_addr_ro_variable(bs, addr, size): """ @@ -327,9 +298,6 @@ if args.propagexpr: return False return True - ir_arch_a = IRAOutRegs(mdis.loc_db) - - class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA): def do_simplify(self, ssa, head): modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head) @@ -345,14 +313,13 @@ if args.propagexpr: replace_stack_vars(self.ir_arch, ircfg) ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.deadremoval.add_expr_to_original_expr(ssa.ssa_variable_to_expr) ircfg_simplifier.simplify(ircfg, head) return ircfg - head = list(entry_points)[0] - ir_arch_a = IRAOutRegs(mdis.loc_db) simplifier = CustomIRCFGSimplifierSSA(ir_arch_a) ircfg = simplifier.simplify(ircfg_a, head) open('final.dot', 'w').write(ircfg.dot()) diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 4bcbb05d..83eac728 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -8,7 +8,7 @@ from miasm.core import parse_asm from miasm.expression.expression import * from miasm.core import asmblock from miasm.arch.x86.ira import ir_a_x86_32 -from miasm.analysis.data_flow import dead_simp +from miasm.analysis.data_flow import DeadRemoval # First, asm code @@ -40,6 +40,8 @@ patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db) # Translate to IR ir_arch = ir_a_x86_32(loc_db) ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) +deadrm = DeadRemoval(ir_arch) + # Display IR for lbl, irblock in viewitems(ircfg.blocks): @@ -48,7 +50,7 @@ for lbl, irblock in viewitems(ircfg.blocks): # Dead propagation open('graph.dot', 'w').write(ircfg.dot()) print('*' * 80) -dead_simp(ir_arch, ircfg) +deadrm(ircfg) open('graph2.dot', 'w').write(ircfg.dot()) # Display new IR diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py index a6efbb46..36a548c5 100644 --- a/example/expression/constant_propagation.py +++ b/example/expression/constant_propagation.py @@ -9,7 +9,7 @@ from argparse import ArgumentParser from miasm.analysis.machine import Machine from miasm.analysis.binary import Container from miasm.analysis.cst_propag import propagate_cst_expr -from miasm.analysis.data_flow import dead_simp, \ +from miasm.analysis.data_flow import DeadRemoval, \ merge_blocks, remove_empty_assignblks from miasm.expression.simplifications import expr_simp @@ -29,6 +29,7 @@ cont = Container.from_stream(open(args.filename, 'rb')) mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) ir_arch = machine.ira(mdis.loc_db) addr = int(args.address, 0) +deadrm = DeadRemoval(ir_arch) asmcfg = mdis.dis_multiblock(addr) ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) @@ -42,7 +43,7 @@ if args.simplify: modified = True while modified: modified = False - modified |= dead_simp(ir_arch, ircfg) + modified |= deadrm(ircfg) modified |= remove_empty_assignblks(ircfg) modified |= merge_blocks(ircfg, entry_points) diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index c320fba0..e7386e9e 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -9,7 +9,7 @@ from miasm.expression.expression import get_expr_mem from miasm.analysis.data_analysis import intra_block_flow_raw, inter_block_flow from miasm.core.graph import DiGraph from miasm.ir.symbexec import SymbolicExecutionEngine -from miasm.analysis.data_flow import dead_simp +from miasm.analysis.data_flow import DeadRemoval parser = ArgumentParser("Simple expression use for generating dataflow graph") @@ -88,7 +88,7 @@ def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb): for irblock in viewvalues(ircfg.blocks): print(irblock) - dead_simp(ir_arch, ircfg) + deadrm(ircfg) irblock_0 = None @@ -140,6 +140,8 @@ print('ok') print('generating dataflow graph for:') ir_arch_analysis = machine.ira(mdis.loc_db) ircfg = ir_arch_analysis.new_ircfg_from_asmcfg(asmcfg) +deadrm = DeadRemoval(ir_arch_analysis) + for irblock in viewvalues(ircfg.blocks): print(irblock) diff --git a/miasm/analysis/data_flow.py b/miasm/analysis/data_flow.py index be0e4528..a47df7c9 100644 --- a/miasm/analysis/data_flow.py +++ b/miasm/analysis/data_flow.py @@ -201,81 +201,191 @@ class DiGraphDefUse(DiGraph): yield self.DotCellDescription(text="", attr={}) -def dead_simp_useful_assignblks(irarch, defuse, reaching_defs): - """Mark useful statements using previous reach analysis and defuse +class DeadRemoval(object): + """ + Do dead removal + """ - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. Watson Research Division, Algorithm MK + def __init__(self, ir_arch, expr_to_original_expr=None): + self.ir_arch = ir_arch + if expr_to_original_expr is None: + expr_to_original_expr = {} + self.expr_to_original_expr = expr_to_original_expr - Return a set of triplets (block, assignblk number, lvalue) of - useful definitions - PRE: compute_reach(self) - """ - ircfg = reaching_defs.ircfg - useful = set() - - for block_lbl, block in viewitems(ircfg.blocks): - successors = ircfg.successors(block_lbl) - for successor in successors: - if successor not in ircfg.blocks: - keep_all_definitions = True - break - else: - keep_all_definitions = False - - # Block has a nonexistent successor or is a leaf - if keep_all_definitions or (len(successors) == 0): - valid_definitions = reaching_defs.get_definitions(block_lbl, - len(block)) - for lval, definitions in viewitems(valid_definitions): - if lval in irarch.get_out_regs(block) or keep_all_definitions: - for definition in definitions: - useful.add(AssignblkNode(definition[0], definition[1], lval)) - - # Force keeping of specific cases + def add_expr_to_original_expr(self, expr_to_original_expr): + self.expr_to_original_expr.update(expr_to_original_expr) + + def is_unkillable_destination(self, lval, rval): + if ( + lval.is_mem() or + self.ir_arch.IRDst == lval or + lval.is_id("exception_flags") or + rval.is_function_call() + ): + return True + return False + + def get_block_useful_destinations(self, block): + """ + Force keeping of specific cases + block: IRBlock instance + """ + useful = set() for index, assignblk in enumerate(block): for lval, rval in viewitems(assignblk): - if (lval.is_mem() or - irarch.IRDst == lval or - lval.is_id("exception_flags") or - rval.is_function_call()): - useful.add(AssignblkNode(block_lbl, index, lval)) + if self.is_unkillable_destination(lval, rval): + useful.add(AssignblkNode(block.loc_key, index, lval)) + return useful + + def is_tracked_var(self, lval, variable): + new_lval = self.expr_to_original_expr.get(lval, lval) + return new_lval == variable - # Useful nodes dependencies - for node in useful: - for parent in defuse.reachable_parents(node): - yield parent + def find_definitions_from_worklist(self, worklist, ircfg): + """ + Find variables definition in @worklist by browsing the @ircfg + """ + locs_done = set() + defs = set() -def dead_simp(irarch, ircfg): - """ - Remove useless assignments. + while worklist: + found = False + elt = worklist.pop() + if elt in locs_done: + continue + locs_done.add(elt) + variable, loc_key = elt + block = ircfg.get_block(loc_key) - This function is used to analyse relation of a * complete function * - This means the blocks under study represent a solid full function graph. + if block is None: + # Consider no sources in incomplete graph + continue - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. Watson Research Division, page 43 + for index, assignblk in reversed(list(enumerate(block))): + for dst, src in viewitems(assignblk): + if self.is_tracked_var(dst, variable): + defs.add(AssignblkNode(loc_key, index, dst)) + found = True + break + if found: + break - @ircfg: IntermediateRepresentation instance - """ + if not found: + for predecessor in ircfg.predecessors(loc_key): + worklist.add((variable, predecessor)) + + return defs + + def find_out_regs_definitions_from_block(self, block, ircfg): + """ + Find definitions of out regs starting from @block + """ + worklist = set() + for reg in self.ir_arch.get_out_regs(block): + worklist.add((reg, block.loc_key)) + ret = self.find_definitions_from_worklist(worklist, ircfg) + return ret + + + def add_def_for_incomplete_leaf(self, block, ircfg, reaching_defs): + """ + Add valid definitions at end of @block plus out regs + """ + valid_definitions = reaching_defs.get_definitions( + block.loc_key, + len(block) + ) + worklist = set() + for lval, definitions in viewitems(valid_definitions): + for definition in definitions: + new_lval = self.expr_to_original_expr.get(lval, lval) + worklist.add((new_lval, block.loc_key)) + ret = self.find_definitions_from_worklist(worklist, ircfg) + useful = ret + useful.update(self.find_out_regs_definitions_from_block(block, ircfg)) + return useful + + def get_useful_assignments(self, ircfg, defuse, reaching_defs): + """ + Mark useful statements using previous reach analysis and defuse + + Return a set of triplets (block, assignblk number, lvalue) of + useful definitions + PRE: compute_reach(self) + + """ + + useful = set() + + for block_lbl, block in viewitems(ircfg.blocks): + block = ircfg.get_block(block_lbl) + if block is None: + # skip unknown blocks: won't generate dependencies + continue + + block_useful = self.get_block_useful_destinations(block) + useful.update(block_useful) + + + successors = ircfg.successors(block_lbl) + for successor in successors: + if successor not in ircfg.blocks: + keep_all_definitions = True + break + else: + keep_all_definitions = False + + if keep_all_definitions: + useful.update(self.add_def_for_incomplete_leaf(block, ircfg, reaching_defs)) + continue + + if len(successors) == 0: + useful.update(self.find_out_regs_definitions_from_block(block, ircfg)) + else: + continue - modified = False - reaching_defs = ReachingDefinitions(ircfg) - defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) - for block in list(viewvalues(ircfg.blocks)): - irs = [] - for idx, assignblk in enumerate(block): - new_assignblk = dict(assignblk) - for lval in assignblk: - if AssignblkNode(block.loc_key, idx, lval) not in useful: - del new_assignblk[lval] - modified = True - irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) - return modified + + + # Useful nodes dependencies + for node in useful: + for parent in defuse.reachable_parents(node): + yield parent + + def do_dead_removal(self, ircfg): + """ + Remove useless assignments. + + This function is used to analyse relation of a * complete function * + This means the blocks under study represent a solid full function graph. + + Source : Kennedy, K. (1979). A survey of data flow analysis techniques. + IBM Thomas J. Watson Research Division, page 43 + + @ircfg: IntermediateRepresentation instance + """ + + modified = False + reaching_defs = ReachingDefinitions(ircfg) + defuse = DiGraphDefUse(reaching_defs, deref_mem=True) + useful = self.get_useful_assignments(ircfg, defuse, reaching_defs) + useful = set(useful) + for block in list(viewvalues(ircfg.blocks)): + irs = [] + for idx, assignblk in enumerate(block): + new_assignblk = dict(assignblk) + for lval in assignblk: + if AssignblkNode(block.loc_key, idx, lval) not in useful: + del new_assignblk[lval] + modified = True + irs.append(AssignBlock(new_assignblk, assignblk.instr)) + ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) + return modified + + def __call__(self, ircfg): + ret = self.do_dead_removal(ircfg) + return ret def _test_merge_next_block(ircfg, loc_key): @@ -1528,6 +1638,8 @@ def del_unused_edges(ircfg, heads): for node in nodes_to_del: block = ircfg.blocks[node] ircfg.del_node(node) + del ircfg.blocks[node] + for assignblock in block: for dst in assignblock: deleted_vars.add(dst) diff --git a/miasm/analysis/simplifier.py b/miasm/analysis/simplifier.py index 8c448991..a8cd4eb8 100644 --- a/miasm/analysis/simplifier.py +++ b/miasm/analysis/simplifier.py @@ -8,7 +8,8 @@ from miasm.analysis.ssa import SSADiGraph from miasm.analysis.outofssa import UnSSADiGraph from miasm.analysis.data_flow import DiGraphLivenessSSA from miasm.expression.simplifications import expr_simp -from miasm.analysis.data_flow import dead_simp, \ +from miasm.ir.ir import AssignBlock, IRBlock +from miasm.analysis.data_flow import DeadRemoval, \ merge_blocks, remove_empty_assignblks, \ PropagateExprIntThroughExprId, PropagateThroughExprId, \ PropagateThroughExprMem, del_unused_edges @@ -83,6 +84,7 @@ class IRCFGSimplifierCommon(IRCFGSimplifier): def __init__(self, ir_arch, expr_simp=expr_simp): self.expr_simp = expr_simp super(IRCFGSimplifierCommon, self).__init__(ir_arch) + self.deadremoval = DeadRemoval(self.ir_arch) def init_passes(self): self.passes = [ @@ -114,7 +116,7 @@ class IRCFGSimplifierCommon(IRCFGSimplifier): @ircfg: IRCFG instance to simplify @head: Location instance of the ircfg head """ - modified = dead_simp(self.ir_arch, ircfg) + modified = self.deadremoval(ircfg) modified |= remove_empty_assignblks(ircfg) modified |= merge_blocks(ircfg, set([head])) return modified @@ -144,6 +146,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon): self.propag_int = PropagateExprIntThroughExprId() self.propag_expr = PropagateThroughExprId() self.propag_mem = PropagateThroughExprMem() + self.deadremoval = DeadRemoval(self.ir_arch, self.all_ssa_vars) def get_forbidden_regs(self): """ @@ -170,6 +173,8 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon): self.do_dead_simp_ssa, ] + + def ircfg_to_ssa(self, ircfg, head): """ Apply the SSA transformation to @ircfg using it's @head @@ -247,7 +252,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon): def do_dead_simp_ssa(self, ssa, head): """ Apply: - - dead_simp + - deadrm - remove_empty_assignblks - del_unused_edges - merge_blocks @@ -257,7 +262,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon): @ircfg: IRCFG instance to simplify @head: Location instance of the ircfg head """ - modified = dead_simp(self.ir_arch, ssa.graph) + modified = self.deadremoval(ssa) modified |= remove_empty_assignblks(ssa.graph) modified |= del_unused_edges(ssa.graph, set([head])) modified |= merge_blocks(ssa.graph, set([head])) @@ -285,6 +290,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon): def simplify(self, ircfg, head): """ + Add access to "abi out regs" in each leaf block Apply SSA transformation to @ircfg Apply passes until reaching a fix point Apply out-of-ssa transformation @@ -295,9 +301,11 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon): @ircfg: IRCFG instance to simplify @head: Location instance of the ircfg head """ + ssa = self.ircfg_to_ssa(ircfg, head) ssa = self.do_simplify_loop(ssa, head) ircfg = self.ssa_to_unssa(ssa, head) ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.deadremoval.add_expr_to_original_expr(self.all_ssa_vars) ircfg_simplifier.simplify(ircfg, head) return ircfg diff --git a/miasm/ir/analysis.py b/miasm/ir/analysis.py index 774e66f7..9aa61f59 100644 --- a/miasm/ir/analysis.py +++ b/miasm/ir/analysis.py @@ -5,7 +5,6 @@ import logging from miasm.ir.ir import IntermediateRepresentation, AssignBlock from miasm.expression.expression import ExprOp, ExprAssign -from miasm.analysis.data_flow import dead_simp as new_dead_simp_imp log = logging.getLogger("analysis") @@ -106,8 +105,3 @@ class ira(IntermediateRepresentation): def sizeof_pointer(self): "Return the size of a void* in bits" raise NotImplementedError("Abstract method") - - def dead_simp(self, ircfg): - """Deprecated: See miasm.analysis.data_flow.dead_simp()""" - warnings.warn('DEPRECATION WARNING: Please use miasm.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') - new_dead_simp_imp(self, ircfg) diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py index 259aca7c..98efecbe 100644 --- a/test/analysis/data_flow.py +++ b/test/analysis/data_flow.py @@ -5,7 +5,7 @@ from future.utils import viewitems from miasm.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem from miasm.core.locationdb import LocationDB -from miasm.analysis.data_flow import * +from miasm.analysis.data_flow import DeadRemoval, ReachingDefinitions, DiGraphDefUse from miasm.ir.analysis import ira from miasm.ir.ir import IRBlock, AssignBlock @@ -82,6 +82,7 @@ class IRATest(ira): return set([self.ret_reg, self.sp]) IRA = IRATest(loc_db) +deadrm = DeadRemoval(IRA) # graph 1 : Simple graph with dead and alive variables @@ -696,7 +697,7 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), defuse = DiGraphDefUse(reaching_defs, deref_mem=True) # # Simplify graph - dead_simp(IRA, g_ira) + deadrm(g_ira) # # Print simplified graph, for debug open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.dot()) |