about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--example/disasm/full.py37
-rw-r--r--example/expression/asm_to_ir.py6
-rw-r--r--example/expression/constant_propagation.py5
-rw-r--r--example/expression/graph_dataflow.py6
-rw-r--r--miasm/analysis/data_flow.py238
-rw-r--r--miasm/analysis/simplifier.py16
-rw-r--r--miasm/ir/analysis.py6
-rw-r--r--test/analysis/data_flow.py5
8 files changed, 203 insertions, 116 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py
index a28d548e..d4fae867 100644
--- a/example/disasm/full.py
+++ b/example/disasm/full.py
@@ -9,7 +9,7 @@ from miasm.analysis.binary import Container
 from miasm.core.asmblock import log_asmblock, AsmCFG
 from miasm.core.interval import interval
 from miasm.analysis.machine import Machine
-from miasm.analysis.data_flow import dead_simp, \
+from miasm.analysis.data_flow import \
     DiGraphDefUse, ReachingDefinitions, \
     replace_stack_vars, load_from_int, del_unused_edges
 from miasm.expression.simplifications import expr_simp
@@ -213,7 +213,6 @@ if args.propagexpr:
 
 
 class IRADelModCallStack(ira):
-
         def call_effects(self, addr, instr):
             assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr)
             if not args.calldontmodstack:
@@ -283,34 +282,6 @@ if args.gen_ir:
 
 
 if args.propagexpr:
-    class IRAOutRegs(ira):
-        def get_out_regs(self, block):
-            regs_todo = super(self.__class__, self).get_out_regs(block)
-            out = {}
-            for assignblk in block:
-                for dst in assignblk:
-                    reg = self.ssa_var.get(dst, None)
-                    if reg is None:
-                        continue
-                    if reg in regs_todo:
-                        out[reg] = dst
-            return set(viewvalues(out))
-
-    # Add dummy dependency to uncover out regs assignment
-    for loc in ircfg_a.leaves():
-        irblock = ircfg_a.blocks.get(loc)
-        if irblock is None:
-            continue
-        regs = {}
-        for reg in ir_arch_a.get_out_regs(irblock):
-            regs[reg] = reg
-        assignblks = list(irblock)
-        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
-        assignblks.append(new_assiblk)
-        new_irblock = IRBlock(irblock.loc_key, assignblks)
-        ircfg_a.blocks[loc] = new_irblock
-
-
 
     def is_addr_ro_variable(bs, addr, size):
         """
@@ -327,9 +298,6 @@ if args.propagexpr:
             return False
         return True
 
-    ir_arch_a = IRAOutRegs(mdis.loc_db)
-
-
     class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
         def do_simplify(self, ssa, head):
             modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head)
@@ -345,14 +313,13 @@ if args.propagexpr:
                 replace_stack_vars(self.ir_arch, ircfg)
 
             ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
+            ircfg_simplifier.deadremoval.add_expr_to_original_expr(ssa.ssa_variable_to_expr)
             ircfg_simplifier.simplify(ircfg, head)
             return ircfg
 
 
 
-
     head = list(entry_points)[0]
-    ir_arch_a = IRAOutRegs(mdis.loc_db)
     simplifier = CustomIRCFGSimplifierSSA(ir_arch_a)
     ircfg = simplifier.simplify(ircfg_a, head)
     open('final.dot', 'w').write(ircfg.dot())
diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py
index 4bcbb05d..83eac728 100644
--- a/example/expression/asm_to_ir.py
+++ b/example/expression/asm_to_ir.py
@@ -8,7 +8,7 @@ from miasm.core import parse_asm
 from miasm.expression.expression import *
 from miasm.core import asmblock
 from miasm.arch.x86.ira import ir_a_x86_32
-from miasm.analysis.data_flow import dead_simp
+from miasm.analysis.data_flow import DeadRemoval
 
 
 # First, asm code
@@ -40,6 +40,8 @@ patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db)
 # Translate to IR
 ir_arch = ir_a_x86_32(loc_db)
 ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)
+deadrm = DeadRemoval(ir_arch)
+
 
 # Display IR
 for lbl, irblock in viewitems(ircfg.blocks):
@@ -48,7 +50,7 @@ for lbl, irblock in viewitems(ircfg.blocks):
 # Dead propagation
 open('graph.dot', 'w').write(ircfg.dot())
 print('*' * 80)
-dead_simp(ir_arch, ircfg)
+deadrm(ircfg)
 open('graph2.dot', 'w').write(ircfg.dot())
 
 # Display new IR
diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py
index a6efbb46..36a548c5 100644
--- a/example/expression/constant_propagation.py
+++ b/example/expression/constant_propagation.py
@@ -9,7 +9,7 @@ from argparse import ArgumentParser
 from miasm.analysis.machine import Machine
 from miasm.analysis.binary import Container
 from miasm.analysis.cst_propag import propagate_cst_expr
-from miasm.analysis.data_flow import dead_simp, \
+from miasm.analysis.data_flow import DeadRemoval, \
     merge_blocks, remove_empty_assignblks
 from miasm.expression.simplifications import expr_simp
 
@@ -29,6 +29,7 @@ cont = Container.from_stream(open(args.filename, 'rb'))
 mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
 ir_arch = machine.ira(mdis.loc_db)
 addr = int(args.address, 0)
+deadrm = DeadRemoval(ir_arch)
 
 asmcfg = mdis.dis_multiblock(addr)
 ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)
@@ -42,7 +43,7 @@ if args.simplify:
     modified = True
     while modified:
         modified = False
-        modified |= dead_simp(ir_arch, ircfg)
+        modified |= deadrm(ircfg)
         modified |= remove_empty_assignblks(ircfg)
         modified |= merge_blocks(ircfg, entry_points)
 
diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py
index c320fba0..e7386e9e 100644
--- a/example/expression/graph_dataflow.py
+++ b/example/expression/graph_dataflow.py
@@ -9,7 +9,7 @@ from miasm.expression.expression import get_expr_mem
 from miasm.analysis.data_analysis import intra_block_flow_raw, inter_block_flow
 from miasm.core.graph import DiGraph
 from miasm.ir.symbexec import SymbolicExecutionEngine
-from miasm.analysis.data_flow import dead_simp
+from miasm.analysis.data_flow import DeadRemoval
 
 
 parser = ArgumentParser("Simple expression use for generating dataflow graph")
@@ -88,7 +88,7 @@ def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb):
     for irblock in viewvalues(ircfg.blocks):
         print(irblock)
 
-    dead_simp(ir_arch, ircfg)
+    deadrm(ircfg)
 
 
     irblock_0 = None
@@ -140,6 +140,8 @@ print('ok')
 print('generating dataflow graph for:')
 ir_arch_analysis = machine.ira(mdis.loc_db)
 ircfg = ir_arch_analysis.new_ircfg_from_asmcfg(asmcfg)
+deadrm = DeadRemoval(ir_arch_analysis)
+
 
 for irblock in viewvalues(ircfg.blocks):
     print(irblock)
diff --git a/miasm/analysis/data_flow.py b/miasm/analysis/data_flow.py
index be0e4528..a47df7c9 100644
--- a/miasm/analysis/data_flow.py
+++ b/miasm/analysis/data_flow.py
@@ -201,81 +201,191 @@ class DiGraphDefUse(DiGraph):
         yield self.DotCellDescription(text="", attr={})
 
 
-def dead_simp_useful_assignblks(irarch, defuse, reaching_defs):
-    """Mark useful statements using previous reach analysis and defuse
+class DeadRemoval(object):
+    """
+    Do dead removal
+    """
 
-    Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
-    IBM Thomas J. Watson Research Division,  Algorithm MK
+    def __init__(self, ir_arch, expr_to_original_expr=None):
+        self.ir_arch = ir_arch
+        if expr_to_original_expr is None:
+            expr_to_original_expr = {}
+        self.expr_to_original_expr = expr_to_original_expr
 
-    Return a set of triplets (block, assignblk number, lvalue) of
-    useful definitions
-    PRE: compute_reach(self)
 
-    """
-    ircfg = reaching_defs.ircfg
-    useful = set()
-
-    for block_lbl, block in viewitems(ircfg.blocks):
-        successors = ircfg.successors(block_lbl)
-        for successor in successors:
-            if successor not in ircfg.blocks:
-                keep_all_definitions = True
-                break
-        else:
-            keep_all_definitions = False
-
-        # Block has a nonexistent successor or is a leaf
-        if keep_all_definitions or (len(successors) == 0):
-            valid_definitions = reaching_defs.get_definitions(block_lbl,
-                                                              len(block))
-            for lval, definitions in viewitems(valid_definitions):
-                if lval in irarch.get_out_regs(block) or keep_all_definitions:
-                    for definition in definitions:
-                        useful.add(AssignblkNode(definition[0], definition[1], lval))
-
-        # Force keeping of specific cases
+    def add_expr_to_original_expr(self, expr_to_original_expr):
+        self.expr_to_original_expr.update(expr_to_original_expr)
+
+    def is_unkillable_destination(self, lval, rval):
+        if (
+                lval.is_mem() or
+                self.ir_arch.IRDst == lval or
+                lval.is_id("exception_flags") or
+                rval.is_function_call()
+        ):
+            return True
+        return False
+
+    def get_block_useful_destinations(self, block):
+        """
+        Force keeping of specific cases
+        block: IRBlock instance
+        """
+        useful = set()
         for index, assignblk in enumerate(block):
             for lval, rval in viewitems(assignblk):
-                if (lval.is_mem() or
-                    irarch.IRDst == lval or
-                    lval.is_id("exception_flags") or
-                    rval.is_function_call()):
-                    useful.add(AssignblkNode(block_lbl, index, lval))
+                if self.is_unkillable_destination(lval, rval):
+                    useful.add(AssignblkNode(block.loc_key, index, lval))
+        return useful
+
+    def is_tracked_var(self, lval, variable):
+        new_lval = self.expr_to_original_expr.get(lval, lval)
+        return new_lval == variable
 
-    # Useful nodes dependencies
-    for node in useful:
-        for parent in defuse.reachable_parents(node):
-            yield parent
+    def find_definitions_from_worklist(self, worklist, ircfg):
+        """
+        Find variables definition in @worklist by browsing the @ircfg
+        """
+        locs_done = set()
 
+        defs = set()
 
-def dead_simp(irarch, ircfg):
-    """
-    Remove useless assignments.
+        while worklist:
+            found = False
+            elt = worklist.pop()
+            if elt in locs_done:
+                continue
+            locs_done.add(elt)
+            variable, loc_key = elt
+            block = ircfg.get_block(loc_key)
 
-    This function is used to analyse relation of a * complete function *
-    This means the blocks under study represent a solid full function graph.
+            if block is None:
+                # Consider no sources in incomplete graph
+                continue
 
-    Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
-    IBM Thomas J. Watson Research Division, page 43
+            for index, assignblk in reversed(list(enumerate(block))):
+                for dst, src in viewitems(assignblk):
+                    if self.is_tracked_var(dst, variable):
+                        defs.add(AssignblkNode(loc_key, index, dst))
+                        found = True
+                        break
+                if found:
+                    break
 
-    @ircfg: IntermediateRepresentation instance
-    """
+            if not found:
+                for predecessor in ircfg.predecessors(loc_key):
+                    worklist.add((variable, predecessor))
+
+        return defs
+
+    def find_out_regs_definitions_from_block(self, block, ircfg):
+        """
+        Find definitions of out regs starting from @block
+        """
+        worklist = set()
+        for reg in self.ir_arch.get_out_regs(block):
+            worklist.add((reg, block.loc_key))
+        ret = self.find_definitions_from_worklist(worklist, ircfg)
+        return ret
+
+
+    def add_def_for_incomplete_leaf(self, block, ircfg, reaching_defs):
+        """
+        Add valid definitions at end of @block plus out regs
+        """
+        valid_definitions = reaching_defs.get_definitions(
+            block.loc_key,
+            len(block)
+        )
+        worklist = set()
+        for lval, definitions in viewitems(valid_definitions):
+            for definition in definitions:
+                new_lval = self.expr_to_original_expr.get(lval, lval)
+                worklist.add((new_lval, block.loc_key))
+        ret = self.find_definitions_from_worklist(worklist, ircfg)
+        useful = ret
+        useful.update(self.find_out_regs_definitions_from_block(block, ircfg))
+        return useful
+
+    def get_useful_assignments(self, ircfg, defuse, reaching_defs):
+        """
+        Mark useful statements using previous reach analysis and defuse
+
+        Return a set of triplets (block, assignblk number, lvalue) of
+        useful definitions
+        PRE: compute_reach(self)
+
+        """
+
+        useful = set()
+
+        for block_lbl, block in viewitems(ircfg.blocks):
+            block = ircfg.get_block(block_lbl)
+            if block is None:
+                # skip unknown blocks: won't generate dependencies
+                continue
+
+            block_useful = self.get_block_useful_destinations(block)
+            useful.update(block_useful)
+
+
+            successors = ircfg.successors(block_lbl)
+            for successor in successors:
+                if successor not in ircfg.blocks:
+                    keep_all_definitions = True
+                    break
+            else:
+                keep_all_definitions = False
+
+            if keep_all_definitions:
+                useful.update(self.add_def_for_incomplete_leaf(block, ircfg, reaching_defs))
+                continue
+
+            if len(successors) == 0:
+                useful.update(self.find_out_regs_definitions_from_block(block, ircfg))
+            else:
+                continue
 
-    modified = False
-    reaching_defs = ReachingDefinitions(ircfg)
-    defuse = DiGraphDefUse(reaching_defs, deref_mem=True)
-    useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs))
-    for block in list(viewvalues(ircfg.blocks)):
-        irs = []
-        for idx, assignblk in enumerate(block):
-            new_assignblk = dict(assignblk)
-            for lval in assignblk:
-                if AssignblkNode(block.loc_key, idx, lval) not in useful:
-                    del new_assignblk[lval]
-                    modified = True
-            irs.append(AssignBlock(new_assignblk, assignblk.instr))
-        ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs)
-    return modified
+
+
+        # Useful nodes dependencies
+        for node in useful:
+            for parent in defuse.reachable_parents(node):
+                yield parent
+
+    def do_dead_removal(self, ircfg):
+        """
+        Remove useless assignments.
+
+        This function is used to analyse relation of a * complete function *
+        This means the blocks under study represent a solid full function graph.
+
+        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
+        IBM Thomas J. Watson Research Division, page 43
+
+        @ircfg: IntermediateRepresentation instance
+        """
+
+        modified = False
+        reaching_defs = ReachingDefinitions(ircfg)
+        defuse = DiGraphDefUse(reaching_defs, deref_mem=True)
+        useful = self.get_useful_assignments(ircfg, defuse, reaching_defs)
+        useful = set(useful)
+        for block in list(viewvalues(ircfg.blocks)):
+            irs = []
+            for idx, assignblk in enumerate(block):
+                new_assignblk = dict(assignblk)
+                for lval in assignblk:
+                    if AssignblkNode(block.loc_key, idx, lval) not in useful:
+                        del new_assignblk[lval]
+                        modified = True
+                irs.append(AssignBlock(new_assignblk, assignblk.instr))
+            ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs)
+        return modified
+
+    def __call__(self, ircfg):
+        ret = self.do_dead_removal(ircfg)
+        return ret
 
 
 def _test_merge_next_block(ircfg, loc_key):
@@ -1528,6 +1638,8 @@ def del_unused_edges(ircfg, heads):
     for node in nodes_to_del:
         block = ircfg.blocks[node]
         ircfg.del_node(node)
+        del ircfg.blocks[node]
+
         for assignblock in block:
             for dst in assignblock:
                 deleted_vars.add(dst)
diff --git a/miasm/analysis/simplifier.py b/miasm/analysis/simplifier.py
index 8c448991..a8cd4eb8 100644
--- a/miasm/analysis/simplifier.py
+++ b/miasm/analysis/simplifier.py
@@ -8,7 +8,8 @@ from miasm.analysis.ssa import SSADiGraph
 from miasm.analysis.outofssa import UnSSADiGraph
 from miasm.analysis.data_flow import DiGraphLivenessSSA
 from miasm.expression.simplifications import expr_simp
-from miasm.analysis.data_flow import dead_simp, \
+from miasm.ir.ir import AssignBlock, IRBlock
+from miasm.analysis.data_flow import DeadRemoval, \
     merge_blocks, remove_empty_assignblks, \
     PropagateExprIntThroughExprId, PropagateThroughExprId, \
     PropagateThroughExprMem, del_unused_edges
@@ -83,6 +84,7 @@ class IRCFGSimplifierCommon(IRCFGSimplifier):
     def __init__(self, ir_arch, expr_simp=expr_simp):
         self.expr_simp = expr_simp
         super(IRCFGSimplifierCommon, self).__init__(ir_arch)
+        self.deadremoval = DeadRemoval(self.ir_arch)
 
     def init_passes(self):
         self.passes = [
@@ -114,7 +116,7 @@ class IRCFGSimplifierCommon(IRCFGSimplifier):
         @ircfg: IRCFG instance to simplify
         @head: Location instance of the ircfg head
         """
-        modified = dead_simp(self.ir_arch, ircfg)
+        modified = self.deadremoval(ircfg)
         modified |= remove_empty_assignblks(ircfg)
         modified |= merge_blocks(ircfg, set([head]))
         return modified
@@ -144,6 +146,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
         self.propag_int = PropagateExprIntThroughExprId()
         self.propag_expr = PropagateThroughExprId()
         self.propag_mem = PropagateThroughExprMem()
+        self.deadremoval = DeadRemoval(self.ir_arch, self.all_ssa_vars)
 
     def get_forbidden_regs(self):
         """
@@ -170,6 +173,8 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
             self.do_dead_simp_ssa,
         ]
 
+
+
     def ircfg_to_ssa(self, ircfg, head):
         """
         Apply the SSA transformation to @ircfg using it's @head
@@ -247,7 +252,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
     def do_dead_simp_ssa(self, ssa, head):
         """
         Apply:
-        - dead_simp
+        - deadrm
         - remove_empty_assignblks
         - del_unused_edges
         - merge_blocks
@@ -257,7 +262,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
         @ircfg: IRCFG instance to simplify
         @head: Location instance of the ircfg head
         """
-        modified = dead_simp(self.ir_arch, ssa.graph)
+        modified = self.deadremoval(ssa)
         modified |= remove_empty_assignblks(ssa.graph)
         modified |= del_unused_edges(ssa.graph, set([head]))
         modified |= merge_blocks(ssa.graph, set([head]))
@@ -285,6 +290,7 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
 
     def simplify(self, ircfg, head):
         """
+        Add access to "abi out regs" in each leaf block
         Apply SSA transformation to @ircfg
         Apply passes until reaching a fix point
         Apply out-of-ssa transformation
@@ -295,9 +301,11 @@ class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
         @ircfg: IRCFG instance to simplify
         @head: Location instance of the ircfg head
         """
+
         ssa = self.ircfg_to_ssa(ircfg, head)
         ssa = self.do_simplify_loop(ssa, head)
         ircfg = self.ssa_to_unssa(ssa, head)
         ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
+        ircfg_simplifier.deadremoval.add_expr_to_original_expr(self.all_ssa_vars)
         ircfg_simplifier.simplify(ircfg, head)
         return ircfg
diff --git a/miasm/ir/analysis.py b/miasm/ir/analysis.py
index 774e66f7..9aa61f59 100644
--- a/miasm/ir/analysis.py
+++ b/miasm/ir/analysis.py
@@ -5,7 +5,6 @@ import logging
 
 from miasm.ir.ir import IntermediateRepresentation, AssignBlock
 from miasm.expression.expression import ExprOp, ExprAssign
-from miasm.analysis.data_flow import dead_simp as new_dead_simp_imp
 
 
 log = logging.getLogger("analysis")
@@ -106,8 +105,3 @@ class ira(IntermediateRepresentation):
     def sizeof_pointer(self):
         "Return the size of a void* in bits"
         raise NotImplementedError("Abstract method")
-
-    def dead_simp(self, ircfg):
-        """Deprecated: See miasm.analysis.data_flow.dead_simp()"""
-        warnings.warn('DEPRECATION WARNING: Please use miasm.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()')
-        new_dead_simp_imp(self, ircfg)
diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py
index 259aca7c..98efecbe 100644
--- a/test/analysis/data_flow.py
+++ b/test/analysis/data_flow.py
@@ -5,7 +5,7 @@ from future.utils import viewitems
 
 from miasm.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem
 from miasm.core.locationdb import LocationDB
-from miasm.analysis.data_flow import *
+from miasm.analysis.data_flow import DeadRemoval, ReachingDefinitions, DiGraphDefUse
 from miasm.ir.analysis import ira
 from miasm.ir.ir import IRBlock, AssignBlock
 
@@ -82,6 +82,7 @@ class IRATest(ira):
         return set([self.ret_reg, self.sp])
 
 IRA = IRATest(loc_db)
+deadrm = DeadRemoval(IRA)
 
 # graph 1 : Simple graph with dead and alive variables
 
@@ -696,7 +697,7 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA),
     defuse = DiGraphDefUse(reaching_defs, deref_mem=True)
 
     # # Simplify graph
-    dead_simp(IRA, g_ira)
+    deadrm(g_ira)
 
     # # Print simplified graph, for debug
     open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.dot())