about summary refs log tree commit diff stats
path: root/miasm2
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2')
-rw-r--r--miasm2/analysis/data_flow.py156
-rw-r--r--miasm2/analysis/simplifier.py303
-rw-r--r--miasm2/analysis/ssa.py9
3 files changed, 444 insertions, 24 deletions
diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py
index 53033d7e..fb09a6cb 100644
--- a/miasm2/analysis/data_flow.py
+++ b/miasm2/analysis/data_flow.py
@@ -11,6 +11,7 @@ from miasm2.expression.expression_helper import possible_values
 from miasm2.analysis.ssa import get_phi_sources_parent_block, \
     irblock_has_phi
 
+
 class ReachingDefinitions(dict):
     """
     Computes for each assignblock the set of reaching definitions.
@@ -510,17 +511,19 @@ def remove_empty_assignblks(ircfg):
     modified = False
     for loc_key, block in ircfg.blocks.iteritems():
         irs = []
+        block_modified = False
         for assignblk in block:
             if len(assignblk):
                 irs.append(assignblk)
             else:
-                modified = True
-        ircfg.blocks[loc_key] = IRBlock(loc_key, irs)
-
+                block_modified = True
+        if block_modified:
+            new_irblock = IRBlock(loc_key, irs)
+            ircfg.blocks[loc_key] = new_irblock
+            modified = True
     return modified
 
 
-
 class SSADefUse(DiGraph):
     """
     Generate DefUse information from SSA transformation
@@ -635,17 +638,16 @@ def expr_has_call(expr):
     return expr_test_visit(expr, expr_has_call_test)
 
 
-class PropagateExpr(object):
-
-    def assignblk_is_propagation_barrier(self, assignblk):
-        for dst, src in assignblk.iteritems():
-            if expr_has_call(src):
-                return True
-            if dst.is_mem():
-                return True
-        return False
+class PropagateThroughExprId(object):
+    """
+    Propagate expressions though ExprId
+    """
 
     def has_propagation_barrier(self, assignblks):
+        """
+        Return True if propagation cannot cross the @assignblks
+        @assignblks: list of AssignBlock to check
+        """
         for assignblk in assignblks:
             for dst, src in assignblk.iteritems():
                 if expr_has_call(src):
@@ -655,13 +657,19 @@ class PropagateExpr(object):
         return False
 
     def is_mem_written(self, ssa, node, successor):
+        """
+        Return True if memory is written at least once between @node and
+        @successor
+
+        @node: Location of the block to start with
+        @successor: Location of last block
+        """
         loc_a, index_a, reg_a = node
         loc_b, index_b, reg_b = successor
         block_b = ssa.graph.blocks[loc_b]
 
         nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, loc_a, loc_b)
 
-
         if loc_a == loc_b:
             # src is dst
             assert nodes_to_do == set([loc_a])
@@ -703,7 +711,7 @@ class PropagateExpr(object):
             return False
         return True
 
-    def propagate(self, ssa, head):
+    def get_candidates(self, ssa, head, max_expr_depth):
         defuse = SSADefUse.from_ssa(ssa)
         to_replace = {}
         node_to_reg = {}
@@ -717,9 +725,20 @@ class PropagateExpr(object):
                 continue
             if node.var.is_mem():
                 continue
+            if max_expr_depth is not None and len(str(src)) > max_expr_depth:
+                continue
             to_replace[node.var] = src
             node_to_reg[node] = node.var
+        return node_to_reg, to_replace, defuse
 
+    def propagate(self, ssa, head, max_expr_depth=None):
+        """
+        Do expression propagation
+        @ssa: SSADiGraph instance
+        @head: the head location of the graph
+        @max_expr_depth: the maximum allowed depth of an expression
+        """
+        node_to_reg, to_replace, defuse = self.get_candidates(ssa, head, max_expr_depth)
         modified = False
         for node, reg in node_to_reg.iteritems():
             for successor in defuse.successors(node):
@@ -741,8 +760,7 @@ class PropagateExpr(object):
                         continue
 
                     if src.is_mem():
-                        ptr = src.ptr
-                        ptr = ptr.replace_expr(replace)
+                        ptr = src.ptr.replace_expr(replace)
                         new_src = ExprMem(ptr, src.size)
                     else:
                         new_src = src.replace_expr(replace)
@@ -750,8 +768,7 @@ class PropagateExpr(object):
                     if dst.is_id():
                         new_dst = dst
                     elif dst.is_mem():
-                        ptr = dst.ptr
-                        ptr = ptr.replace_expr(replace)
+                        ptr = dst.ptr.replace_expr(replace)
                         new_dst = ExprMem(ptr, dst.size)
                     else:
                         new_dst = dst.replace_expr(replace)
@@ -764,6 +781,105 @@ class PropagateExpr(object):
                 assignblks[node_b.index] = out
                 new_block = IRBlock(block.loc_key, assignblks)
                 ssa.graph.blocks[block.loc_key] = new_block
+
+        return modified
+
+
+
+class PropagateExprIntThroughExprId(PropagateThroughExprId):
+    """
+    Propagate ExprInt though ExprId: classic constant propagation
+    This is a sub family of PropagateThroughExprId.
+    It reduces leaves in expressions of a program.
+    """
+
+    def get_candidates(self, ssa, head, max_expr_depth):
+        defuse = SSADefUse.from_ssa(ssa)
+
+        to_replace = {}
+        node_to_reg = {}
+        for node in defuse.nodes():
+            src = defuse.get_node_target(node)
+            if not src.is_int():
+                continue
+            if expr_has_call(src):
+                continue
+            if node.var.is_mem():
+                continue
+            to_replace[node.var] = src
+            node_to_reg[node] = node.var
+        return node_to_reg, to_replace, defuse
+
+    def propagation_allowed(self, ssa, to_replace, node_a, node_b):
+        """
+        Propagating ExprInt is always ok
+        """
+        return True
+
+
+class PropagateThroughExprMem(object):
+    """
+    Propagate through ExprMem in very simple cases:
+    - if no memory write between source and target
+    - if source does not contain any memory reference
+    """
+
+    def propagate(self, ssa, head, max_expr_depth=None):
+        ircfg = ssa.graph
+        todo = set()
+        modified = False
+        for block in ircfg.blocks.itervalues():
+            for i, assignblk in enumerate(block):
+                for dst, src in assignblk.iteritems():
+                    if not dst.is_mem():
+                        continue
+                    if expr_has_mem(src):
+                        continue
+                    todo.add((block.loc_key, i + 1, dst, src))
+                    ptr = dst.ptr
+                    for size in xrange(8, dst.size, 8):
+                        todo.add((block.loc_key, i + 1, ExprMem(ptr, size), src[:size]))
+
+        while todo:
+            loc_key, index, mem_dst, mem_src = todo.pop()
+            block = ircfg.blocks[loc_key]
+            assignblks = list(block)
+            block_modified = False
+            for i in xrange(index, len(block)):
+                assignblk = block[i]
+                write_mem = False
+                assignblk_modified = False
+                out = dict(assignblk)
+                out_new = {}
+                for dst, src in out.iteritems():
+                    if dst.is_mem():
+                        write_mem = True
+                    if dst != mem_dst and mem_dst in dst:
+                        dst = dst.replace_expr({mem_dst:mem_src})
+                    if mem_dst in src:
+                        src = src.replace_expr({mem_dst:mem_src})
+                    out_new[dst] = src
+                if out != out_new:
+                    assignblk_modified = True
+
+                if assignblk_modified:
+                    assignblks[i] = AssignBlock(out_new, assignblk.instr)
+                    block_modified = True
+                if write_mem:
+                    break
+            else:
+                # If no memory written, we may propagate to sons
+                # if son has only parent
+                for successor in ircfg.successors(loc_key):
+                    predecessors = ircfg.predecessors(successor)
+                    if len(predecessors) != 1:
+                        continue
+                    todo.add((successor, 0, mem_dst, mem_src))
+
+            if block_modified:
+                modified = True
+                new_block = IRBlock(block.loc_key, assignblks)
+                ircfg.blocks[block.loc_key] = new_block
         return modified
 
 
@@ -971,7 +1087,7 @@ def load_from_int(ir_arch, bs, is_addr_ro_variable):
     """
 
     modified = False
-    for label, block in ir_arch.blocks.iteritems():
+    for block in ir_arch.blocks.itervalues():
         assignblks = list()
         for assignblk in block:
             out = {}
diff --git a/miasm2/analysis/simplifier.py b/miasm2/analysis/simplifier.py
new file mode 100644
index 00000000..ca8e74fb
--- /dev/null
+++ b/miasm2/analysis/simplifier.py
@@ -0,0 +1,303 @@
+"""
+Apply simplification passes to an IR cfg
+"""
+
+import logging
+from functools import wraps
+from miasm2.analysis.ssa import SSADiGraph
+from miasm2.analysis.outofssa import UnSSADiGraph
+from miasm2.analysis.data_flow import DiGraphLivenessSSA
+from miasm2.expression.simplifications import expr_simp
+from miasm2.analysis.data_flow import dead_simp, \
+    merge_blocks, remove_empty_assignblks, \
+    PropagateExprIntThroughExprId, PropagateThroughExprId, \
+    PropagateThroughExprMem, del_unused_edges
+
+
+log = logging.getLogger("simplifier")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log.addHandler(console_handler)
+log.setLevel(logging.WARNING)
+
+
+def fix_point(func):
+    @wraps(func)
+    def ret_func(self, ircfg, head):
+        log.debug('[%s]: start', func.func_name)
+        has_been_modified = False
+        modified = True
+        while modified:
+            modified = func(self, ircfg, head)
+            has_been_modified |= modified
+        log.debug(
+            '[%s]: stop %r',
+            func.func_name,
+            has_been_modified
+        )
+        return has_been_modified
+    return ret_func
+
+
+class IRCFGSimplifier(object):
+    """
+    Simplify an IRCFG
+    This class applies passes until reaching a fix point
+    """
+
+    def __init__(self, ir_arch):
+        self.ir_arch = ir_arch
+        self.init_passes()
+
+    def init_passes(self):
+        """
+        Init the array of simplification passes
+        """
+        self.passes = []
+
+    @fix_point
+    def simplify(self, ircfg, head):
+        """
+        Apply passes until reaching a fix point
+        Return True if the graph has been modified
+
+        @ircfg: IRCFG instance to simplify
+        @head: Location instance of the ircfg head
+        """
+        modified = False
+        for simplify_pass in self.passes:
+            modified |= simplify_pass(ircfg, head)
+        return modified
+
+    def __call__(self, ircfg, head):
+        return self.simplify(ircfg, head)
+
+
+class IRCFGSimplifierCommon(IRCFGSimplifier):
+    """
+    Simplify an IRCFG
+    This class applies following passes until reaching a fix point:
+    - simplify_ircfg
+    - do_dead_simp_ircfg
+    """
+    def __init__(self, ir_arch, expr_simp=expr_simp):
+        self.expr_simp = expr_simp
+        super(IRCFGSimplifierCommon, self).__init__(ir_arch)
+
+    def init_passes(self):
+        self.passes = [
+            self.simplify_ircfg,
+            self.do_dead_simp_ircfg,
+        ]
+
+    @fix_point
+    def simplify_ircfg(self, ircfg, _head):
+        """
+        Apply self.expr_simp on the @ircfg until reaching fix point
+        Return True if the graph has been modified
+
+        @ircfg: IRCFG instance to simplify
+        """
+        modified = ircfg.simplify(self.expr_simp)
+        return modified
+
+    @fix_point
+    def do_dead_simp_ircfg(self, ircfg, head):
+        """
+        Apply:
+        - dead_simp
+        - remove_empty_assignblks
+        - merge_blocks
+        on the @ircfg until reaching fix point
+        Return True if the graph has been modified
+
+        @ircfg: IRCFG instance to simplify
+        @head: Location instance of the ircfg head
+        """
+        modified = dead_simp(self.ir_arch, ircfg)
+        modified |= remove_empty_assignblks(ircfg)
+        modified |= merge_blocks(ircfg, set([head]))
+        return modified
+
+
+class IRCFGSimplifierSSA(IRCFGSimplifierCommon):
+    """
+    Simplify an IRCFG.
+    The IRCF is first transformed in SSA, then apply transformations passes
+    and apply out-of-ssa. Final passes of IRcfgSimplifier are applied
+
+    This class apply following pass until reaching a fix point:
+    - do_propagate_int
+    - do_propagate_mem
+    - do_propagate_expr
+    - do_dead_simp_ssa
+    """
+
+    def __init__(self, ir_arch, expr_simp=expr_simp):
+        super(IRCFGSimplifierSSA, self).__init__(ir_arch, expr_simp)
+
+        self.ir_arch.ssa_var = {}
+        self.all_ssa_vars = {}
+
+        self.ssa_forbidden_regs = self.get_forbidden_regs()
+
+        self.propag_int = PropagateExprIntThroughExprId()
+        self.propag_expr = PropagateThroughExprId()
+        self.propag_mem = PropagateThroughExprMem()
+
+    def get_forbidden_regs(self):
+        """
+        Return a set of immutable register during SSA transformation
+        """
+        regs = set(
+            [
+                self.ir_arch.pc,
+                self.ir_arch.IRDst,
+                self.ir_arch.arch.regs.exception_flags
+            ]
+        )
+        return regs
+
+    def init_passes(self):
+        """
+        Init the array of simplification passes
+        """
+        self.passes = [
+            self.simplify_ssa,
+            self.do_propagate_int,
+            self.do_propagate_mem,
+            self.do_propagate_expr,
+            self.do_dead_simp_ssa,
+        ]
+
+    def ircfg_to_ssa(self, ircfg, head):
+        """
+        Apply the SSA transformation to @ircfg using it's @head
+
+        @ircfg: IRCFG instance to simplify
+        @head: Location instance of the ircfg head
+        """
+        ssa = SSADiGraph(ircfg)
+        ssa.immutable_ids.update(self.ssa_forbidden_regs)
+        ssa.ssa_variable_to_expr.update(self.all_ssa_vars)
+        ssa.transform(head)
+        self.all_ssa_vars.update(ssa.ssa_variable_to_expr)
+        self.ir_arch.ssa_var.update(ssa.ssa_variable_to_expr)
+        return ssa
+
+    def ssa_to_unssa(self, ssa, head):
+        """
+        Apply the out-of-ssa transformation to @ssa using it's @head
+
+        @ssa: SSADiGraph instance
+        @head: Location instance of the graph head
+        """
+        cfg_liveness = DiGraphLivenessSSA(ssa.graph)
+        cfg_liveness.init_var_info(self.ir_arch)
+        cfg_liveness.compute_liveness()
+
+        UnSSADiGraph(ssa, head, cfg_liveness)
+        return ssa.graph
+
+    @fix_point
+    def simplify_ssa(self, ssa, _head):
+        """
+        Apply self.expr_simp on the @ssa.graph until reaching fix point
+        Return True if the graph has been modified
+
+        @ssa: SSADiGraph instance
+        """
+        modified = ssa.graph.simplify(self.expr_simp)
+        return modified
+
+    @fix_point
+    def do_propagate_int(self, ssa, head):
+        """
+        Constant propagation in the @ssa graph
+        @head: Location instance of the graph head
+        """
+        modified = self.propag_int.propagate(ssa, head)
+        modified |= ssa.graph.simplify(self.expr_simp)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        return modified
+
+    @fix_point
+    def do_propagate_mem(self, ssa, head):
+        """
+        Propagation of expression based on ExprInt/ExprId in the @ssa graph
+        @head: Location instance of the graph head
+        """
+        modified = self.propag_mem.propagate(ssa, head)
+        modified |= ssa.graph.simplify(self.expr_simp)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        return modified
+
+    @fix_point
+    def do_propagate_expr(self, ssa, head):
+        """
+        Expressions propagation through ExprId in the @ssa graph
+        @head: Location instance of the graph head
+        """
+        modified = self.propag_expr.propagate(ssa, head)
+        modified |= ssa.graph.simplify(self.expr_simp)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        return modified
+
+    @fix_point
+    def do_dead_simp_ssa(self, ssa, head):
+        """
+        Apply:
+        - dead_simp
+        - remove_empty_assignblks
+        - del_unused_edges
+        - merge_blocks
+        on the @ircfg until reaching fix point
+        Return True if the graph has been modified
+
+        @ircfg: IRCFG instance to simplify
+        @head: Location instance of the ircfg head
+        """
+        modified = dead_simp(self.ir_arch, ssa.graph)
+        modified |= remove_empty_assignblks(ssa.graph)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        modified |= merge_blocks(ssa.graph, set([head]))
+        return modified
+
+    def do_simplify(self, ssa, head):
+        """
+        Apply passes until reaching a fix point
+        Return True if the graph has been modified
+        """
+        return super(IRCFGSimplifierSSA, self).simplify(ssa, head)
+
+    def do_simplify_loop(self, ssa, head):
+        """
+        Apply do_simplify until reaching a fix point
+        SSA is updated between each do_simplify
+        Return True if the graph has been modified
+        """
+        modified = True
+        while modified:
+            modified = self.do_simplify(ssa, head)
+            # Update ssa structs
+            ssa = self.ircfg_to_ssa(ssa.graph, head)
+        return ssa
+
+    def simplify(self, ircfg, head):
+        """
+        Apply SSA transformation to @ircfg
+        Apply passes until reaching a fix point
+        Apply out-of-ssa transformation
+        Apply post simplification passes
+
+        Updated simplified IRCFG instance and return it
+
+        @ircfg: IRCFG instance to simplify
+        @head: Location instance of the ircfg head
+        """
+        ssa = self.ircfg_to_ssa(ircfg, head)
+        ssa = self.do_simplify_loop(ssa, head)
+        ircfg = self.ssa_to_unssa(ssa, head)
+        ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
+        ircfg_simplifier.simplify(ircfg, head)
+        return ircfg
diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py
index 5e1a872b..036d92ca 100644
--- a/miasm2/analysis/ssa.py
+++ b/miasm2/analysis/ssa.py
@@ -595,10 +595,11 @@ class SSADiGraph(SSA):
         # Replace non modified node used in phi with new variable
         self.ircfg.simplify(lambda expr:expr.replace_expr(var_to_newname))
 
-        irblock = self.ircfg.blocks[head]
-        assignblks = list(irblock)
-        assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)]
-        self.ircfg.blocks[head] = IRBlock(head, assignblks)
+        if newname_to_var:
+            irblock = self.ircfg.blocks[head]
+            assignblks = list(irblock)
+            assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)]
+            self.ircfg.blocks[head] = IRBlock(head, assignblks)
 
         # Updt structure
         for loc_key in self._phinodes: