about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorFabrice Desclaux <fabrice.desclaux@cea.fr>2018-06-15 17:59:26 +0200
committerFabrice Desclaux <fabrice.desclaux@cea.fr>2018-07-10 14:57:22 +0200
commitfecd4b822ec268d91570ea58868c0cda64d19b3d (patch)
tree81715a7e7c7a79dff364af9a79e126e96c434fce
parent17d48de1951c81fc8b5b4184713a971537747227 (diff)
downloadmiasm-fecd4b822ec268d91570ea58868c0cda64d19b3d.tar.gz
miasm-fecd4b822ec268d91570ea58868c0cda64d19b3d.zip
IR: move simplification from ir to data_flow
-rw-r--r--miasm2/analysis/data_flow.py250
-rw-r--r--miasm2/ir/ir.py148
2 files changed, 260 insertions, 138 deletions
diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py
index 9e5203a6..b3c4df88 100644
--- a/miasm2/analysis/data_flow.py
+++ b/miasm2/analysis/data_flow.py
@@ -3,6 +3,8 @@
 from collections import namedtuple
 from miasm2.core.graph import DiGraph
 from miasm2.ir.ir import AssignBlock, IRBlock
+from miasm2.expression.expression import ExprLoc
+
 
 class ReachingDefinitions(dict):
     """
@@ -95,6 +97,7 @@ ATTR_DEP = {"color" : "black",
 
 AssignblkNode = namedtuple('AssignblkNode', ['label', 'index', 'var'])
 
+
 class DiGraphDefUse(DiGraph):
     """Representation of a Use-Definition graph as defined by
     Kennedy, K. (1979). A survey of data flow analysis techniques.
@@ -217,17 +220,16 @@ def dead_simp_useful_assignblks(irarch, defuse, reaching_defs):
             valid_definitions = reaching_defs.get_definitions(block_lbl,
                                                               len(block))
             for lval, definitions in valid_definitions.iteritems():
-                if (lval in irarch.get_out_regs(block)
-                    or keep_all_definitions):
+                if lval in irarch.get_out_regs(block) or keep_all_definitions:
                     for definition in definitions:
                         useful.add(AssignblkNode(definition[0], definition[1], lval))
 
         # Force keeping of specific cases
         for index, assignblk in enumerate(block):
             for lval, rval in assignblk.iteritems():
-                if (lval.is_mem()
-                    or irarch.IRDst == lval
-                    or rval.is_function_call()):
+                if (lval.is_mem() or
+                    irarch.IRDst == lval or
+                    rval.is_function_call()):
                     useful.add(AssignblkNode(block_lbl, index, lval))
 
     # Useful nodes dependencies
@@ -235,6 +237,7 @@ def dead_simp_useful_assignblks(irarch, defuse, reaching_defs):
         for parent in defuse.reachable_parents(node):
             yield parent
 
+
 def dead_simp(irarch, ircfg):
     """
     Remove useless affectations.
@@ -263,3 +266,240 @@ def dead_simp(irarch, ircfg):
             irs.append(AssignBlock(new_assignblk, assignblk.instr))
         ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs)
     return modified
+
+
+def _test_merge_next_block(ircfg, loc_key):
+    """
+    Test if the irblock at @loc_key can be merge with its son
+    @ircfg: DiGraphIR instance
+    @loc_key: LocKey instance of the candidate parent irblock
+    """
+
+    if loc_key not in ircfg.blocks:
+        return None
+    sons = ircfg.successors(loc_key)
+    if len(sons) != 1:
+        return None
+    son = list(sons)[0]
+    if ircfg.predecessors(son) != [loc_key]:
+        return None
+    if son not in ircfg.blocks:
+        return None
+    return son
+
+def _do_merge_blocks(ircfg, loc_key, son_loc_key):
+    """
+    Merge two irblocks at @loc_key and @son_loc_key
+
+    @ircfg: DiGrpahIR
+    @loc_key: LocKey instance of the parent IRBlock
+    @loc_key: LocKey instance of the son IRBlock
+    """
+
+    assignblks = []
+    for assignblk in ircfg.blocks[loc_key]:
+        if ircfg.IRDst not in assignblk:
+            assignblks.append(assignblk)
+            continue
+        affs = {}
+        for dst, src in assignblk.iteritems():
+            if dst != ircfg.IRDst:
+                affs[dst] = src
+        if affs:
+            assignblks.append(AssignBlock(affs, assignblk.instr))
+
+    assignblks += ircfg.blocks[son_loc_key].assignblks
+    new_block = IRBlock(loc_key, assignblks)
+
+    ircfg.discard_edge(loc_key, son_loc_key)
+
+    for son_successor in ircfg.successors(son_loc_key):
+        ircfg.add_uniq_edge(loc_key, son_successor)
+        ircfg.discard_edge(son_loc_key, son_successor)
+    del ircfg.blocks[son_loc_key]
+    ircfg.del_node(son_loc_key)
+    ircfg.blocks[loc_key] = new_block
+
+
+def _test_jmp_only(ircfg, loc_key):
+    """
+    If irblock at @loc_key sets only IRDst to an ExprLoc, return the
+    corresponding loc_key target.
+    None in other cases.
+
+    @ircfg: IRCFG instance
+    @loc_key: LocKey instance of the candidate irblock
+
+    """
+
+    if loc_key not in ircfg.blocks:
+        return None
+    irblock = ircfg.blocks[loc_key]
+    if len(irblock.assignblks) != 1:
+        return None
+    items = dict(irblock.assignblks[0]).items()
+    if len(items) != 1:
+        return None
+    dst, src = items[0]
+    assert dst.is_id("IRDst")
+    if not src.is_loc():
+        return None
+    return src.loc_key
+
+
+def _relink_block_node(ircfg, loc_key, son_loc_key, replace_dct):
+    """
+    Link loc_key's parents to parents directly to son_loc_key
+    """
+    for parent in set(ircfg.predecessors(loc_key)):
+        parent_block = ircfg.blocks[parent]
+
+        new_block = parent_block.modify_exprs(
+            lambda expr:expr.replace_expr(replace_dct),
+            lambda expr:expr.replace_expr(replace_dct)
+        )
+
+        # Link parent to new dst
+        ircfg.add_edge(parent, son_loc_key)
+
+        # Unlink block
+        ircfg.blocks[new_block.loc_key] = new_block
+        ircfg.del_node(loc_key)
+
+
+def _remove_to_son(ircfg, loc_key, son_loc_key):
+    """
+    Merge irblocks; The final block has the @son_loc_key loc_key
+    Update references
+
+    Condition:
+    - irblock at @loc_key is a pure jump block
+    - @loc_key is not an entry point (can be removed)
+
+    @irblock: DiGraphIR
+    @loc_key: LocKey instance of the parent irblock
+    @son_loc_key: LocKey instance of the son irblock
+    """
+
+    # Ircfg loop => don't mess
+    if loc_key == son_loc_key:
+        return False
+
+    # Unlink block destinations
+    ircfg.del_edge(loc_key, son_loc_key)
+    del ircfg.blocks[loc_key]
+
+    replace_dct = {
+        ExprLoc(loc_key, ircfg.IRDst.size):ExprLoc(son_loc_key, ircfg.IRDst.size)
+    }
+
+    _relink_block_node(ircfg, loc_key, son_loc_key, replace_dct)
+
+    return True
+
+
+def _remove_to_parent(ircfg, loc_key, son_loc_key):
+    """
+    Merge irblocks; The final block has the @loc_key loc_key
+    Update references
+
+    Condition:
+    - irblock at @loc_key is a pure jump block
+    - @son_loc_key is not an entry point (can be removed)
+
+    @irblock: IRCFG instance
+    @loc_key: LocKey instance of the parent irblock
+    @son_loc_key: LocKey instance of the son irblock
+    """
+
+    # Ircfg loop => don't mess
+    if loc_key == son_loc_key:
+        return False
+
+    # Unlink block destinations
+    ircfg.del_edge(loc_key, son_loc_key)
+
+    old_irblock = ircfg.blocks[son_loc_key]
+    new_irblock = IRBlock(loc_key, old_irblock.assignblks)
+
+    ircfg.blocks[son_loc_key] = new_irblock
+
+    del ircfg.blocks[son_loc_key]
+    ircfg.add_irblock(new_irblock)
+
+    replace_dct = {
+        ExprLoc(son_loc_key, ircfg.IRDst.size):ExprLoc(loc_key, ircfg.IRDst.size)
+    }
+
+    _relink_block_node(ircfg, son_loc_key, loc_key, replace_dct)
+
+    return True
+
+
+def merge_blocks(ircfg, loc_key_entries):
+    """
+    This function modifies @ircfg to apply the following transformations:
+    - group an irblock with its son if the irblock has one and only one son and
+      this son has one and only one parent (spaghetti code).
+    - if an irblock is only made of an assignment to IRDst with a given label,
+      this irblock is dropped and its parent destination targets are
+      updated. The irblock must have a parent (avoid deleting the function head)
+    - if an irblock is a head of the graph and is only made of an assignment to
+      IRDst with a given label, this irblock is dropped and its son becomes the
+      head. References are fixed
+
+    Return True if at least an irblock has been modified
+
+    @ircfg: DiGraphIR instance
+    @loc_key_entries: loc_key to keep
+    """
+
+    modified = False
+    todo = set(ircfg.nodes())
+    while todo:
+        loc_key = todo.pop()
+
+        # Test merge block
+        son = _test_merge_next_block(ircfg, loc_key)
+        if son is not None and son not in loc_key_entries:
+            _do_merge_blocks(ircfg, loc_key, son)
+            todo.add(loc_key)
+            modified = True
+            continue
+
+        # Test jmp only block
+        son = _test_jmp_only(ircfg, loc_key)
+        if son is not None and loc_key not in loc_key_entries:
+            modified |= _remove_to_son(ircfg, loc_key, son)
+            todo.add(loc_key)
+            continue
+
+        # Test head jmp only block
+        if son is not None and son not in loc_key_entries:
+            # jmp only test done previously
+            modified |= _remove_to_parent(ircfg, loc_key, son)
+            todo.add(loc_key)
+            continue
+
+
+    return modified
+
+
+def remove_empty_assignblks(ircfg):
+    """
+    Remove empty assignblks in irblocks of @ircfg
+    Return True if at least an irblock has been modified
+
+    @ircfg: DiGraphIR
+    """
+    modified = False
+    for loc_key, block in ircfg.blocks.iteritems():
+        irs = []
+        for assignblk in block:
+            if len(assignblk):
+                irs.append(assignblk)
+            else:
+                modified = True
+        ircfg.blocks[loc_key] = IRBlock(loc_key, irs)
+
+    return modified
diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py
index bf9b4e9a..9f797199 100644
--- a/miasm2/ir/ir.py
+++ b/miasm2/ir/ir.py
@@ -302,6 +302,20 @@ class IRBlock(object):
         self._dst = None
         self._dst_linenb = None
 
+    def __eq__(self, other):
+        if self.__class__ is not other.__class__:
+            return False
+        if self.loc_key != other.loc_key:
+            return False
+        if len(self.assignblks) != len(other.assignblks):
+            return False
+        for assignblk1, assignblk2 in zip(self.assignblks, other.assignblks):
+            if assignblk1 != assignblk2:
+                return False
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
 
     def get_label(self):
         warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"')
@@ -421,7 +435,7 @@ class IRBlock(object):
                 node_name = "".join("%s:\n" % name for name in names)
         out.append(node_name)
 
-        for i, assignblk in enumerate(self):
+        for assignblk in self:
             out.append(assignblk.to_string(loc_db))
         return '\n'.join(out)
 
@@ -661,138 +675,6 @@ class DiGraphIR(DiGraph):
 
         return done
 
-    def remove_empty_assignblks(self):
-        modified = False
-        for loc_key, block in self.blocks.iteritems():
-            irs = []
-            for assignblk in block:
-                if len(assignblk):
-                    irs.append(assignblk)
-                else:
-                    modified = True
-            self.blocks[loc_key] = IRBlock(loc_key, irs)
-        return modified
-
-    def remove_jmp_blocks(self):
-        """
-        Remove irblock with only IRDst set, by linking it's parent destination to
-        the block destination.
-        """
-
-        # Find candidates
-        jmp_blocks = set()
-        for block in self.blocks.itervalues():
-            if len(block) != 1:
-                continue
-            assignblk = block[0]
-            if len(assignblk) > 1:
-                continue
-            assert set(assignblk.keys()) == set([self.IRDst])
-            if len(self.successors(block.loc_key)) != 1:
-                continue
-            if not assignblk[self.IRDst].is_loc():
-                continue
-            dst = assignblk[self.IRDst].loc_key
-            if dst == block.loc_key:
-                # Infinite loop block
-                continue
-            jmp_blocks.add(block.loc_key)
-
-        # Remove them, relink graph
-        modified = False
-        for loc_key in jmp_blocks:
-            block = self.blocks[loc_key]
-            dst_loc_key = block.dst
-            parents = self.predecessors(block.loc_key)
-            for lbl in parents:
-                parent = self.blocks.get(lbl, None)
-                if parent is None:
-                    continue
-                dst = parent.dst
-                if dst.is_id(block.loc_key):
-                    dst = m2_expr.ExprLoc(dst_loc_key, dst.size)
-
-                    self.discard_edge(lbl, block.loc_key)
-                    self.discard_edge(block.loc_key, dst_loc_key)
-
-                    self.add_uniq_edge(lbl, dst_loc_key)
-                    modified = True
-                elif dst.is_cond():
-                    src1, src2 = dst.src1, dst.src2
-                    if src1.is_id(block.loc_key):
-                        dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_loc_key, dst.size), dst.src2)
-                        self.discard_edge(lbl, block.loc_key)
-                        self.discard_edge(block.loc_key, dst_loc_key)
-                        self.add_uniq_edge(lbl, dst_loc_key)
-                        modified = True
-                    if src2.is_id(block.loc_key):
-                        dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_loc_key, dst.size))
-                        self.discard_edge(lbl, block.loc_key)
-                        self.discard_edge(block.loc_key, dst_loc_key)
-                        self.add_uniq_edge(lbl, dst_loc_key)
-                        modified = True
-                    if dst.src1 == dst.src2:
-                        dst = dst.src1
-                else:
-                    continue
-                new_parent = parent.set_dst(dst)
-                self.blocks[parent.loc_key] = new_parent
-
-        # Remove unlinked useless nodes
-        for loc_key in jmp_blocks:
-            if (len(self.predecessors(loc_key)) == 0 and
-                len(self.successors(loc_key)) == 0):
-                self.del_node(loc_key)
-                del self.blocks[loc_key]
-        return modified
-
-    def merge_blocks(self):
-        """
-        Group irblock with one and only one son if this son has one and only one
-        parent
-        """
-        modified = False
-        todo = set(self.nodes())
-        while todo:
-            block = todo.pop()
-            sons = self.successors(block)
-            if len(sons) != 1:
-                continue
-            son = list(sons)[0]
-            if self.predecessors(son) != [block]:
-                continue
-            if block not in self.blocks:
-                continue
-            if son not in self.blocks:
-                continue
-            # Block has one son, son has one parent => merge
-            assignblks = []
-            for assignblk in self.blocks[block]:
-                if self.IRDst not in assignblk:
-                    assignblks.append(assignblk)
-                    continue
-                affs = {}
-                for dst, src in assignblk.iteritems():
-                    if dst != self.IRDst:
-                        affs[dst] = src
-                assignblks.append(AssignBlock(affs, assignblk.instr))
-
-            assignblks += self.blocks[son].assignblks
-            new_block = IRBlock(block, assignblks)
-
-            self.discard_edge(block, son)
-
-            for lson in self.successors(son):
-                self.add_uniq_edge(block, lson)
-                self.discard_edge(son, lson)
-            del self.blocks[son]
-            self.del_node(son)
-
-            self.blocks[block] = new_block
-            todo.add(block)
-            modified = True
-        return modified
-
 
 class IntermediateRepresentation(object):
     """