about summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--example/disasm/full.py17
-rw-r--r--example/samples/x86_32_dead.S15
-rw-r--r--miasm2/analysis/data_flow.py3
-rw-r--r--miasm2/core/graph.py5
-rw-r--r--miasm2/ir/ir.py169
-rwxr-xr-xtest/test_all.py9
6 files changed, 183 insertions, 35 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py
index b919310a..33903282 100644
--- a/example/disasm/full.py
+++ b/example/disasm/full.py
@@ -9,6 +9,7 @@ from miasm2.expression.expression import ExprId
 from miasm2.core.interval import interval
 from miasm2.analysis.machine import Machine
 from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, ReachingDefinitions
+from miasm2.expression.simplifications import expr_simp
 
 log = logging.getLogger("dis")
 console_handler = logging.StreamHandler()
@@ -43,7 +44,7 @@ parser.add_argument('-z', "--dis-nulstart-block", action="store_true",
 parser.add_argument('-l', "--dontdis-retcall", action="store_true",
                     help="If set, disassemble only call destinations")
 parser.add_argument('-s', "--simplify", action="store_true",
-                    help="Use the liveness analysis pass")
+                    help="Apply simplifications rules (liveness, graph simplification, ...)")
 parser.add_argument('-o', "--shiftoffset", default=None,
                     type=lambda x: int(x, 0),
                     help="Shift input binary by an offset")
@@ -210,7 +211,7 @@ if args.gen_ir:
     for label, block in ir_arch_a.blocks.iteritems():
         print block
 
-    if args.simplify:
+    if args.simplify > 0:
         dead_simp(ir_arch_a)
 
     if args.defuse:
@@ -221,3 +222,15 @@ if args.gen_ir:
     open('graph_irflow.dot', 'w').write(out)
     out = ir_arch.graph.dot()
     open('graph_irflow_raw.dot', 'w').write(out)
+
+    if args.simplify > 1:
+        ir_arch_a.simplify(expr_simp)
+        modified = True
+        while modified:
+            modified = False
+            modified |= dead_simp(ir_arch_a)
+            modified |= ir_arch_a.remove_empty_assignblks()
+            modified |= ir_arch_a.remove_jmp_blocks()
+            modified |= ir_arch_a.merge_blocks()
+
+        open('graph_irflow_reduced.dot', 'w').write(ir_arch_a.graph.dot())
diff --git a/example/samples/x86_32_dead.S b/example/samples/x86_32_dead.S
new file mode 100644
index 00000000..e1130842
--- /dev/null
+++ b/example/samples/x86_32_dead.S
@@ -0,0 +1,15 @@
+main:
+	MOV     ECX, ECX
+	INC     ECX
+	CMP     ECX, 0
+	JZ      lbl0
+	INC     EAX
+lbl0:
+	DEC     EAX
+	JMP     lbl1
+lbl1:
+	MOV     EAX, 3
+	JMP     lbl2
+lbl2:
+	MOV     EAX, 4
+	RET
diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py
index dc1bf6ae..892a01c3 100644
--- a/miasm2/analysis/data_flow.py
+++ b/miasm2/analysis/data_flow.py
@@ -243,6 +243,7 @@ def dead_simp(ir_a):
     Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
     IBM Thomas J. Watson Research Division, page 43
     """
+    modified = False
     reaching_defs = ReachingDefinitions(ir_a)
     defuse = DiGraphDefUse(reaching_defs, deref_mem=True)
     useful = set(dead_simp_useful_instrs(defuse, reaching_defs))
@@ -252,4 +253,6 @@ def dead_simp(ir_a):
             for lval in assignblk:
                 if InstrNode(block.label, idx, lval) not in useful:
                     del new_assignblk[lval]
+                    modified = True
             block.irs[idx] = AssignBlock(new_assignblk, assignblk.instr)
+    return modified
diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py
index d97ca8be..ec9eac36 100644
--- a/miasm2/core/graph.py
+++ b/miasm2/core/graph.py
@@ -100,6 +100,11 @@ class DiGraph(object):
         self._nodes_succ[src].remove(dst)
         self._nodes_pred[dst].remove(src)
 
+    def discard_edge(self, src, dst):
+        """Remove edge between @src and @dst if it exits"""
+        if (src, dst) in self._edges:
+            self.del_edge(src, dst)
+
     def predecessors_iter(self, node):
         if not node in self._nodes_pred:
             raise StopIteration
diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py
index caf2c0ed..bc23d471 100644
--- a/miasm2/ir/ir.py
+++ b/miasm2/ir/ir.py
@@ -23,7 +23,6 @@ from itertools import chain
 
 import miasm2.expression.expression as m2_expr
 from miasm2.expression.expression_helper import get_missing_interval
-from miasm2.expression.simplifications import expr_simp
 from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \
     AsmBlock
 from miasm2.core.graph import DiGraph
@@ -267,7 +266,7 @@ class IRBlock(object):
         final_linenb = None
         for linenb, assignblk in enumerate(self.irs):
             for dst, src in assignblk.iteritems():
-                if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst":
+                if dst.is_id("IRDst"):
                     if final_dst is not None:
                         raise ValueError('Multiple destinations!')
                     final_dst = src
@@ -283,7 +282,7 @@ class IRBlock(object):
 
         new_assignblk = dict(self.irs[self._dst_linenb])
         for dst in new_assignblk:
-            if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst":
+            if dst.is_id("IRDst"):
                 new_assignblk[dst] = value
                 # Sanity check is already done in _get_dst
                 break
@@ -297,27 +296,6 @@ class IRBlock(object):
         """Line number of the IRDst setting statement in the current irs"""
         return self._dst_linenb
 
-    def get_rw(self, regs_ids):
-        """
-        Computes the variables read and written by each instructions
-        Initialize attributes needed for in/out and reach computation.
-        @regs_ids : ids of registers used in IR
-        """
-        keep_exprid = lambda elts: filter(lambda expr: isinstance(expr,
-                                                                  m2_expr.ExprId),
-                                          elts)
-        for idx, assignblk in enumerate(self.irs):
-            assignblk._cur_reach = {reg: set() for reg in regs_ids}
-            assignblk._prev_reach = {reg: set() for reg in regs_ids}
-            assignblk._cur_kill = {reg: set() for reg in regs_ids}
-            assignblk._prev_kill = {reg: set() for reg in regs_ids}
-            # LineNumber -> dict:
-            #               Register: set(definition(irb label, index))
-            assignblk.defout = {reg: set() for reg in regs_ids}
-            assignblk.defout.update({dst: set([(self.label, idx, dst)])
-                                     for dst in assignblk
-                                     if isinstance(dst, m2_expr.ExprId)})
-
     def __str__(self):
         out = []
         out.append('%s' % self.label)
@@ -595,12 +573,17 @@ class IntermediateRepresentation(object):
         label = self.symbol_pool.getby_offset_create(instr.offset + instr.l)
         return label
 
-    def simplify_blocs(self):
-        for irblock in self.blocks.values():
-            for assignblk in irblock.irs:
-                for dst, src in assignblk.items():
-                    del assignblk[dst]
-                    assignblk[expr_simp(dst)] = expr_simp(src)
+    def simplify(self, simplifier):
+        """
+        Simplify expressions in each irblocks
+        @simplifier: ExpressionSimplifier instance
+        """
+        for label, block in self.blocks.iteritems():
+            assignblks = []
+            for assignblk in block.irs:
+                new_assignblk = assignblk.simplify(simplifier)
+                assignblks.append(new_assignblk)
+            self.blocks[label] = IRBlock(label, assignblks)
 
     def replace_expr_in_ir(self, bloc, rep):
         for assignblk in bloc.irs:
@@ -685,6 +668,132 @@ class IntermediateRepresentation(object):
             self._gen_graph()
         return self._graph
 
+    def remove_empty_assignblks(self):
+        modified = False
+        for block in self.blocks.itervalues():
+            to_del = []
+            for idx, assignblk in enumerate(block.irs):
+                if len(assignblk) == 0:
+                    to_del.append(idx)
+            for idx in reversed(to_del):
+                del block.irs[idx]
+                block._dst_linenb = None
+                block._dst = None
+                modified = True
+        return modified
+
+    def remove_jmp_blocks(self):
+        """
+        Remove irblock with only IRDst set, by linking it's parent destination to
+        the block destination.
+        """
+
+        # Find candidates
+        jmp_blocks = set()
+        for block in self.blocks.itervalues():
+            if len(block.irs) != 1:
+                continue
+            assignblk = block.irs[0]
+            if len(assignblk) > 1:
+                continue
+            assert set(assignblk.keys()) == set([self.IRDst])
+            if len(self.graph.successors(block.label)) != 1:
+                continue
+            if not expr_is_label(assignblk[self.IRDst]):
+                continue
+            jmp_blocks.add(block)
+
+        # Remove them, relink graph
+        modified = False
+        for block in jmp_blocks:
+            dst_label = block.dst.name
+            parents = self.graph.predecessors(block.label)
+            for lbl in parents:
+                parent = self.blocks.get(lbl, None)
+                if parent is None:
+                    continue
+                dst = parent.dst
+                if dst.is_id(block.label):
+                    dst = m2_expr.ExprId(dst_label, dst.size)
+
+                    self.graph.discard_edge(lbl, block.label)
+                    self.graph.discard_edge(block.label, dst_label)
+
+                    self.graph.add_uniq_edge(lbl, dst_label)
+                    modified = True
+                elif dst.is_cond():
+                    src1, src2 = dst.src1, dst.src2
+                    if src1.is_id(block.label):
+                        dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2)
+                        self.graph.discard_edge(lbl, block.label)
+                        self.graph.discard_edge(block.label, dst_label)
+                        self.graph.add_uniq_edge(lbl, dst_label)
+                        modified = True
+                    if src2.is_id(block.label):
+                        dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size))
+                        self.graph.discard_edge(lbl, block.label)
+                        self.graph.discard_edge(block.label, dst_label)
+                        self.graph.add_uniq_edge(lbl, dst_label)
+                        modified = True
+                    if dst.src1 == dst.src2:
+                        dst = src1
+                else:
+                    continue
+                parent.dst = dst
+
+        # Remove unlinked useless nodes
+        for block in jmp_blocks:
+            if (len(self.graph.predecessors(block.label)) == 0 and
+                len(self.graph.successors(block.label)) == 0):
+                self.graph.del_node(block.label)
+        return modified
+
+    def merge_blocks(self):
+        """
+        Group irblock with one and only one son if this son has one and only one
+        parent
+        """
+        modified = False
+        todo = set(self.graph.nodes())
+        while todo:
+            block = todo.pop()
+            sons = self.graph.successors(block)
+            if len(sons) != 1:
+                continue
+            son = list(sons)[0]
+            if self.graph.predecessors(son) != [block]:
+                continue
+            if block not in self.blocks:
+                continue
+            if son not in self.blocks:
+                continue
+            # Block has one son, son has one parent => merge
+            assignblks =[]
+            for linenb, assignblk in enumerate(self.blocks[block].irs):
+                if self.IRDst not in assignblk:
+                    assignblks.append(assignblk)
+                    continue
+                affs = {}
+                for dst, src in assignblk.iteritems():
+                    if dst != self.IRDst:
+                        affs[dst] = src
+                assignblks.append(AssignBlock(affs, assignblk.instr))
+
+            assignblks += self.blocks[son].irs
+            new_block = IRBlock(block, assignblks)
+
+            self.graph.discard_edge(block, son)
+
+            for lson in self.graph.successors(son):
+                self.graph.add_uniq_edge(block, lson)
+                self.graph.discard_edge(son, lson)
+            del self.blocks[son]
+            self.graph.del_node(son)
+
+            self.blocks[block] = new_block
+            todo.add(block)
+            modified = True
+        return modified
 
 
 class ir(IntermediateRepresentation):
diff --git a/test/test_all.py b/test/test_all.py
index d2c3e5e2..0cc50d03 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -432,6 +432,7 @@ test_x86_64 = ExampleShellcode(["x86_64", "x86_64.S", "demo_x86_64.bin",
 test_x86_32_if_reg = ExampleShellcode(['x86_32', 'x86_32_if_reg.S', "x86_32_if_reg.bin"])
 test_x86_32_seh = ExampleShellcode(["x86_32", "x86_32_seh.S", "x86_32_seh.bin",
                                     "--PE"])
+test_x86_32_dead = ExampleShellcode(['x86_32', 'x86_32_dead.S', "x86_32_dead.bin"])
 
 test_human = ExampleShellcode(["x86_64", "human.S", "human.bin"])
 
@@ -449,7 +450,7 @@ testset += test_mips32l
 testset += test_x86_64
 testset += test_x86_32_if_reg
 testset += test_x86_32_seh
-
+testset += test_x86_32_dead
 testset += test_human
 
 class ExampleDisassembler(Example):
@@ -480,9 +481,9 @@ class ExampleDisasmFull(ExampleDisassembler):
 
     def __init__(self, *args, **kwargs):
         super(ExampleDisasmFull, self).__init__(*args, **kwargs)
-        self.command_line = ["full.py", "-g", "-s", "-d", "-m"] + self.command_line
+        self.command_line = ["full.py", "-g", "-ss", "-d", "-m"] + self.command_line
         self.products += ["graph_defuse.dot", "graph_execflow.dot",
-                          "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot"]
+                          "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot", "graph_irflow_reduced.dot"]
 
 
 testset += ExampleDisasmFull(["arml", Example.get_sample("demo_arm_l.bin"),
@@ -519,6 +520,8 @@ testset += ExampleDisasmFull(["x86_32", os.path.join("..", "..", "test",
                                                      "arch", "x86", "qemu",
                                                      "test-i386"),
                               "func_iret"])
+testset += ExampleDisasmFull(["x86_32", Example.get_sample("x86_32_dead.bin"),
+                              "0"], depends=[test_x86_32_dead])
 
 
 ## Expression