diff options
Diffstat (limited to '')
| -rw-r--r-- | example/disasm/full.py | 17 | ||||
| -rw-r--r-- | example/samples/x86_32_dead.S | 15 | ||||
| -rw-r--r-- | miasm2/analysis/data_flow.py | 3 | ||||
| -rw-r--r-- | miasm2/core/graph.py | 5 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 169 | ||||
| -rwxr-xr-x | test/test_all.py | 9 |
6 files changed, 183 insertions, 35 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index b919310a..33903282 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -9,6 +9,7 @@ from miasm2.expression.expression import ExprId from miasm2.core.interval import interval from miasm2.analysis.machine import Machine from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, ReachingDefinitions +from miasm2.expression.simplifications import expr_simp log = logging.getLogger("dis") console_handler = logging.StreamHandler() @@ -43,7 +44,7 @@ parser.add_argument('-z', "--dis-nulstart-block", action="store_true", parser.add_argument('-l', "--dontdis-retcall", action="store_true", help="If set, disassemble only call destinations") parser.add_argument('-s', "--simplify", action="store_true", - help="Use the liveness analysis pass") + help="Apply simplifications rules (liveness, graph simplification, ...)") parser.add_argument('-o', "--shiftoffset", default=None, type=lambda x: int(x, 0), help="Shift input binary by an offset") @@ -210,7 +211,7 @@ if args.gen_ir: for label, block in ir_arch_a.blocks.iteritems(): print block - if args.simplify: + if args.simplify > 0: dead_simp(ir_arch_a) if args.defuse: @@ -221,3 +222,15 @@ if args.gen_ir: open('graph_irflow.dot', 'w').write(out) out = ir_arch.graph.dot() open('graph_irflow_raw.dot', 'w').write(out) + + if args.simplify > 1: + ir_arch_a.simplify(expr_simp) + modified = True + while modified: + modified = False + modified |= dead_simp(ir_arch_a) + modified |= ir_arch_a.remove_empty_assignblks() + modified |= ir_arch_a.remove_jmp_blocks() + modified |= ir_arch_a.merge_blocks() + + open('graph_irflow_reduced.dot', 'w').write(ir_arch_a.graph.dot()) diff --git a/example/samples/x86_32_dead.S b/example/samples/x86_32_dead.S new file mode 100644 index 00000000..e1130842 --- /dev/null +++ b/example/samples/x86_32_dead.S @@ -0,0 +1,15 @@ +main: + MOV ECX, ECX + INC ECX + CMP ECX, 0 + JZ lbl0 + INC EAX +lbl0: + DEC EAX + JMP lbl1 +lbl1: + MOV EAX, 3 + JMP lbl2 +lbl2: + MOV EAX, 4 + RET diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index dc1bf6ae..892a01c3 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -243,6 +243,7 @@ def dead_simp(ir_a): Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 """ + modified = False reaching_defs = ReachingDefinitions(ir_a) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) useful = set(dead_simp_useful_instrs(defuse, reaching_defs)) @@ -252,4 +253,6 @@ def dead_simp(ir_a): for lval in assignblk: if InstrNode(block.label, idx, lval) not in useful: del new_assignblk[lval] + modified = True block.irs[idx] = AssignBlock(new_assignblk, assignblk.instr) + return modified diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index d97ca8be..ec9eac36 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -100,6 +100,11 @@ class DiGraph(object): self._nodes_succ[src].remove(dst) self._nodes_pred[dst].remove(src) + def discard_edge(self, src, dst): + """Remove edge between @src and @dst if it exits""" + if (src, dst) in self._edges: + self.del_edge(src, dst) + def predecessors_iter(self, node): if not node in self._nodes_pred: raise StopIteration diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index caf2c0ed..bc23d471 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,7 +23,6 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.expression.simplifications import expr_simp from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \ AsmBlock from miasm2.core.graph import DiGraph @@ -267,7 +266,7 @@ class IRBlock(object): final_linenb = None for linenb, assignblk in enumerate(self.irs): for dst, src in assignblk.iteritems(): - if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst": + if dst.is_id("IRDst"): if final_dst is not None: raise ValueError('Multiple destinations!') final_dst = src @@ -283,7 +282,7 @@ class IRBlock(object): new_assignblk = dict(self.irs[self._dst_linenb]) for dst in new_assignblk: - if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst": + if dst.is_id("IRDst"): new_assignblk[dst] = value # Sanity check is already done in _get_dst break @@ -297,27 +296,6 @@ class IRBlock(object): """Line number of the IRDst setting statement in the current irs""" return self._dst_linenb - def get_rw(self, regs_ids): - """ - Computes the variables read and written by each instructions - Initialize attributes needed for in/out and reach computation. - @regs_ids : ids of registers used in IR - """ - keep_exprid = lambda elts: filter(lambda expr: isinstance(expr, - m2_expr.ExprId), - elts) - for idx, assignblk in enumerate(self.irs): - assignblk._cur_reach = {reg: set() for reg in regs_ids} - assignblk._prev_reach = {reg: set() for reg in regs_ids} - assignblk._cur_kill = {reg: set() for reg in regs_ids} - assignblk._prev_kill = {reg: set() for reg in regs_ids} - # LineNumber -> dict: - # Register: set(definition(irb label, index)) - assignblk.defout = {reg: set() for reg in regs_ids} - assignblk.defout.update({dst: set([(self.label, idx, dst)]) - for dst in assignblk - if isinstance(dst, m2_expr.ExprId)}) - def __str__(self): out = [] out.append('%s' % self.label) @@ -595,12 +573,17 @@ class IntermediateRepresentation(object): label = self.symbol_pool.getby_offset_create(instr.offset + instr.l) return label - def simplify_blocs(self): - for irblock in self.blocks.values(): - for assignblk in irblock.irs: - for dst, src in assignblk.items(): - del assignblk[dst] - assignblk[expr_simp(dst)] = expr_simp(src) + def simplify(self, simplifier): + """ + Simplify expressions in each irblocks + @simplifier: ExpressionSimplifier instance + """ + for label, block in self.blocks.iteritems(): + assignblks = [] + for assignblk in block.irs: + new_assignblk = assignblk.simplify(simplifier) + assignblks.append(new_assignblk) + self.blocks[label] = IRBlock(label, assignblks) def replace_expr_in_ir(self, bloc, rep): for assignblk in bloc.irs: @@ -685,6 +668,132 @@ class IntermediateRepresentation(object): self._gen_graph() return self._graph + def remove_empty_assignblks(self): + modified = False + for block in self.blocks.itervalues(): + to_del = [] + for idx, assignblk in enumerate(block.irs): + if len(assignblk) == 0: + to_del.append(idx) + for idx in reversed(to_del): + del block.irs[idx] + block._dst_linenb = None + block._dst = None + modified = True + return modified + + def remove_jmp_blocks(self): + """ + Remove irblock with only IRDst set, by linking it's parent destination to + the block destination. + """ + + # Find candidates + jmp_blocks = set() + for block in self.blocks.itervalues(): + if len(block.irs) != 1: + continue + assignblk = block.irs[0] + if len(assignblk) > 1: + continue + assert set(assignblk.keys()) == set([self.IRDst]) + if len(self.graph.successors(block.label)) != 1: + continue + if not expr_is_label(assignblk[self.IRDst]): + continue + jmp_blocks.add(block) + + # Remove them, relink graph + modified = False + for block in jmp_blocks: + dst_label = block.dst.name + parents = self.graph.predecessors(block.label) + for lbl in parents: + parent = self.blocks.get(lbl, None) + if parent is None: + continue + dst = parent.dst + if dst.is_id(block.label): + dst = m2_expr.ExprId(dst_label, dst.size) + + self.graph.discard_edge(lbl, block.label) + self.graph.discard_edge(block.label, dst_label) + + self.graph.add_uniq_edge(lbl, dst_label) + modified = True + elif dst.is_cond(): + src1, src2 = dst.src1, dst.src2 + if src1.is_id(block.label): + dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2) + self.graph.discard_edge(lbl, block.label) + self.graph.discard_edge(block.label, dst_label) + self.graph.add_uniq_edge(lbl, dst_label) + modified = True + if src2.is_id(block.label): + dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size)) + self.graph.discard_edge(lbl, block.label) + self.graph.discard_edge(block.label, dst_label) + self.graph.add_uniq_edge(lbl, dst_label) + modified = True + if dst.src1 == dst.src2: + dst = src1 + else: + continue + parent.dst = dst + + # Remove unlinked useless nodes + for block in jmp_blocks: + if (len(self.graph.predecessors(block.label)) == 0 and + len(self.graph.successors(block.label)) == 0): + self.graph.del_node(block.label) + return modified + + def merge_blocks(self): + """ + Group irblock with one and only one son if this son has one and only one + parent + """ + modified = False + todo = set(self.graph.nodes()) + while todo: + block = todo.pop() + sons = self.graph.successors(block) + if len(sons) != 1: + continue + son = list(sons)[0] + if self.graph.predecessors(son) != [block]: + continue + if block not in self.blocks: + continue + if son not in self.blocks: + continue + # Block has one son, son has one parent => merge + assignblks =[] + for linenb, assignblk in enumerate(self.blocks[block].irs): + if self.IRDst not in assignblk: + assignblks.append(assignblk) + continue + affs = {} + for dst, src in assignblk.iteritems(): + if dst != self.IRDst: + affs[dst] = src + assignblks.append(AssignBlock(affs, assignblk.instr)) + + assignblks += self.blocks[son].irs + new_block = IRBlock(block, assignblks) + + self.graph.discard_edge(block, son) + + for lson in self.graph.successors(son): + self.graph.add_uniq_edge(block, lson) + self.graph.discard_edge(son, lson) + del self.blocks[son] + self.graph.del_node(son) + + self.blocks[block] = new_block + todo.add(block) + modified = True + return modified class ir(IntermediateRepresentation): diff --git a/test/test_all.py b/test/test_all.py index d2c3e5e2..0cc50d03 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -432,6 +432,7 @@ test_x86_64 = ExampleShellcode(["x86_64", "x86_64.S", "demo_x86_64.bin", test_x86_32_if_reg = ExampleShellcode(['x86_32', 'x86_32_if_reg.S', "x86_32_if_reg.bin"]) test_x86_32_seh = ExampleShellcode(["x86_32", "x86_32_seh.S", "x86_32_seh.bin", "--PE"]) +test_x86_32_dead = ExampleShellcode(['x86_32', 'x86_32_dead.S', "x86_32_dead.bin"]) test_human = ExampleShellcode(["x86_64", "human.S", "human.bin"]) @@ -449,7 +450,7 @@ testset += test_mips32l testset += test_x86_64 testset += test_x86_32_if_reg testset += test_x86_32_seh - +testset += test_x86_32_dead testset += test_human class ExampleDisassembler(Example): @@ -480,9 +481,9 @@ class ExampleDisasmFull(ExampleDisassembler): def __init__(self, *args, **kwargs): super(ExampleDisasmFull, self).__init__(*args, **kwargs) - self.command_line = ["full.py", "-g", "-s", "-d", "-m"] + self.command_line + self.command_line = ["full.py", "-g", "-ss", "-d", "-m"] + self.command_line self.products += ["graph_defuse.dot", "graph_execflow.dot", - "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot"] + "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot", "graph_irflow_reduced.dot"] testset += ExampleDisasmFull(["arml", Example.get_sample("demo_arm_l.bin"), @@ -519,6 +520,8 @@ testset += ExampleDisasmFull(["x86_32", os.path.join("..", "..", "test", "arch", "x86", "qemu", "test-i386"), "func_iret"]) +testset += ExampleDisasmFull(["x86_32", Example.get_sample("x86_32_dead.bin"), + "0"], depends=[test_x86_32_dead]) ## Expression |