diff options
| author | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2017-04-14 16:58:53 +0200 |
|---|---|---|
| committer | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2017-04-21 10:59:37 +0200 |
| commit | e6ab4fd79ea540de0dd4563f9b6605c8a2f7a1bd (patch) | |
| tree | c9a0243c446a4f33563ea7ae56ad0794cff91d5d | |
| parent | bdd6b06076aea4bf5bbbf7ad6a8dbf89092ae315 (diff) | |
| download | miasm-e6ab4fd79ea540de0dd4563f9b6605c8a2f7a1bd.tar.gz miasm-e6ab4fd79ea540de0dd4563f9b6605c8a2f7a1bd.zip | |
Ir: Add ir graph helpers
| -rw-r--r-- | miasm2/analysis/data_flow.py | 3 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 169 |
2 files changed, 142 insertions, 30 deletions
diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index dc1bf6ae..892a01c3 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -243,6 +243,7 @@ def dead_simp(ir_a): Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 """ + modified = False reaching_defs = ReachingDefinitions(ir_a) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) useful = set(dead_simp_useful_instrs(defuse, reaching_defs)) @@ -252,4 +253,6 @@ def dead_simp(ir_a): for lval in assignblk: if InstrNode(block.label, idx, lval) not in useful: del new_assignblk[lval] + modified = True block.irs[idx] = AssignBlock(new_assignblk, assignblk.instr) + return modified diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index caf2c0ed..bc23d471 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,7 +23,6 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.expression.simplifications import expr_simp from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \ AsmBlock from miasm2.core.graph import DiGraph @@ -267,7 +266,7 @@ class IRBlock(object): final_linenb = None for linenb, assignblk in enumerate(self.irs): for dst, src in assignblk.iteritems(): - if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst": + if dst.is_id("IRDst"): if final_dst is not None: raise ValueError('Multiple destinations!') final_dst = src @@ -283,7 +282,7 @@ class IRBlock(object): new_assignblk = dict(self.irs[self._dst_linenb]) for dst in new_assignblk: - if isinstance(dst, m2_expr.ExprId) and dst.name == "IRDst": + if dst.is_id("IRDst"): new_assignblk[dst] = value # Sanity check is already done in _get_dst break @@ -297,27 +296,6 @@ class IRBlock(object): """Line number of the IRDst setting statement in the current irs""" return self._dst_linenb - def get_rw(self, regs_ids): - """ - Computes the variables read and written by each instructions - Initialize attributes needed for in/out and reach computation. - @regs_ids : ids of registers used in IR - """ - keep_exprid = lambda elts: filter(lambda expr: isinstance(expr, - m2_expr.ExprId), - elts) - for idx, assignblk in enumerate(self.irs): - assignblk._cur_reach = {reg: set() for reg in regs_ids} - assignblk._prev_reach = {reg: set() for reg in regs_ids} - assignblk._cur_kill = {reg: set() for reg in regs_ids} - assignblk._prev_kill = {reg: set() for reg in regs_ids} - # LineNumber -> dict: - # Register: set(definition(irb label, index)) - assignblk.defout = {reg: set() for reg in regs_ids} - assignblk.defout.update({dst: set([(self.label, idx, dst)]) - for dst in assignblk - if isinstance(dst, m2_expr.ExprId)}) - def __str__(self): out = [] out.append('%s' % self.label) @@ -595,12 +573,17 @@ class IntermediateRepresentation(object): label = self.symbol_pool.getby_offset_create(instr.offset + instr.l) return label - def simplify_blocs(self): - for irblock in self.blocks.values(): - for assignblk in irblock.irs: - for dst, src in assignblk.items(): - del assignblk[dst] - assignblk[expr_simp(dst)] = expr_simp(src) + def simplify(self, simplifier): + """ + Simplify expressions in each irblocks + @simplifier: ExpressionSimplifier instance + """ + for label, block in self.blocks.iteritems(): + assignblks = [] + for assignblk in block.irs: + new_assignblk = assignblk.simplify(simplifier) + assignblks.append(new_assignblk) + self.blocks[label] = IRBlock(label, assignblks) def replace_expr_in_ir(self, bloc, rep): for assignblk in bloc.irs: @@ -685,6 +668,132 @@ class IntermediateRepresentation(object): self._gen_graph() return self._graph + def remove_empty_assignblks(self): + modified = False + for block in self.blocks.itervalues(): + to_del = [] + for idx, assignblk in enumerate(block.irs): + if len(assignblk) == 0: + to_del.append(idx) + for idx in reversed(to_del): + del block.irs[idx] + block._dst_linenb = None + block._dst = None + modified = True + return modified + + def remove_jmp_blocks(self): + """ + Remove irblock with only IRDst set, by linking it's parent destination to + the block destination. + """ + + # Find candidates + jmp_blocks = set() + for block in self.blocks.itervalues(): + if len(block.irs) != 1: + continue + assignblk = block.irs[0] + if len(assignblk) > 1: + continue + assert set(assignblk.keys()) == set([self.IRDst]) + if len(self.graph.successors(block.label)) != 1: + continue + if not expr_is_label(assignblk[self.IRDst]): + continue + jmp_blocks.add(block) + + # Remove them, relink graph + modified = False + for block in jmp_blocks: + dst_label = block.dst.name + parents = self.graph.predecessors(block.label) + for lbl in parents: + parent = self.blocks.get(lbl, None) + if parent is None: + continue + dst = parent.dst + if dst.is_id(block.label): + dst = m2_expr.ExprId(dst_label, dst.size) + + self.graph.discard_edge(lbl, block.label) + self.graph.discard_edge(block.label, dst_label) + + self.graph.add_uniq_edge(lbl, dst_label) + modified = True + elif dst.is_cond(): + src1, src2 = dst.src1, dst.src2 + if src1.is_id(block.label): + dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2) + self.graph.discard_edge(lbl, block.label) + self.graph.discard_edge(block.label, dst_label) + self.graph.add_uniq_edge(lbl, dst_label) + modified = True + if src2.is_id(block.label): + dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size)) + self.graph.discard_edge(lbl, block.label) + self.graph.discard_edge(block.label, dst_label) + self.graph.add_uniq_edge(lbl, dst_label) + modified = True + if dst.src1 == dst.src2: + dst = src1 + else: + continue + parent.dst = dst + + # Remove unlinked useless nodes + for block in jmp_blocks: + if (len(self.graph.predecessors(block.label)) == 0 and + len(self.graph.successors(block.label)) == 0): + self.graph.del_node(block.label) + return modified + + def merge_blocks(self): + """ + Group irblock with one and only one son if this son has one and only one + parent + """ + modified = False + todo = set(self.graph.nodes()) + while todo: + block = todo.pop() + sons = self.graph.successors(block) + if len(sons) != 1: + continue + son = list(sons)[0] + if self.graph.predecessors(son) != [block]: + continue + if block not in self.blocks: + continue + if son not in self.blocks: + continue + # Block has one son, son has one parent => merge + assignblks =[] + for linenb, assignblk in enumerate(self.blocks[block].irs): + if self.IRDst not in assignblk: + assignblks.append(assignblk) + continue + affs = {} + for dst, src in assignblk.iteritems(): + if dst != self.IRDst: + affs[dst] = src + assignblks.append(AssignBlock(affs, assignblk.instr)) + + assignblks += self.blocks[son].irs + new_block = IRBlock(block, assignblks) + + self.graph.discard_edge(block, son) + + for lson in self.graph.successors(son): + self.graph.add_uniq_edge(block, lson) + self.graph.discard_edge(son, lson) + del self.blocks[son] + self.graph.del_node(son) + + self.blocks[block] = new_block + todo.add(block) + modified = True + return modified class ir(IntermediateRepresentation): |