diff options
Diffstat (limited to 'miasm2/ir')
| -rw-r--r-- | miasm2/ir/analysis.py | 124 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 128 | ||||
| -rw-r--r-- | miasm2/ir/translators/C.py | 14 |
3 files changed, 140 insertions, 126 deletions
diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 31f6294c..40a3bf64 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -4,7 +4,7 @@ import logging from miasm2.ir.symbexec import symbexec -from miasm2.core.graph import DiGraph +from miasm2.ir.ir import ir from miasm2.expression.expression \ import ExprAff, ExprCond, ExprId, ExprInt, ExprMem @@ -14,109 +14,21 @@ console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log.addHandler(console_handler) log.setLevel(logging.WARNING) -class ira: + +class ira(ir): + """IR Analysis + This class provides higher level manipulations on IR, such as dead + instruction removals. + + This class can be used as a common parent with `miasm2.ir.ir::ir` class. + For instance: + class ira_x86_16(ir_x86_16, ira) + """ def ira_regs_ids(self): """Returns ids of all registers used in the IR""" return self.arch.regs.all_regs_ids + [self.IRDst] - def sort_dst(self, todo, done): - out = set() - while todo: - dst = todo.pop() - if self.ExprIsLabel(dst): - done.add(dst) - elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt): - done.add(dst) - elif isinstance(dst, ExprCond): - todo.add(dst.src1) - todo.add(dst.src2) - elif isinstance(dst, ExprId): - out.add(dst) - else: - done.add(dst) - return out - - def dst_trackback(self, b): - dst = b.dst - todo = set([dst]) - done = set() - - for irs in reversed(b.irs): - if len(todo) == 0: - break - out = self.sort_dst(todo, done) - found = set() - follow = set() - for i in irs: - if not out: - break - for o in out: - if i.dst == o: - follow.add(i.src) - found.add(o) - for o in found: - out.remove(o) - - for o in out: - if o not in found: - follow.add(o) - todo = follow - - return done - - def gen_graph(self, link_all = True): - """ - Gen irbloc digraph - @link_all: also gen edges to non present irblocs - """ - self.g = DiGraph() - for lbl, b in self.blocs.items(): - # print 'add', lbl - self.g.add_node(lbl) - # dst = self.get_bloc_dst(b) - dst = self.dst_trackback(b) - # print "\tdst", dst - for d in dst: - if isinstance(d, ExprInt): - d = ExprId( - self.symbol_pool.getby_offset_create(int(d.arg))) - if self.ExprIsLabel(d): - if d.name in self.blocs or link_all is True: - self.g.add_edge(lbl, d.name) - - def graph(self): - """Output the graphviz script""" - out = """ - digraph asm_graph { - size="80,50"; - node [ - fontsize = "16", - shape = "box" - ]; - """ - all_lbls = {} - for lbl in self.g.nodes(): - if lbl not in self.blocs: - continue - irb = self.blocs[lbl] - ir_txt = [str(lbl)] - for irs in irb.irs: - for l in irs: - ir_txt.append(str(l)) - ir_txt.append("") - ir_txt.append("") - all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) - for l, v in all_lbls.items(): - # print l, v - out += '%s [label="%s"];\n' % (l, v) - - for a, b in self.g.edges(): - # print 'edge', a, b, hash(a), hash(b) - out += '%s -> %s;\n' % (hash(a), hash(b)) - out += '}' - return out - def remove_dead_instr(self, irb, useful): """Remove dead affectations using previous reaches analysis @irb: irbloc instance @@ -149,12 +61,12 @@ class ira: useful = set() - for node in self.g.nodes(): + for node in self.graph.nodes(): if node not in self.blocs: continue block = self.blocs[node] - successors = self.g.successors(node) + successors = self.graph.successors(node) has_son = bool(successors) for p_son in successors: if p_son not in self.blocs: @@ -274,7 +186,7 @@ class ira: for key, value in irb.cur_reach[0].iteritems()} # Compute reach from predecessors - for n_pred in self.g.predecessors(irb.label): + for n_pred in self.graph.predecessors(irb.label): p_block = self.blocs[n_pred] # Handle each register definition @@ -313,7 +225,7 @@ class ira: analysis""" fixed = True - for node in self.g.nodes(): + for node in self.graph.nodes(): if node in self.blocs: irb = self.blocs[node] if (irb.cur_reach != irb.prev_reach or @@ -329,13 +241,11 @@ class ira: Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - - PRE: gen_graph() """ fixed_point = False log.debug('iteration...') while not fixed_point: - for node in self.g.nodes(): + for node in self.graph.nodes(): if node in self.blocs: self.compute_reach_block(self.blocs[node]) fixed_point = self._test_kill_reach_fix() @@ -347,8 +257,6 @@ class ira: Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - - PRE: gen_graph() """ # Update r/w variables for all irblocs self.get_rw(self.ira_regs_ids()) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index e051dc8c..f957fcab 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -22,15 +22,16 @@ import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core import asmbloc from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmbloc import asm_symbol_pool +from miasm2.core.asmbloc import asm_symbol_pool, expr_is_label, asm_label, \ + asm_bloc +from miasm2.core.graph import DiGraph class irbloc(object): def __init__(self, label, irs, lines = []): - assert(isinstance(label, asmbloc.asm_label)) + assert(isinstance(label, asm_label)) self.label = label self.irs = irs self.lines = lines @@ -119,6 +120,47 @@ class irbloc(object): return "\n".join(o) +class DiGraphIR(DiGraph): + """DiGraph for IR instances""" + + def __init__(self, blocks, *args, **kwargs): + """Instanciate a DiGraphIR + @blocks: IR blocks + """ + self._blocks = blocks + super(DiGraphIR, self).__init__(*args, **kwargs) + + def dot(self): + """Output the graphviz script""" + out = """ + digraph asm_graph { + size="80,50"; + node [ + fontsize = "16", + shape = "box" + ]; + """ + all_lbls = {} + for lbl in self.nodes(): + if lbl not in self._blocks: + continue + irb = self._blocks[lbl] + ir_txt = [str(lbl)] + for irs in irb.irs: + for l in irs: + ir_txt.append(str(l)) + ir_txt.append("") + ir_txt.append("") + all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) + for l, v in all_lbls.items(): + out += '%s [label="%s"];\n' % (l, v) + + for a, b in self.edges(): + out += '%s -> %s;\n' % (hash(a), hash(b)) + out += '}' + return out + + class ir(object): def __init__(self, arch, attrib, symbol_pool=None): @@ -130,6 +172,8 @@ class ir(object): self.sp = arch.getsp(attrib) self.arch = arch self.attrib = attrib + # Lazy structure + self._graph = None def instr2ir(self, l): ir_bloc_cur, ir_blocs_extra = self.get_ir(l) @@ -140,13 +184,13 @@ class ir(object): @ad: an ExprId/ExprInt/label/int""" if (isinstance(ad, m2_expr.ExprId) and - isinstance(ad.name, asmbloc.asm_label)): + isinstance(ad.name, asm_label)): ad = ad.name if isinstance(ad, m2_expr.ExprInt): ad = int(ad.arg) if type(ad) in [int, long]: ad = self.symbol_pool.getby_offset_create(ad) - elif isinstance(ad, asmbloc.asm_label): + elif isinstance(ad, asm_label): ad = self.symbol_pool.getby_name_create(ad.name) return ad @@ -158,7 +202,7 @@ class ir(object): return self.blocs.get(label, None) def add_instr(self, l, ad=0, gen_pc_updt = False): - b = asmbloc.asm_bloc(l) + b = asm_bloc(l) b.lines = [l] self.add_bloc(b, gen_pc_updt) @@ -299,6 +343,8 @@ class ir(object): self.blocs[irb.label] = irb + # Forget graph if any + self._graph = None def get_instr_label(self, instr): """Returns the label associated to an instruction @@ -334,6 +380,70 @@ class ir(object): for b in self.blocs.values(): b.get_rw(regs_ids) - def ExprIsLabel(self, l): - return isinstance(l, m2_expr.ExprId) and isinstance(l.name, - asmbloc.asm_label) + def sort_dst(self, todo, done): + out = set() + while todo: + dst = todo.pop() + if expr_is_label(dst): + done.add(dst) + elif isinstance(dst, m2_expr.ExprMem) or isinstance(dst, m2_expr.ExprInt): + done.add(dst) + elif isinstance(dst, m2_expr.ExprCond): + todo.add(dst.src1) + todo.add(dst.src2) + elif isinstance(dst, m2_expr.ExprId): + out.add(dst) + else: + done.add(dst) + return out + + def dst_trackback(self, b): + dst = b.dst + todo = set([dst]) + done = set() + + for irs in reversed(b.irs): + if len(todo) == 0: + break + out = self.sort_dst(todo, done) + found = set() + follow = set() + for i in irs: + if not out: + break + for o in out: + if i.dst == o: + follow.add(i.src) + found.add(o) + for o in found: + out.remove(o) + + for o in out: + if o not in found: + follow.add(o) + todo = follow + + return done + + def _gen_graph(self): + """ + Gen irbloc digraph + """ + self._graph = DiGraphIR(self.blocs) + for lbl, b in self.blocs.iteritems(): + self._graph.add_node(lbl) + dst = self.dst_trackback(b) + for d in dst: + if isinstance(d, m2_expr.ExprInt): + d = m2_expr.ExprId( + self.symbol_pool.getby_offset_create(int(d.arg))) + if expr_is_label(d): + self._graph.add_edge(lbl, d.name) + + @property + def graph(self): + """Get a DiGraph representation of current IR instance. + Lazy property, building the graph on-demand""" + if self._graph is None: + self._gen_graph() + return self._graph diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 72759900..a7ba1a20 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,13 +18,9 @@ class TranslatorC(Translator): dct_rot = {'<<<': 'rot_left', '>>>': 'rot_right', } - dct_div = {'div8': "div_op", - 'div16': "div_op", - 'div32': "div_op", - 'idiv32': "div_op", # XXX to test - '<<<c_rez': 'rcl_rez_op', - '>>>c_rez': 'rcr_rez_op', - } + dct_rotc = {'<<<c_rez': 'rcl_rez_op', + '>>>c_rez': 'rcr_rez_op', + } def from_ExprId(self, expr): @@ -120,8 +116,8 @@ class TranslatorC(Translator): else: raise NotImplementedError('Unknown op: %r' % expr.op) - elif len(expr.args) == 3 and expr.op in self.dct_div: - return '(%s(%s, %s, %s, %s) &0x%x)' % (self.dct_div[expr.op], + elif len(expr.args) == 3 and expr.op in self.dct_rotc: + return '(%s(%s, %s, %s, %s) &0x%x)' % (self.dct_rotc[expr.op], expr.args[0].size, self.from_expr(expr.args[0]), self.from_expr(expr.args[1]), |