diff options
| author | Camille Mougey <commial@gmail.com> | 2015-01-09 15:40:28 +0100 |
|---|---|---|
| committer | Camille Mougey <commial@gmail.com> | 2015-01-09 15:40:28 +0100 |
| commit | ae9004eec4ef264013208e1ce50f6e884a4da23f (patch) | |
| tree | e09503cc60f0ce9576a830a2bdc78b7f303961f2 | |
| parent | 0d6454f6b507c3c05c45507ffb56ac9f12eca536 (diff) | |
| parent | 48581006f578d7de1a5944c31952faa5a4e947b4 (diff) | |
| download | miasm-ae9004eec4ef264013208e1ce50f6e884a4da23f.tar.gz miasm-ae9004eec4ef264013208e1ce50f6e884a4da23f.zip | |
Merge pull request #32 from serpilliere/improve_liveness_analysis
Improve liveness analysis
| -rw-r--r-- | miasm2/arch/arm/ira.py | 4 | ||||
| -rw-r--r-- | miasm2/core/graph.py | 25 | ||||
| -rw-r--r-- | miasm2/ir/analysis.py | 180 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 12 |
4 files changed, 179 insertions, 42 deletions
diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py index 8cfe2da0..74548f86 100644 --- a/miasm2/arch/arm/ira.py +++ b/miasm2/arch/arm/ira.py @@ -67,9 +67,9 @@ class ir_a_arml(ir_a_arml_base): irs = self.call_effects(pc_val) irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) nbloc = irbloc(new_lbl, irs) - nbloc.lines = [l] + nbloc.lines = [l]*len(irs) self.blocs[new_lbl] = nbloc - irb.dst = ExprId(new_lbl, size=self.pc.size) + irb.set_dst(ExprId(new_lbl, size=self.pc.size)) """ if not bloc.lines: diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index 47047269..d29517cc 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -27,6 +27,17 @@ class DiGraph: self._nodes_to[n] = [] self._nodes_from[n] = [] + def del_node(self, node): + """Delete the @node of the graph; Also delete every edge to/from this + @node""" + + if node in self._nodes: + self._nodes.remove(node) + for pred in self.predecessors(node): + self.del_edge(pred, node) + for succ in self.successors(node): + self.del_edge(node, succ) + def add_edge(self, a, b): if not a in self._nodes: self.add_node(a) @@ -97,6 +108,20 @@ class DiGraph: out.append(path + [b]) return out + def get_all_parents(self, node): + """Return every parents nodes for a given @node""" + + todo = set([node]) + done = set() + while todo: + node = todo.pop() + if node in done: + continue + done.add(node) + for parent in self.predecessors(node): + todo.add(parent) + return done + def node2str(self, n): return str(n) diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index eaa5f6c8..790a790e 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -1,10 +1,17 @@ #!/usr/bin/env python #-*- coding:utf-8 -*- +import logging + from miasm2.ir.symbexec import symbexec from miasm2.core.graph import DiGraph from miasm2.expression.expression import * +log = logging.getLogger("analysis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARNING) class ira: @@ -55,7 +62,7 @@ class ira: return done - def gen_graph(self, link_all = False): + def gen_graph(self, link_all = True): """ Gen irbloc digraph @link_all: also gen edges to non present irblocs @@ -76,6 +83,7 @@ class ira: self.g.add_edge(lbl, d.name) def graph(self): + """Output the graphviz script""" out = """ digraph asm_graph { size="80,50"; @@ -83,46 +91,72 @@ class ira: fontsize = "16", shape = "box" ]; - """ + """ all_lbls = {} for lbl in self.g.nodes(): if not lbl in self.blocs: continue - b = self.blocs[lbl] + irb = self.blocs[lbl] ir_txt = [str(lbl)] - for irs in b.irs: + for irs in irb.irs: for l in irs: ir_txt.append(str(l)) ir_txt.append("") ir_txt.append("") - all_lbls[id(lbl)] = "\l\\\n".join(ir_txt) + all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) for l, v in all_lbls.items(): + # print l, v out += '%s [label="%s"];\n' % (l, v) for a, b in self.g.edges(): - out += '%s -> %s;\n' % (id(a), id(b)) + # print 'edge', a, b, hash(a), hash(b) + out += '%s -> %s;\n' % (hash(a), hash(b)) out += '}' return out - def remove_dead(self, b): - for ir, _, c_out in zip(b.irs, b.c_in, b.c_out): + def remove_dead(self, irb): + """Remove dead affectations using previous liveness analysis + @irb: irbloc instance + Return True iff the bloc state has changed + PRE: compute_in_out(@irb) + """ + + # print 'state1' + # self.dump_bloc_state(irb) + + modified = False + for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out): j = 0 while j < len(ir): i_cur = ir[j] if not isinstance(i_cur.dst, ExprId): pass + elif i_cur.dst == self.IRDst: + # never delete irdst + pass elif (isinstance(i_cur.src, ExprOp) and i_cur.src.op.startswith('call')): # /!\ never remove ir calls pass elif not i_cur.dst in c_out: del(ir[j]) + modified = True continue j += 1 + # print 'state2' + # self.dump_bloc_state(irb) + + return modified + def remove_blocs_dead(self): + """Call remove_dead on each irbloc + Return True iff one of the bloc state has changed + """ + modified = False for b in self.blocs.values(): - self.remove_dead(b) + modified |= self.remove_dead(b) + return modified # for test XXX TODO def set_dead_regs(self, b): @@ -131,77 +165,143 @@ class ira: def add_unused_regs(self): pass - def compute_in_out(self, b): + def dump_bloc_state(self, irb): + print '*'*80 + for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in, irb.c_out)): + print 'ir' + for x in ir: + print '\t', x + print 'R', [str(x) for x in irb.r[i]]#c_in] + print 'W', [str(x) for x in irb.w[i]]#c_out] + print 'IN', [str(x) for x in c_in] + print 'OUT', [str(x) for x in c_out] + + print 'OUT final', [str(x) for x in irb.c_out[-1]] + + def compute_in_out(self, irb): + """Liveness computation for a single bloc + @irb: irbloc instance + Return True iff bloc state has changed + """ # get out/in from bloc sons modified = False # set b in - if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))): + if irb.c_in[-1] != set(irb.r[-1].union(irb.c_out[-1].difference(irb.w[-1]))): modified = True - b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))) + irb.c_in[-1] = set(irb.r[-1].union(irb.c_out[-1].difference(irb.w[-1]))) # set b out c_out = set() has_son = False - for n_son in self.g.successors(b.label): - # print n_me, n_son + for n_son in self.g.successors(irb.label): has_son = True if not n_son in self.blocs: - print "leaf has lost her sons!" - continue - b_son = self.blocs[n_son] - c_out.update(b_son.c_in[0]) + # If the son is not defined, we will propagate our current out + # nodes to the in nodes's son + son_c_in = irb.c_out_missing + else: + son_c_in = self.blocs[n_son].c_in[0] + c_out.update(son_c_in) if not has_son: # special case: leaf nodes architecture dependant - c_out = self.get_out_regs(b) - if b.c_out[-1] != set(c_out): + c_out = self.get_out_regs(irb) + if irb.c_out[-1] != set(c_out): modified = True - b.c_out[-1] = set(c_out) + irb.c_out[-1] = set(c_out) # get out/in for bloc - for i in reversed(xrange(len(b.irs))): - if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))): + for i in reversed(xrange(len(irb.irs))): + if irb.c_in[i] != set(irb.r[i].union(irb.c_out[i].difference(irb.w[i]))): modified = True - b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i]))) - if b.c_out[i] != set(b.c_in[i + 1]): + irb.c_in[i] = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i]))) + if irb.c_out[i] != set(irb.c_in[i + 1]): modified = True - b.c_out[i] = set(b.c_in[i + 1]) + irb.c_out[i] = set(irb.c_in[i + 1]) + return modified def test_in_out_fix(self): + """Return True iff a fixed point has been reached during liveness + analysis""" + fixed = True - for n in self.g.nodes(): - if not n in self.blocs: + for node in self.g.nodes(): + if not node in self.blocs: # leaf has lost her son continue - b = self.blocs[n] - if b.c_in != b.l_in or b.c_out != b.l_out: + irb = self.blocs[node] + if irb.c_in != irb.l_in or irb.c_out != irb.l_out: fixed = False - b.l_in = [set(x) for x in b.c_in] - b.l_out = [set(x) for x in b.c_out] + irb.l_in = [set(x) for x in irb.c_in] + irb.l_out = [set(x) for x in irb.c_out] return fixed + def fill_missing_son_c_in(self): + """Find nodes with missing sons in graph, and add virtual link to all + written variables of all parents. + PRE: gen_graph() and get_rw()""" + + for node in self.g.nodes(): + if not node in self.blocs: + continue + self.blocs[node].c_out_missing = set() + has_all_son = True + for node_son in self.g.successors(node): + if not node_son in self.blocs: + has_all_son = False + break + if has_all_son: + continue + parents = self.g.get_all_parents(node) + for parent in parents: + irb = self.blocs[parent] + for var_w in irb.w: + self.blocs[node].c_out_missing.update(var_w) + def compute_dead(self): - self.get_rw() + """Iterate liveness analysis until a fixed point is reached. + PRE: gen_graph() + """ it = 0 fixed_point = False - print 'iteration...', + log.debug('iteration...') while not fixed_point: - print it, + log.debug(it) it += 1 for n in self.g.nodes(): if not n in self.blocs: # leaf has lost her son continue - b = self.blocs[n] - self.compute_in_out(b) + irb = self.blocs[n] + self.compute_in_out(irb) fixed_point = self.test_in_out_fix() - print def dead_simp(self): - self.compute_dead() - self.remove_blocs_dead() + """This function is used to analyse relation of a * complete function * + This mean the blocs under study represent a solid full function graph. + + Ref: CS 5470 Compiler Techniques and Principles (Liveness + analysis/Dataflow equations) + + PRE: call to gen_graph + """ + + modified = True + while modified: + log.debug('dead_simp step') + + # Update r/w variables for all irblocs + self.get_rw() + # Fill c_in for missing sons + self.fill_missing_son_c_in() + + # Liveness step + self.compute_dead() + modified = self.remove_blocs_dead() + + # Simplify expressions self.simplify_blocs() def gen_equations(self): diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 9db6d696..b955bac7 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -52,6 +52,18 @@ class irbloc: self._dst = dst return dst + def set_dst(self, value): + """Find and replace the IRDst affectation's source by @value""" + dst = None + for ir in self.irs: + for i in ir: + if isinstance(i.dst, m2_expr.ExprId) and i.dst.name == "IRDst": + if dst is not None: + raise ValueError('Multiple destinations!') + dst = value + i.src = value + self._dst = value + dst = property(get_dst) def get_rw(self): |