8 files changed, 346 insertions, 318 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py
index 79db46d4..b919310a 100644
--- a/example/disasm/full.py
+++ b/example/disasm/full.py
@@ -8,6 +8,7 @@ from miasm2.core.asmblock import log_asmblock, AsmLabel, AsmCFG
 from miasm2.expression.expression import ExprId
 from miasm2.core.interval import interval
 from miasm2.analysis.machine import Machine
+from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, ReachingDefinitions
 
 log = logging.getLogger("dis")
 console_handler = logging.StreamHandler()
@@ -52,6 +53,9 @@ parser.add_argument('-i', "--image", action="store_true",
                     help="Display image representation of disasm")
 parser.add_argument('-c', "--rawbinary", default=False, action="store_true",
                     help="Don't interpret input as ELF/PE/...")
+parser.add_argument('-d', "--defuse", action="store_true",
+                    help="Dump the def-use graph in file 'defuse.dot'."
+                    "The defuse is dumped after simplifications if -s option is specified")
 
 args = parser.parse_args()
 
@@ -207,7 +211,11 @@ if args.gen_ir:
         print block
 
     if args.simplify:
-        ir_arch_a.dead_simp()
+        dead_simp(ir_arch_a)
+
+    if args.defuse:
+        reachings = ReachingDefinitions(ir_arch_a)
+        open('graph_defuse.dot', 'w').write(DiGraphDefUse(reachings).dot())
 
     out = ir_arch_a.graph.dot()
     open('graph_irflow.dot', 'w').write(out)
diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py
index b28f8a81..058910e3 100644
--- a/example/expression/asm_to_ir.py
+++ b/example/expression/asm_to_ir.py
@@ -5,7 +5,7 @@ from miasm2.core import parse_asm
 from miasm2.expression.expression import *
 from miasm2.core import asmblock
 from miasm2.arch.x86.ira import ir_a_x86_32
-
+from miasm2.analysis.data_flow import dead_simp
 
 # First, asm code
 blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, '''
@@ -47,7 +47,7 @@ for lbl, irblock in ir_arch.blocks.items():
 # Dead propagation
 open('graph.dot', 'w').write(ir_arch.graph.dot())
 print '*' * 80
-ir_arch.dead_simp()
+dead_simp(ir_arch)
 open('graph2.dot', 'w').write(ir_arch.graph.dot())
 
 # Display new IR
diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py
index bd767165..c79cd7d4 100644
--- a/example/expression/graph_dataflow.py
+++ b/example/expression/graph_dataflow.py
@@ -8,6 +8,7 @@ from miasm2.arch.x86.disasm import dis_x86_32
 from miasm2.analysis.data_analysis import intra_bloc_flow_raw, inter_bloc_flow
 from miasm2.core.graph import DiGraph
 from miasm2.ir.symbexec import SymbolicExecutionEngine
+from miasm2.analysis.data_flow import dead_simp
 
 
 parser = ArgumentParser("Simple expression use for generating dataflow graph")
@@ -114,7 +115,7 @@ def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb):
     for irblock in ir_arch.blocks.values():
         print irblock
 
-    ir_arch.dead_simp()
+    dead_simp(ir_arch)
 
     irblock_0 = None
     for irblock in ir_arch.blocks.values():
diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py
index 3aac0281..d2552c82 100644
--- a/example/ida/graph_ir.py
+++ b/example/ida/graph_ir.py
@@ -10,7 +10,7 @@ from miasm2.expression.simplifications import expr_simp
 from miasm2.expression.expression import *
 from miasm2.analysis.data_analysis import inter_bloc_flow, \
     intra_bloc_flow_symbexec
-
+from miasm2.analysis.data_flow import dead_simp
 from utils import guess_machine, expr2colorstr
 
 
@@ -144,7 +144,7 @@ out = ir_arch.graph.dot()
 open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out)
 
 
-# ir_arch.dead_simp()
+# dead_simp(ir_arch)
 
 g = GraphMiasmIR(ir_arch, "Miasm IR graph", None)
 
@@ -198,7 +198,7 @@ def get_modified_symbols(sb):
 def gen_bloc_data_flow_graph(ir_arch, in_str, ad):  # arch, attrib, pool_bin, bloc, symbol_pool):
     out_str = ""
 
-    # ir_arch.dead_simp()
+    # dead_simp(ir_arch)
 
     irbloc_0 = None
     for irbloc in ir_arch.blocks.values():
diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py
new file mode 100644
index 00000000..b9764daa
--- /dev/null
+++ b/miasm2/analysis/data_flow.py
@@ -0,0 +1,252 @@
+"""Data flow analysis based on miasm intermediate representation"""
+
+from collections import namedtuple
+from miasm2.core.graph import DiGraph
+
+class ReachingDefinitions(dict):
+    """
+    Computes for each instruction the set of reaching definitions.
+    Example:
+    IR block:
+    lbl0:
+       0 A = 1
+         B = 3
+       1 B = 2
+       2 A = A + B + 4
+
+    Reach definition of lbl0:
+    (lbl0, 0) => {}
+    (lbl0, 1) => {A: {(lbl0, 0)}, B: {(lbl0, 0)}}
+    (lbl0, 2) => {A: {(lbl0, 0)}, B: {(lbl0, 1)}}
+    (lbl0, 3) => {A: {(lbl0, 2)}, B: {(lbl0, 1)}}
+
+    Source set 'REACHES' in: Kennedy, K. (1979).
+    A survey of data flow analysis techniques.
+    IBM Thomas J. Watson Research Division,  Algorithm MK
+
+    This class is usable as a dictionnary whose struture is
+    { (block, instr_index): { lvalue: set((block, instr_index)) } }
+    """
+
+    ir_a = None
+
+    def __init__(self, ir_a):
+        super(ReachingDefinitions, self).__init__()
+        self.ir_a = ir_a
+        self.compute()
+
+    def get_definitions(self, block_lbl, instruction):
+        """Returns the dict { lvalue: set((def_block_lbl, def_instr_index)) }
+        associated with self.ir_a.@block.irs[@instruction]
+        or {} if it is not yet computed
+        """
+        return self.get((block_lbl, instruction), {})
+
+    def compute(self):
+        """This is the main fixpoint"""
+        modified = True
+        while modified:
+            modified = False
+            for block in self.ir_a.blocks.itervalues():
+                modified |= self.process_block(block)
+
+    def process_block(self, block):
+        """
+        Fetch reach definitions from predecessors and propagate it to
+        the instruction in block @block.
+        """
+        predecessor_state = {}
+        for pred_lbl in self.ir_a.graph.predecessors(block.label):
+            pred = self.ir_a.blocks[pred_lbl]
+            for lval, definitions in self.get_definitions(pred_lbl, len(pred.irs)).iteritems():
+                predecessor_state.setdefault(lval, set()).update(definitions)
+
+        modified = self.get((block.label, 0)) != predecessor_state
+        if not modified:
+            return False
+        self[(block.label, 0)] = predecessor_state
+
+        for instr_index in xrange(len(block.irs)):
+            modified |= self.process_instruction(block, instr_index)
+        return modified
+
+    def process_instruction(self, block, instr_index):
+        """
+        Updates the reach definitions with values defined at
+        instruction @instr_index in block @block.
+        NB: the effect of instruction @instr_index in stored at index
+        (@block, @instr_index + 1).
+        """
+
+        instr = block.irs[instr_index]
+        defs = self.get_definitions(block.label, instr_index).copy()
+        for lval in instr:
+            defs.update({lval: set([(block.label, instr_index)])})
+
+        modified = self.get((block.label, instr_index + 1)) != defs
+        if modified:
+            self[(block.label, instr_index + 1)] = defs
+
+        return modified
+
+ATTR_DEP = {"color" : "black",
+            "_type" : "data"}
+
+InstrNode = namedtuple('InstructionNode', ['label', 'index', 'var'])
+
+class DiGraphDefUse(DiGraph):
+    """Representation of a Use-Definition graph as defined by
+    Kennedy, K. (1979). A survey of data flow analysis techniques.
+    IBM Thomas J. Watson Research Division.
+    Example:
+    IR block:
+    lbl0:
+       0 A = 1
+         B = 3
+       1 B = 2
+       2 A = A + B + 4
+
+    Def use analysis:
+    (lbl0, 0, A) => {(lbl0, 2, A)}
+    (lbl0, 0, B) => {}
+    (lbl0, 1, B) => {(lbl0, 2, A)}
+    (lbl0, 2, A) => {}
+
+    """
+
+
+    def __init__(self, reaching_defs,
+                 deref_mem=False, *args, **kwargs):
+        """Instanciate a DiGraphIR
+        @blocks: IR blocks
+        """
+        self._edge_attr = {}
+
+        # For dot display
+        self._filter_node = None
+        self._dot_offset = None
+        self._blocks = reaching_defs.ir_a.blocks
+
+        super(DiGraphDefUse, self).__init__(*args, **kwargs)
+        self._compute_def_use(reaching_defs,
+                              deref_mem=deref_mem)
+
+    def edge_attr(self, src, dst):
+        """
+        Return a dictionary of attributes for the edge between @src and @dst
+        @src: the source node of the edge
+        @dst: the destination node of the edge
+        """
+        return self._edge_attr[(src, dst)]
+
+    def _compute_def_use(self, reaching_defs,
+                         deref_mem=False):
+        for block in self._blocks.itervalues():
+            self._compute_def_use_block(block,
+                                        reaching_defs,
+                                        deref_mem=deref_mem)
+
+    def _compute_def_use_block(self, block, reaching_defs, deref_mem=False):
+        for ind, instr in enumerate(block.irs):
+            instruction_reaching_defs = reaching_defs.get_definitions(block.label, ind)
+            for lval, expr in instr.iteritems():
+                self.add_node(InstrNode(block.label, ind, lval))
+
+                read_vars = expr.get_r(mem_read=deref_mem)
+                if deref_mem and lval.is_mem():
+                    read_vars.update(lval.arg.get_r(mem_read=deref_mem))
+                for read_var in read_vars:
+                    for reach in instruction_reaching_defs.get(read_var, set()):
+                        self.add_data_edge(InstrNode(reach[0], reach[1], read_var),
+                                           InstrNode(block.label, ind, lval))
+
+    def del_edge(self, src, dst):
+        super(DiGraphDefUse, self).del_edge(src, dst)
+        del self._edge_attr[(src, dst)]
+
+    def add_uniq_labeled_edge(self, src, dst, edge_label):
+        """Adds the edge (@src, @dst) with label @edge_label.
+        if edge (@src, @dst) already exists, the previous label is overriden
+        """
+        self.add_uniq_edge(src, dst)
+        self._edge_attr[(src, dst)] = edge_label
+
+    def add_data_edge(self, src, dst):
+        """Adds an edge representing a data dependencie
+        and sets the label accordingly"""
+        self.add_uniq_labeled_edge(src, dst, ATTR_DEP)
+
+    def node2lines(self, node):
+        lbl, ind, reg = node
+        yield self.DotCellDescription(text="%s (%s)" % (lbl, ind),
+                                      attr={'align': 'center',
+                                            'colspan': 2,
+                                            'bgcolor': 'grey'})
+        src = self._blocks[lbl].irs[ind][reg]
+        line = "%s = %s" % (reg, src)
+        yield self.DotCellDescription(text=line, attr={})
+        yield self.DotCellDescription(text="", attr={})
+
+
+def dead_simp_useful_instrs(defuse, reaching_defs):
+    """Mark useful statements using previous reach analysis and defuse
+
+    Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
+    IBM Thomas J. Watson Research Division,  Algorithm MK
+
+    Return a set of triplets (block, instruction number, instruction) of
+    useful instructions
+    PRE: compute_reach(self)
+
+    """
+    ir_a = reaching_defs.ir_a
+    useful = set()
+
+    for block_lbl, block in ir_a.blocks.iteritems():
+        successors = ir_a.graph.successors(block_lbl)
+        for successor in successors:
+            if successor not in ir_a.blocks:
+                keep_all_definitions = True
+                break
+        else:
+            keep_all_definitions = False
+
+        # Block has a nonexistant successor or is a leaf
+        if keep_all_definitions or (len(successors) == 0):
+            valid_definitions = reaching_defs.get_definitions(block_lbl,
+                                                              len(block.irs))
+            for lval, definitions in valid_definitions.iteritems():
+                if (lval in ir_a.get_out_regs(block)
+                    or keep_all_definitions):
+                    for definition in definitions:
+                        useful.add(InstrNode(definition[0], definition[1], lval))
+
+        # Force keeping of specific cases
+        for instr_index, instr in enumerate(block.irs):
+            for lval, rval in instr.iteritems():
+                if (lval.is_mem()
+                    or ir_a.IRDst == lval
+                    or rval.is_function_call()):
+                    useful.add(InstrNode(block_lbl, instr_index, lval))
+
+    # Useful nodes dependencies
+    for node in useful:
+        for parent in defuse.reachable_parents(node):
+            yield parent
+
+def dead_simp(ir_a):
+    """
+    This function is used to analyse relation of a * complete function *
+    This means the blocks under study represent a solid full function graph.
+
+    Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
+    IBM Thomas J. Watson Research Division, page 43
+    """
+    reaching_defs = ReachingDefinitions(ir_a)
+    defuse = DiGraphDefUse(reaching_defs, deref_mem=True)
+    useful = set(dead_simp_useful_instrs(defuse, reaching_defs))
+    for block in ir_a.blocks.itervalues():
+        for idx, assignblk in enumerate(block.irs):
+            for lval in assignblk.keys():
+                if InstrNode(block.label, idx, lval) not in useful:
+                    del assignblk[lval]
diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py
index 4f7b321f..5c5193c9 100644
--- a/miasm2/ir/analysis.py
+++ b/miasm2/ir/analysis.py
@@ -6,6 +6,8 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine
 from miasm2.ir.ir import IntermediateRepresentation, AssignBlock
 from miasm2.expression.expression \
     import ExprAff, ExprCond, ExprId, ExprInt, ExprMem, ExprOp
+from miasm2.analysis.data_flow import dead_simp as new_dead_simp_imp
+import warnings
 
 log = logging.getLogger("analysis")
 console_handler = logging.StreamHandler()
@@ -27,10 +29,6 @@ class ira(IntermediateRepresentation):
 
     """
 
-    def ira_regs_ids(self):
-        """Returns ids of all registers used in the IR"""
-        return self.arch.regs.all_regs_ids + [self.IRDst]
-
     def call_effects(self, ad, instr):
         """Default modelisation of a function call to @ad. This may be used to:
 
@@ -58,247 +56,6 @@ class ira(IntermediateRepresentation):
             irb_cur.lines.append(instr)
         return None
 
-    def remove_dead_instr(self, irb, useful):
-        """Remove dead affectations using previous reaches analysis
-        @irb: irbloc instance
-        @useful: useful statements from previous reach analysis
-        Return True iff the block state has changed
-        PRE: compute_reach(self)
-        """
-        modified = False
-        for idx, assignblk in enumerate(irb.irs):
-            for dst in assignblk.keys():
-                if (isinstance(dst, ExprId) and
-                        (irb.label, idx, dst) not in useful):
-                    del assignblk[dst]
-                    modified = True
-        return modified
-
-    def init_useful_instr(self):
-        """Computes a set of triples (block, instruction number, instruction)
-        containing initially useful instructions :
-          - Instructions affecting final value of return registers
-          - Instructions affecting IRDst register
-          - Instructions writing in memory
-          - Function call instructions
-        Return set of intial useful instructions
-        """
-
-        useful = set()
-
-        for node in self.graph.nodes():
-            if node not in self.blocks:
-                continue
-
-            block = self.blocks[node]
-            successors = self.graph.successors(node)
-            has_son = bool(successors)
-            for p_son in successors:
-                if p_son not in self.blocks:
-                    # Leaf has lost its son: don't remove anything
-                    # reaching this block
-                    for r in self.ira_regs_ids():
-                        useful.update(block.irs[-1]._cur_reach[r].union(
-                            block.irs[-1].defout[r]))
-
-            # Function call, memory write or IRDst affectation
-            for idx, assignblk in enumerate(block.irs):
-                for dst, src in assignblk.iteritems():
-                    if src.is_function_call():
-                        # /!\ never remove ir calls
-                        useful.add((block.label, idx, dst))
-                    if isinstance(dst, ExprMem):
-                        useful.add((block.label, idx, dst))
-                    useful.update(block.irs[idx].defout[self.IRDst])
-
-            # Affecting return registers
-            if not has_son:
-                for r in self.get_out_regs(block):
-                    useful.update(block.irs[-1].defout[r]
-                                  if block.irs[-1].defout[r] else
-                                  block.irs[-1]._cur_reach[r])
-
-        return useful
-
-    def _mark_useful_code(self):
-        """Mark useful statements using previous reach analysis
-
-        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
-        IBM Thomas J. Watson Research Division,  Algorithm MK
-
-        Return a set of triplets (block, instruction number, instruction) of
-        useful instructions
-        PRE: compute_reach(self)
-
-        """
-
-        useful = self.init_useful_instr()
-        worklist = useful.copy()
-        while worklist:
-            elem = worklist.pop()
-            useful.add(elem)
-            irb_label, irs_ind, dst = elem
-
-            assignblk = self.blocks[irb_label].irs[irs_ind]
-            ins = assignblk.dst2ExprAff(dst)
-
-            # Handle dependencies of used variables in ins
-            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
-                worklist.update(
-                    assignblk._cur_reach[reg].difference(useful).difference(
-                        assignblk._cur_kill[reg]
-                        if not assignblk.defout[reg] else
-                        set()))
-                for _, _, defout_dst in assignblk.defout[reg]:
-                    # Loop case (dst in defout of current irb)
-                    if defout_dst == dst:
-                        worklist.update(
-                            assignblk._cur_reach[reg].difference(useful))
-        return useful
-
-    def remove_dead_code(self):
-        """Remove dead instructions in each block of the graph using the reach
-        analysis .
-        Returns True if a block has been modified
-        PRE : compute_reach(self)
-        """
-        useful = self._mark_useful_code()
-        modified = False
-        for irblock in self.blocks.values():
-            modified |= self.remove_dead_instr(irblock, useful)
-            # Remove useless structures
-            for assignblk in irblock.irs:
-                del assignblk._cur_kill
-                del assignblk._prev_kill
-                del assignblk._cur_reach
-                del assignblk._prev_reach
-        return modified
-
-    def set_dead_regs(self, b):
-        pass
-
-    def add_unused_regs(self):
-        pass
-
-    @staticmethod
-    def print_set(v_set):
-        """Print each triplet contained in a set
-        @v_set: set containing triplets elements
-        """
-        for p in v_set:
-            print '    (%s, %s, %s)' % p
-
-    def dump_bloc_state(self, irb):
-        print '*' * 80
-        for irs in irb.irs:
-            for assignblk in irs:
-                print 5 * "-"
-                print 'instr', assignblk
-                print 5 * "-"
-                for v in self.ira_regs_ids():
-                    if assignblk._cur_reach[v]:
-                        print 'REACH[%d][%s]' % (k, v)
-                        self.print_set(assignblk._cur_reach[v])
-                    if assignblk._cur_kill[v]:
-                        print 'KILL[%d][%s]' % (k, v)
-                        self.print_set(assignblk._cur_kill[v])
-                    if assignblk.defout[v]:
-                        print 'DEFOUT[%d][%s]' % (k, v)
-                        self.print_set(assignblk.defout[v])
-
-    def compute_reach_block(self, irb):
-        """Variable influence computation for a single block
-        @irb: irbloc instance
-        PRE: init_reach()
-        """
-
-        reach_block = {key: value.copy()
-                       for key, value in irb.irs[0]._cur_reach.iteritems()}
-
-        # Compute reach from predecessors
-        for n_pred in self.graph.predecessors(irb.label):
-            p_block = self.blocks[n_pred]
-
-            # Handle each register definition
-            for c_reg in self.ira_regs_ids():
-                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
-                pred_through = p_block.irs[-1].defout[c_reg].union(
-                    p_block.irs[-1]._cur_reach[c_reg].difference(
-                        p_block.irs[-1]._cur_kill[c_reg]))
-                reach_block[c_reg].update(pred_through)
-
-        # If a predecessor has changed
-        if reach_block != irb.irs[0]._cur_reach:
-            irb.irs[0]._cur_reach = reach_block
-            for c_reg in self.ira_regs_ids():
-                if irb.irs[0].defout[c_reg]:
-                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
-                    irb.irs[0]._cur_kill[c_reg].update(
-                        reach_block[c_reg].difference(irb.irs[0].defout[c_reg]))
-
-        # Compute reach and kill for block's instructions
-        for i in xrange(1, len(irb.irs)):
-            for c_reg in self.ira_regs_ids():
-                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
-                pred_through = irb.irs[i - 1].defout[c_reg].union(
-                    irb.irs[i - 1]._cur_reach[c_reg].difference(
-                        irb.irs[i - 1]._cur_kill[c_reg]))
-                irb.irs[i]._cur_reach[c_reg].update(pred_through)
-                if irb.irs[i].defout[c_reg]:
-                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
-                    irb.irs[i]._cur_kill[c_reg].update(
-                        irb.irs[i]._cur_reach[c_reg].difference(
-                            irb.irs[i].defout[c_reg]))
-
-    def _test_kill_reach_fix(self):
-        """Return True iff a fixed point has been reached during reach
-        analysis"""
-
-        fixed = True
-        for node in self.graph.nodes():
-            if node in self.blocks:
-                irb = self.blocks[node]
-                for assignblk in irb.irs:
-                    if (assignblk._cur_reach != assignblk._prev_reach or
-                            assignblk._cur_kill != assignblk._prev_kill):
-                        fixed = False
-                        # This is not a deepcopy, but cur_reach is assigned to a
-                        # new dictionnary on change in `compute_reach_block`
-                        assignblk._prev_reach = assignblk._cur_reach.copy()
-                        assignblk._prev_kill = assignblk._cur_kill.copy()
-        return fixed
-
-    def compute_reach(self):
-        """
-        Compute reach, defout and kill sets until a fixed point is reached.
-
-        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
-        IBM Thomas J. Watson Research Division, page 43
-        """
-        fixed_point = False
-        log.debug('iteration...')
-        while not fixed_point:
-            for node in self.graph.nodes():
-                if node in self.blocks:
-                    self.compute_reach_block(self.blocks[node])
-            fixed_point = self._test_kill_reach_fix()
-
-    def dead_simp(self):
-        """
-        This function is used to analyse relation of a * complete function *
-        This means the blocks under study represent a solid full function graph.
-
-        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
-        IBM Thomas J. Watson Research Division, page 43
-        """
-        # Update r/w variables for all irblocks
-        self.get_rw(self.ira_regs_ids())
-        # Liveness step
-        self.compute_reach()
-        self.remove_dead_code()
-        # Simplify expressions
-        self.simplify_blocs()
-
     def gen_equations(self):
         for irb in self.blocks.values():
             symbols_init = dict(self.arch.regs.all_regs_ids_init)
@@ -335,3 +92,8 @@ class ira(IntermediateRepresentation):
     def sizeof_pointer(self):
         "Return the size of a void* in bits"
         raise NotImplementedError("Abstract method")
+
+    def dead_simp(self):
+        """Deprecated: See miasm2.analysis.data_flow.dead_simp()"""
+        warnings.warn('DEPRECATION WARNING: Please use miasm2.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()')
+        new_dead_simp_imp(self)
diff --git a/test/ir/analysis.py b/test/analysis/data_flow.py
index 6209b36b..a40d000a 100644
--- a/test/ir/analysis.py
+++ b/test/analysis/data_flow.py
@@ -1,6 +1,7 @@
 """ Test cases for dead code elimination"""
 from miasm2.expression.expression import ExprId, ExprInt32, ExprAff, ExprMem
 from miasm2.core.asmblock import AsmLabel
+from miasm2.analysis.data_flow import *
 from miasm2.ir.analysis import ira
 from miasm2.ir.ir import IRBlock, AssignBlock
 
@@ -80,11 +81,11 @@ G1_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]])
 G1_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]])
 G1_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]])
 
+G1_IRA.blocks = {irb.label : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]}
+
 G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label)
 G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label)
 
-G1_IRA.blocks = {irb.label : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]}
-
 # Expected output for graph 1
 G1_EXP_IRA = IRATest()
 
@@ -103,12 +104,12 @@ G2_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]])
 G2_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]])
 G2_IRB2 = gen_irblock(LBL2, [[ExprAff(a, r)]])
 
+G2_IRA.blocks = {irb.label : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]}
+
 G2_IRA.graph.add_uniq_edge(G2_IRB0.label, G2_IRB1.label)
 G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label)
 G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB1.label)
 
-G2_IRA.blocks = {irb.label : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]}
-
 # Expected output for graph 2
 G2_EXP_IRA = IRATest()
 
@@ -127,12 +128,12 @@ G3_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]])
 G3_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]])
 G3_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]])
 
+G3_IRA.blocks = {irb.label : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]}
+
 G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB1.label)
 G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB2.label)
 G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB1.label)
 
-G3_IRA.blocks = {irb.label : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]}
-
 # Expected output for graph 3
 G3_EXP_IRA = IRATest()
 
@@ -152,14 +153,14 @@ G4_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]])
 G4_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]])
 G4_IRB3 = gen_irblock(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]])
 
+G4_IRA.blocks = {irb.label : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2,
+                                            G4_IRB3]}
+
 G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB1.label)
 G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB2.label)
 G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB3.label)
 G4_IRA.graph.add_uniq_edge(G4_IRB2.label, G4_IRB3.label)
 
-G4_IRA.blocks = {irb.label : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2,
-                                             G4_IRB3]}
-
 # Expected output for graph 4
 G4_EXP_IRA = IRATest()
 
@@ -182,6 +183,9 @@ G5_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST3)]])
 G5_IRB4 = gen_irblock(LBL4, [[ExprAff(a, a+CST1)]])
 G5_IRB5 = gen_irblock(LBL5, [[ExprAff(a, r)]])
 
+G5_IRA.blocks = {irb.label : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3,
+                                            G5_IRB4, G5_IRB5]}
+
 G5_IRA.graph.add_uniq_edge(G5_IRB0.label, G5_IRB1.label)
 G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB2.label)
 G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB3.label)
@@ -190,9 +194,6 @@ G5_IRA.graph.add_uniq_edge(G5_IRB3.label, G5_IRB4.label)
 G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB5.label)
 G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB1.label)
 
-G5_IRA.blocks = {irb.label : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3,
-                                             G5_IRB4, G5_IRB5]}
-
 # Expected output for graph 5
 G5_EXP_IRA = IRATest()
 
@@ -217,15 +218,14 @@ G6_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]])
 G6_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]])
 G6_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST2)]])
 
+G6_IRA.blocks = {irb.label : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2,
+                                            G6_IRB3]}
 
 G6_IRA.graph.add_uniq_edge(G6_IRB0.label, G6_IRB1.label)
 G6_IRA.graph.add_uniq_edge(G6_IRB1.label, G6_IRB2.label)
 G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB1.label)
 G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB3.label)
 
-G6_IRA.blocks = {irb.label : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2,
-                                             G6_IRB3]}
-
 # Expected output for graph 6
 G6_EXP_IRA = IRATest()
 
@@ -246,6 +246,8 @@ G7_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]])
 G7_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]])
 G7_IRB3 = gen_irblock(LBL3, [[ExprAff(a, r)]])
 
+G7_IRA.blocks = {irb.label : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2,
+                                            G7_IRB3]}
 
 G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB1.label)
 G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB2.label)
@@ -254,9 +256,6 @@ G7_IRA.graph.add_uniq_edge(G7_IRB2.label, G7_IRB3.label)
 G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB2.label)
 
 
-G7_IRA.blocks = {irb.label : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2,
-                                             G7_IRB3]}
-
 # Expected output for graph 7
 G7_EXP_IRA = IRATest()
 
@@ -278,6 +277,9 @@ G8_IRB2 = gen_irblock(LBL2, [[ExprAff(b, b+CST2)]])
 G8_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]])
 
 
+G8_IRA.blocks = {irb.label : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2,
+                                            G8_IRB3]}
+
 G8_IRA.graph.add_uniq_edge(G8_IRB0.label, G8_IRB1.label)
 G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB2.label)
 G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB1.label)
@@ -285,9 +287,6 @@ G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB3.label)
 G8_IRA.graph.add_uniq_edge(G8_IRB3.label, G8_IRB2.label)
 
 
-G8_IRA.blocks = {irb.label : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2,
-                                             G8_IRB3]}
-
 # Expected output for graph 8
 
 G8_EXP_IRA = IRATest()
@@ -310,6 +309,8 @@ G9_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)], [ExprAff(b, b+CST2)]])
 G9_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]])
 G9_IRB4 = gen_irblock(LBL4, [[ExprAff(r, a)], [ExprAff(r, b)]])
 
+G9_IRA.blocks = {irb.label : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2,
+                                            G9_IRB3, G9_IRB4]}
 
 G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB4.label)
 G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB1.label)
@@ -321,9 +322,6 @@ G9_IRA.graph.add_uniq_edge(G9_IRB2.label, G9_IRB3.label)
 G9_IRA.graph.add_uniq_edge(G9_IRB3.label, G9_IRB4.label)
 
 
-G9_IRA.blocks = {irb.label : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2,
-                                             G9_IRB3, G9_IRB4]}
-
 # Expected output for graph 9
 
 G9_EXP_IRA = IRATest()
@@ -348,15 +346,14 @@ G10_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]])
 G10_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]])
 G10_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST1)]])
 
+G10_IRA.blocks = {irb.label : irb for irb in [G10_IRB0, G10_IRB1,
+                                             G10_IRB2, G10_IRB3]}
 
 G10_IRA.graph.add_uniq_edge(G10_IRB0.label, G10_IRB1.label)
 G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB2.label)
 G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB1.label)
 G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB3.label)
 
-G10_IRA.blocks = {irb.label : irb for irb in [G10_IRB0, G10_IRB1,
-                                              G10_IRB2, G10_IRB3]}
-
 # Expected output for graph 10
 G10_EXP_IRA = IRATest()
 
@@ -379,13 +376,14 @@ G11_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST1)]])
 G11_IRB4 = gen_irblock(LBL4, [[ExprAff(b, b+CST1)]])
 
 
+G11_IRA.blocks = {irb.label : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]}
+
 G11_IRA.graph.add_uniq_edge(G11_IRB0.label, G11_IRB1.label)
 #G11_IRA.graph.add_uniq_edge(G11_IRB3.label, G11_IRB1.label)
 G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB0.label)
 #G11_IRA.graph.add_uniq_edge(G11_IRB4.label, G11_IRB0.label)
 G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label)
 
-G11_IRA.blocks = {irb.label : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]}
 
 # Expected output for graph 11
 G11_EXP_IRA = IRATest()
@@ -411,15 +409,15 @@ G12_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST3)]])
 G12_IRB4 = gen_irblock(LBL4, [[ExprAff(r, CST2)]])
 G12_IRB5 = gen_irblock(LBL5, [[ExprAff(r, b)]])
 
+G12_IRA.blocks = {irb.label : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2,
+                                             G12_IRB3, G12_IRB4, G12_IRB5]}
+
 G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB1.label)
 G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB2.label)
 G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB3.label)
 G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB4.label)
 G12_IRA.graph.add_uniq_edge(G12_IRB4.label, G12_IRB5.label)
 
-G12_IRA.blocks = {irb.label : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2,
-                                              G12_IRB3, G12_IRB4, G12_IRB5]}
-
 # Expected output for graph 12
 G12_EXP_IRA = IRATest()
 
@@ -446,14 +444,14 @@ G13_IRB2 = gen_irblock(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1),
 G13_IRB3 = gen_irblock(LBL3, [[]]) # lost son
 G13_IRB4 = gen_irblock(LBL4, [[ExprAff(b, CST2)]])
 
+G13_IRA.blocks = {irb.label : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2,
+                                             G13_IRB4]}
+
 G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB1.label)
 G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB4.label)
 G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB3.label)
 G13_IRA.graph.add_uniq_edge(G13_IRB4.label, G13_IRB2.label)
 
-G13_IRA.blocks = {irb.label : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2,
-                                              G13_IRB4]}
-
 # Expected output for graph 13
 G13_EXP_IRA = IRATest()
 
@@ -478,10 +476,10 @@ G14_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)],
                               [ExprAff(a, CST2)]])
 G14_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+c)]])
 
-G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label)
-
 G14_IRA.blocks = {irb.label : irb for irb in [G14_IRB0, G14_IRB1]}
 
+G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label)
+
 # Expected output for graph 1
 G14_EXP_IRA = IRATest()
 
@@ -501,10 +499,10 @@ G15_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST2)], [ExprAff(a, CST1),
                                                    ExprAff(c, CST1)]])
 G15_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]])
 
-G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label)
-
 G15_IRA.blocks = {irb.label : irb for irb in [G15_IRB0, G15_IRB1]}
 
+G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label)
+
 # Expected output for graph 1
 G15_EXP_IRA = IRATest()
 
@@ -523,6 +521,8 @@ G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2),
 G16_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]])
 G16_IRB2 = gen_irblock(LBL2, [[]])
 
+G16_IRA.blocks = {irb.label : irb for irb in [G16_IRB0, G16_IRB1]}
+
 G16_IRA.graph.add_uniq_edge(G16_IRB0.label, G16_IRB1.label)
 G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label)
 
@@ -642,23 +642,23 @@ G17_EXP_IRA.blocks = {irb.label : irb for irb in [G17_EXP_IRB0]}
 # Begining  of tests
 
 for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA),
-                          (G2_IRA, G2_EXP_IRA),
-                          (G3_IRA, G3_EXP_IRA),
-                          (G4_IRA, G4_EXP_IRA),
-                          (G5_IRA, G5_EXP_IRA),
-                          (G6_IRA, G6_EXP_IRA),
-                          (G7_IRA, G7_EXP_IRA),
-                          (G8_IRA, G8_EXP_IRA),
-                          (G9_IRA, G9_EXP_IRA),
-                          (G10_IRA, G10_EXP_IRA),
-                          (G11_IRA, G11_EXP_IRA),
-                          (G12_IRA, G12_EXP_IRA),
-                          (G13_IRA, G13_EXP_IRA),
-                          (G14_IRA, G14_EXP_IRA),
-                          (G15_IRA, G15_EXP_IRA),
-                          (G16_IRA, G16_EXP_IRA),
-                          (G17_IRA, G17_EXP_IRA)
-                     ]):
+                                (G2_IRA, G2_EXP_IRA),
+                                (G3_IRA, G3_EXP_IRA),
+                                (G4_IRA, G4_EXP_IRA),
+                                (G5_IRA, G5_EXP_IRA),
+                                (G6_IRA, G6_EXP_IRA),
+                                (G7_IRA, G7_EXP_IRA),
+                                (G8_IRA, G8_EXP_IRA),
+                                (G9_IRA, G9_EXP_IRA),
+                                (G10_IRA, G10_EXP_IRA),
+                                (G11_IRA, G11_EXP_IRA),
+                                (G12_IRA, G12_EXP_IRA),
+                                (G13_IRA, G13_EXP_IRA),
+                                (G14_IRA, G14_EXP_IRA),
+                                (G15_IRA, G15_EXP_IRA),
+                                (G16_IRA, G16_EXP_IRA),
+                                (G17_IRA, G17_EXP_IRA)
+]):
     # Extract test elements
     g_ira, g_exp_ira = test
 
@@ -667,10 +667,14 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA),
     # Print initial graph, for debug
     open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot())
 
-    # Simplify graph
-    g_ira.dead_simp()
+    reaching_defs = ReachingDefinitions(g_ira)
+    defuse = DiGraphDefUse(reaching_defs, deref_mem=True)
+    #open("defuse_%02d.dot" % (test_nb+1), "w").write(defuse.dot())
+
+    # # Simplify graph
+    dead_simp(g_ira)
 
-    # Print simplified graph, for debug
+    # # Print simplified graph, for debug
     open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot())
 
     # Same number of blocks
diff --git a/test/test_all.py b/test/test_all.py
index 4f3ea760..45f5ac97 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -246,11 +246,7 @@ for script in ["modint.py",
 for script in ["symbexec.py",
                ]:
     testset += RegressionTest([script], base_dir="ir")
-testset += RegressionTest(["analysis.py"], base_dir="ir",
-                          products=[fname for fnames in (
-            ["simp_graph_%02d.dot" % test_nb, "graph_%02d.dot" % test_nb]
-            for test_nb in xrange(1, 18))
-                                    for fname in fnames])
+
 testset += RegressionTest(["z3_ir.py"], base_dir="ir/translators",
                           tags=[TAGS["z3"]])
 testset += RegressionTest(["smt2.py"], base_dir="ir/translators",
@@ -277,6 +273,11 @@ testset += RegressionTest(["modularintervals.py"], base_dir="analysis")
 testset += RegressionTest(["range.py"], base_dir="analysis",
                           tags=[TAGS["z3"]])
 
+testset += RegressionTest(["data_flow.py"], base_dir="analysis",
+                          products=[fname for fnames in (
+            ["simp_graph_%02d.dot" % test_nb, "graph_%02d.dot" % test_nb]
+            for test_nb in xrange(1, 18))
+                                    for fname in fnames])
 
 ## Degraph
 class TestDepgraph(RegressionTest):
@@ -464,9 +465,9 @@ class ExampleDisasmFull(ExampleDisassembler):
 
     def __init__(self, *args, **kwargs):
         super(ExampleDisasmFull, self).__init__(*args, **kwargs)
-        self.command_line = ["full.py", "-g", "-s", "-m"] + self.command_line
-        self.products += ["graph_execflow.dot", "graph_irflow.dot",
-                          "graph_irflow_raw.dot", "lines.dot"]
+        self.command_line = ["full.py", "-g", "-s", "-d", "-m"] + self.command_line
+        self.products += ["graph_defuse.dot", "graph_execflow.dot",
+                          "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot"]
 
 
 testset += ExampleDisasmFull(["arml", Example.get_sample("demo_arm_l.bin"),