about summary refs log tree commit diff stats
path: root/miasm2/analysis/data_analysis.py
diff options
context:
space:
mode:
Diffstat (limited to 'miasm2/analysis/data_analysis.py')
-rw-r--r--miasm2/analysis/data_analysis.py137
1 files changed, 23 insertions, 114 deletions
diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py
index bceb0bd8..9c21fd51 100644
--- a/miasm2/analysis/data_analysis.py
+++ b/miasm2/analysis/data_analysis.py
@@ -4,12 +4,11 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine
 
 
 def get_node_name(label, i, n):
-    # n_name = "%s_%d_%s"%(label.name, i, n)
     n_name = (label, i, n)
     return n_name
 
 
-def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes):
+def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes):
     """
     Create data flow for an irbloc using raw IR expressions
     """
@@ -27,7 +26,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes):
                 continue
 
             for n in all_mems:
-                node_n_w = get_node_name(irb.label, i, n)
+                node_n_w = get_node_name(irb.loc_key, i, n)
                 if not n in nodes_r:
                     continue
                 o_r = n.arg.get_r(mem_read=False, cst_read=True)
@@ -35,7 +34,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes):
                     if n_r in current_nodes:
                         node_n_r = current_nodes[n_r]
                     else:
-                        node_n_r = get_node_name(irb.label, i, n_r)
+                        node_n_r = get_node_name(irb.loc_key, i, n_r)
                         current_nodes[n_r] = node_n_r
                         in_nodes[n_r] = node_n_r
                     flow_graph.add_uniq_edge(node_n_r, node_n_w)
@@ -46,80 +45,40 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes):
                 if n_r in current_nodes:
                     node_n_r = current_nodes[n_r]
                 else:
-                    node_n_r = get_node_name(irb.label, i, n_r)
+                    node_n_r = get_node_name(irb.loc_key, i, n_r)
                     current_nodes[n_r] = node_n_r
                     in_nodes[n_r] = node_n_r
 
                 flow_graph.add_node(node_n_r)
 
-                node_n_w = get_node_name(irb.label, i + 1, node_w)
+                node_n_w = get_node_name(irb.loc_key, i + 1, node_w)
                 out_nodes[node_w] = node_n_w
 
                 flow_graph.add_node(node_n_w)
                 flow_graph.add_uniq_edge(node_n_r, node_n_w)
 
 
-def intra_block_flow_symbexec(ir_arch, flow_graph, irb, in_nodes, out_nodes):
-    """
-    Create data flow for an irbloc using symbolic execution
-    """
-    current_nodes = {}
-
-    symbols_init = dict(ir_arch.arch.regs.regs_init)
-
-    sb = SymbolicExecutionEngine(ir_arch, dict(symbols_init))
-    sb.emulbloc(irb)
-    # print "*"*40
-    # print irb
-    # print sb.dump_id()
-    # print sb.dump_mem()
-
-    for n_w in sb.symbols:
-        # print n_w
-        v = sb.symbols[n_w]
-        if n_w in symbols_init and symbols_init[n_w] == v:
-            continue
-        read_values = v.get_r(cst_read=True)
-        # print n_w, v, [str(x) for x in read_values]
-        node_n_w = get_node_name(irb.label, len(irb), n_w)
 
-        for n_r in read_values:
-            if n_r in current_nodes:
-                node_n_r = current_nodes[n_r]
-            else:
-                node_n_r = get_node_name(irb.label, 0, n_r)
-                current_nodes[n_r] = node_n_r
-                in_nodes[n_r] = node_n_r
-
-            out_nodes[n_w] = node_n_w
-            flow_graph.add_uniq_edge(node_n_r, node_n_w)
-
-
-def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data):
+def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data):
     lbl, current_nodes, exec_nodes = todo
-    # print 'TODO'
-    # print lbl
-    # print [(str(x[0]), str(x[1])) for x in current_nodes]
     current_nodes = dict(current_nodes)
 
     # link current nodes to bloc in_nodes
-    if not lbl in ir_arch.blocks:
+    if not lbl in ircfg.blocks:
         print "cannot find bloc!!", lbl
         return set()
-    irb = ir_arch.blocks[lbl]
-    # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()]))
+    irb = ircfg.blocks[lbl]
     to_del = set()
-    for n_r, node_n_r in irb_in_nodes[irb.label].items():
+    for n_r, node_n_r in irb_in_nodes[irb.loc_key].items():
         if not n_r in current_nodes:
             continue
-        # print 'add link', current_nodes[n_r], node_n_r
         flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r)
         to_del.add(n_r)
 
     # if link exec to data, all nodes depends on exec nodes
     if link_exec_to_data:
         for n_x_r in exec_nodes:
-            for n_r, node_n_r in irb_in_nodes[irb.label].items():
+            for n_r, node_n_r in irb_in_nodes[irb.loc_key].items():
                 if not n_x_r in current_nodes:
                     continue
                 if isinstance(n_r, ExprInt):
@@ -127,18 +86,16 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo
                 flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r)
 
     # update current nodes using bloc out_nodes
-    for n_w, node_n_w in irb_out_nodes[irb.label].items():
+    for n_w, node_n_w in irb_out_nodes[irb.loc_key].items():
         current_nodes[n_w] = node_n_w
 
     # get nodes involved in exec flow
     x_nodes = tuple(sorted(list(irb.dst.get_r())))
 
     todo = set()
-    for lbl_dst in ir_arch.graph.successors(irb.label):
+    for lbl_dst in ircfg.successors(irb.loc_key):
         todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes))
 
-    # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()]))
-
     return todo
 
 
@@ -150,36 +107,29 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes):
     while todo:
         lbl = todo.pop()
         irb = ir_arch.blocks[lbl]
-        for lbl_son in ir_arch.graph.successors(irb.label):
+        for lbl_son in ir_arch.graph.successors(irb.loc_key):
             if not lbl_son in ir_arch.blocks:
                 print "cannot find bloc!!", lbl
                 continue
             irb_son = ir_arch.blocks[lbl_son]
-            for n_r in irb_in_nodes[irb_son.label]:
-                if n_r in irb_out_nodes[irb.label]:
+            for n_r in irb_in_nodes[irb_son.loc_key]:
+                if n_r in irb_out_nodes[irb.loc_key]:
                     continue
                 if not isinstance(n_r, ExprId):
                     continue
 
-                # print "###", n_r
-                # print "###", irb
-                # print "###", 'OUT', [str(x) for x in irb.out_nodes]
-                # print "###", irb_son
-                # print "###", 'IN', [str(x) for x in irb_son.in_nodes]
-
-                node_n_w = irb.label, len(irb), n_r
-                irb_out_nodes[irb.label][n_r] = node_n_w
-                if not n_r in irb_in_nodes[irb.label]:
-                    irb_in_nodes[irb.label][n_r] = irb.label, 0, n_r
-                node_n_r = irb_in_nodes[irb.label][n_r]
-                # print "###", node_n_r
-                for lbl_p in ir_arch.graph.predecessors(irb.label):
+                node_n_w = irb.loc_key, len(irb), n_r
+                irb_out_nodes[irb.loc_key][n_r] = node_n_w
+                if not n_r in irb_in_nodes[irb.loc_key]:
+                    irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r
+                node_n_r = irb_in_nodes[irb.loc_key][n_r]
+                for lbl_p in ir_arch.graph.predecessors(irb.loc_key):
                     todo.add(lbl_p)
 
                 flow_graph.add_uniq_edge(node_n_r, node_n_w)
 
 
-def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True):
+def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True):
 
     todo = set()
     done = set()
@@ -190,7 +140,7 @@ def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, li
         if state in done:
             continue
         done.add(state)
-        out = inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data)
+        out = inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data)
         todo.update(out)
 
 
@@ -220,46 +170,11 @@ class symb_exec_func:
         b = self.ir_arch.get_block(ad)
         if b is None:
             raise ValueError("unknown bloc! %s" % ad)
-        """
-        dead = b.dead[0]
-        for d in dead:
-            if d in variables:
-                del(variables[d])
-        """
         variables = variables.items()
 
         s = parent, ad, tuple(sorted(variables))
-        """
-        state_var = s[1]
-        if s in self.states_var_done:
-            print 'skip state'
-            return
-        if not ad in self.stateby_ad:
-            self.stateby_ad[ad] = set()
-        self.stateby_ad[ad].add(state_var)
-
-        """
         self.todo.add(s)
 
-        """
-        if not ad in self.cpt:
-            self.cpt[ad] = 0
-        """
-    """
-    def get_next_min(self):
-        state_by_ad = {}
-        for state in self.todo:
-            ad = state[1]
-            if not ad in state_by_ad:
-                state_by_ad[ad] = []
-            state_by_ad[ad].append(state)
-        print "XX", [len(x) for x in state_by_ad.values()]
-        state_by_ad = state_by_ad.items()
-        state_by_ad.sort(key=lambda x:len(x[1]))
-        state_by_ad.reverse()
-        return state_by_ad.pop()[1][0]
-    """
-
     def get_next_state(self):
         state = self.todo.pop()
         return state
@@ -273,16 +188,10 @@ class symb_exec_func:
         self.total_done += 1
         print 'CPT', self.total_done
         while self.todo:
-            # if self.total_done>20:
-            #    self.get_next_min()
-            # state = self.todo.pop()
             state = self.get_next_state()
             parent, ad, s = state
             self.states_done.add(state)
             self.states_var_done.add(state)
-            # if s in self.states_var_done:
-            #    print "state done"
-            #    continue
 
             sb = SymbolicExecutionEngine(self.ir_arch, dict(s))