diff options
| -rw-r--r-- | example/disasm/full.py | 10 | ||||
| -rw-r--r-- | example/expression/asm_to_ir.py | 4 | ||||
| -rw-r--r-- | example/expression/graph_dataflow.py | 3 | ||||
| -rw-r--r-- | example/ida/graph_ir.py | 6 | ||||
| -rw-r--r-- | miasm2/analysis/data_flow.py | 252 | ||||
| -rw-r--r-- | miasm2/ir/analysis.py | 252 | ||||
| -rw-r--r-- | test/analysis/data_flow.py (renamed from test/ir/analysis.py) | 120 | ||||
| -rwxr-xr-x | test/test_all.py | 17 |
8 files changed, 346 insertions, 318 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index 79db46d4..b919310a 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -8,6 +8,7 @@ from miasm2.core.asmblock import log_asmblock, AsmLabel, AsmCFG from miasm2.expression.expression import ExprId from miasm2.core.interval import interval from miasm2.analysis.machine import Machine +from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, ReachingDefinitions log = logging.getLogger("dis") console_handler = logging.StreamHandler() @@ -52,6 +53,9 @@ parser.add_argument('-i', "--image", action="store_true", help="Display image representation of disasm") parser.add_argument('-c', "--rawbinary", default=False, action="store_true", help="Don't interpret input as ELF/PE/...") +parser.add_argument('-d', "--defuse", action="store_true", + help="Dump the def-use graph in file 'defuse.dot'." + "The defuse is dumped after simplifications if -s option is specified") args = parser.parse_args() @@ -207,7 +211,11 @@ if args.gen_ir: print block if args.simplify: - ir_arch_a.dead_simp() + dead_simp(ir_arch_a) + + if args.defuse: + reachings = ReachingDefinitions(ir_arch_a) + open('graph_defuse.dot', 'w').write(DiGraphDefUse(reachings).dot()) out = ir_arch_a.graph.dot() open('graph_irflow.dot', 'w').write(out) diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index b28f8a81..058910e3 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -5,7 +5,7 @@ from miasm2.core import parse_asm from miasm2.expression.expression import * from miasm2.core import asmblock from miasm2.arch.x86.ira import ir_a_x86_32 - +from miasm2.analysis.data_flow import dead_simp # First, asm code blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' @@ -47,7 +47,7 @@ for lbl, irblock in ir_arch.blocks.items(): # Dead propagation open('graph.dot', 'w').write(ir_arch.graph.dot()) print '*' * 80 -ir_arch.dead_simp() +dead_simp(ir_arch) open('graph2.dot', 'w').write(ir_arch.graph.dot()) # Display new IR diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index bd767165..c79cd7d4 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -8,6 +8,7 @@ from miasm2.arch.x86.disasm import dis_x86_32 from miasm2.analysis.data_analysis import intra_bloc_flow_raw, inter_bloc_flow from miasm2.core.graph import DiGraph from miasm2.ir.symbexec import SymbolicExecutionEngine +from miasm2.analysis.data_flow import dead_simp parser = ArgumentParser("Simple expression use for generating dataflow graph") @@ -114,7 +115,7 @@ def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): for irblock in ir_arch.blocks.values(): print irblock - ir_arch.dead_simp() + dead_simp(ir_arch) irblock_0 = None for irblock in ir_arch.blocks.values(): diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 3aac0281..d2552c82 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -10,7 +10,7 @@ from miasm2.expression.simplifications import expr_simp from miasm2.expression.expression import * from miasm2.analysis.data_analysis import inter_bloc_flow, \ intra_bloc_flow_symbexec - +from miasm2.analysis.data_flow import dead_simp from utils import guess_machine, expr2colorstr @@ -144,7 +144,7 @@ out = ir_arch.graph.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) -# ir_arch.dead_simp() +# dead_simp(ir_arch) g = GraphMiasmIR(ir_arch, "Miasm IR graph", None) @@ -198,7 +198,7 @@ def get_modified_symbols(sb): def gen_bloc_data_flow_graph(ir_arch, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): out_str = "" - # ir_arch.dead_simp() + # dead_simp(ir_arch) irbloc_0 = None for irbloc in ir_arch.blocks.values(): diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py new file mode 100644 index 00000000..b9764daa --- /dev/null +++ b/miasm2/analysis/data_flow.py @@ -0,0 +1,252 @@ +"""Data flow analysis based on miasm intermediate representation""" + +from collections import namedtuple +from miasm2.core.graph import DiGraph + +class ReachingDefinitions(dict): + """ + Computes for each instruction the set of reaching definitions. + Example: + IR block: + lbl0: + 0 A = 1 + B = 3 + 1 B = 2 + 2 A = A + B + 4 + + Reach definition of lbl0: + (lbl0, 0) => {} + (lbl0, 1) => {A: {(lbl0, 0)}, B: {(lbl0, 0)}} + (lbl0, 2) => {A: {(lbl0, 0)}, B: {(lbl0, 1)}} + (lbl0, 3) => {A: {(lbl0, 2)}, B: {(lbl0, 1)}} + + Source set 'REACHES' in: Kennedy, K. (1979). + A survey of data flow analysis techniques. + IBM Thomas J. Watson Research Division, Algorithm MK + + This class is usable as a dictionnary whose struture is + { (block, instr_index): { lvalue: set((block, instr_index)) } } + """ + + ir_a = None + + def __init__(self, ir_a): + super(ReachingDefinitions, self).__init__() + self.ir_a = ir_a + self.compute() + + def get_definitions(self, block_lbl, instruction): + """Returns the dict { lvalue: set((def_block_lbl, def_instr_index)) } + associated with self.ir_a.@block.irs[@instruction] + or {} if it is not yet computed + """ + return self.get((block_lbl, instruction), {}) + + def compute(self): + """This is the main fixpoint""" + modified = True + while modified: + modified = False + for block in self.ir_a.blocks.itervalues(): + modified |= self.process_block(block) + + def process_block(self, block): + """ + Fetch reach definitions from predecessors and propagate it to + the instruction in block @block. + """ + predecessor_state = {} + for pred_lbl in self.ir_a.graph.predecessors(block.label): + pred = self.ir_a.blocks[pred_lbl] + for lval, definitions in self.get_definitions(pred_lbl, len(pred.irs)).iteritems(): + predecessor_state.setdefault(lval, set()).update(definitions) + + modified = self.get((block.label, 0)) != predecessor_state + if not modified: + return False + self[(block.label, 0)] = predecessor_state + + for instr_index in xrange(len(block.irs)): + modified |= self.process_instruction(block, instr_index) + return modified + + def process_instruction(self, block, instr_index): + """ + Updates the reach definitions with values defined at + instruction @instr_index in block @block. + NB: the effect of instruction @instr_index in stored at index + (@block, @instr_index + 1). + """ + + instr = block.irs[instr_index] + defs = self.get_definitions(block.label, instr_index).copy() + for lval in instr: + defs.update({lval: set([(block.label, instr_index)])}) + + modified = self.get((block.label, instr_index + 1)) != defs + if modified: + self[(block.label, instr_index + 1)] = defs + + return modified + +ATTR_DEP = {"color" : "black", + "_type" : "data"} + +InstrNode = namedtuple('InstructionNode', ['label', 'index', 'var']) + +class DiGraphDefUse(DiGraph): + """Representation of a Use-Definition graph as defined by + Kennedy, K. (1979). A survey of data flow analysis techniques. + IBM Thomas J. Watson Research Division. + Example: + IR block: + lbl0: + 0 A = 1 + B = 3 + 1 B = 2 + 2 A = A + B + 4 + + Def use analysis: + (lbl0, 0, A) => {(lbl0, 2, A)} + (lbl0, 0, B) => {} + (lbl0, 1, B) => {(lbl0, 2, A)} + (lbl0, 2, A) => {} + + """ + + + def __init__(self, reaching_defs, + deref_mem=False, *args, **kwargs): + """Instanciate a DiGraphIR + @blocks: IR blocks + """ + self._edge_attr = {} + + # For dot display + self._filter_node = None + self._dot_offset = None + self._blocks = reaching_defs.ir_a.blocks + + super(DiGraphDefUse, self).__init__(*args, **kwargs) + self._compute_def_use(reaching_defs, + deref_mem=deref_mem) + + def edge_attr(self, src, dst): + """ + Return a dictionary of attributes for the edge between @src and @dst + @src: the source node of the edge + @dst: the destination node of the edge + """ + return self._edge_attr[(src, dst)] + + def _compute_def_use(self, reaching_defs, + deref_mem=False): + for block in self._blocks.itervalues(): + self._compute_def_use_block(block, + reaching_defs, + deref_mem=deref_mem) + + def _compute_def_use_block(self, block, reaching_defs, deref_mem=False): + for ind, instr in enumerate(block.irs): + instruction_reaching_defs = reaching_defs.get_definitions(block.label, ind) + for lval, expr in instr.iteritems(): + self.add_node(InstrNode(block.label, ind, lval)) + + read_vars = expr.get_r(mem_read=deref_mem) + if deref_mem and lval.is_mem(): + read_vars.update(lval.arg.get_r(mem_read=deref_mem)) + for read_var in read_vars: + for reach in instruction_reaching_defs.get(read_var, set()): + self.add_data_edge(InstrNode(reach[0], reach[1], read_var), + InstrNode(block.label, ind, lval)) + + def del_edge(self, src, dst): + super(DiGraphDefUse, self).del_edge(src, dst) + del self._edge_attr[(src, dst)] + + def add_uniq_labeled_edge(self, src, dst, edge_label): + """Adds the edge (@src, @dst) with label @edge_label. + if edge (@src, @dst) already exists, the previous label is overriden + """ + self.add_uniq_edge(src, dst) + self._edge_attr[(src, dst)] = edge_label + + def add_data_edge(self, src, dst): + """Adds an edge representing a data dependencie + and sets the label accordingly""" + self.add_uniq_labeled_edge(src, dst, ATTR_DEP) + + def node2lines(self, node): + lbl, ind, reg = node + yield self.DotCellDescription(text="%s (%s)" % (lbl, ind), + attr={'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey'}) + src = self._blocks[lbl].irs[ind][reg] + line = "%s = %s" % (reg, src) + yield self.DotCellDescription(text=line, attr={}) + yield self.DotCellDescription(text="", attr={}) + + +def dead_simp_useful_instrs(defuse, reaching_defs): + """Mark useful statements using previous reach analysis and defuse + + Source : Kennedy, K. (1979). A survey of data flow analysis techniques. + IBM Thomas J. Watson Research Division, Algorithm MK + + Return a set of triplets (block, instruction number, instruction) of + useful instructions + PRE: compute_reach(self) + + """ + ir_a = reaching_defs.ir_a + useful = set() + + for block_lbl, block in ir_a.blocks.iteritems(): + successors = ir_a.graph.successors(block_lbl) + for successor in successors: + if successor not in ir_a.blocks: + keep_all_definitions = True + break + else: + keep_all_definitions = False + + # Block has a nonexistant successor or is a leaf + if keep_all_definitions or (len(successors) == 0): + valid_definitions = reaching_defs.get_definitions(block_lbl, + len(block.irs)) + for lval, definitions in valid_definitions.iteritems(): + if (lval in ir_a.get_out_regs(block) + or keep_all_definitions): + for definition in definitions: + useful.add(InstrNode(definition[0], definition[1], lval)) + + # Force keeping of specific cases + for instr_index, instr in enumerate(block.irs): + for lval, rval in instr.iteritems(): + if (lval.is_mem() + or ir_a.IRDst == lval + or rval.is_function_call()): + useful.add(InstrNode(block_lbl, instr_index, lval)) + + # Useful nodes dependencies + for node in useful: + for parent in defuse.reachable_parents(node): + yield parent + +def dead_simp(ir_a): + """ + This function is used to analyse relation of a * complete function * + This means the blocks under study represent a solid full function graph. + + Source : Kennedy, K. (1979). A survey of data flow analysis techniques. + IBM Thomas J. Watson Research Division, page 43 + """ + reaching_defs = ReachingDefinitions(ir_a) + defuse = DiGraphDefUse(reaching_defs, deref_mem=True) + useful = set(dead_simp_useful_instrs(defuse, reaching_defs)) + for block in ir_a.blocks.itervalues(): + for idx, assignblk in enumerate(block.irs): + for lval in assignblk.keys(): + if InstrNode(block.label, idx, lval) not in useful: + del assignblk[lval] diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 4f7b321f..5c5193c9 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -6,6 +6,8 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IntermediateRepresentation, AssignBlock from miasm2.expression.expression \ import ExprAff, ExprCond, ExprId, ExprInt, ExprMem, ExprOp +from miasm2.analysis.data_flow import dead_simp as new_dead_simp_imp +import warnings log = logging.getLogger("analysis") console_handler = logging.StreamHandler() @@ -27,10 +29,6 @@ class ira(IntermediateRepresentation): """ - def ira_regs_ids(self): - """Returns ids of all registers used in the IR""" - return self.arch.regs.all_regs_ids + [self.IRDst] - def call_effects(self, ad, instr): """Default modelisation of a function call to @ad. This may be used to: @@ -58,247 +56,6 @@ class ira(IntermediateRepresentation): irb_cur.lines.append(instr) return None - def remove_dead_instr(self, irb, useful): - """Remove dead affectations using previous reaches analysis - @irb: irbloc instance - @useful: useful statements from previous reach analysis - Return True iff the block state has changed - PRE: compute_reach(self) - """ - modified = False - for idx, assignblk in enumerate(irb.irs): - for dst in assignblk.keys(): - if (isinstance(dst, ExprId) and - (irb.label, idx, dst) not in useful): - del assignblk[dst] - modified = True - return modified - - def init_useful_instr(self): - """Computes a set of triples (block, instruction number, instruction) - containing initially useful instructions : - - Instructions affecting final value of return registers - - Instructions affecting IRDst register - - Instructions writing in memory - - Function call instructions - Return set of intial useful instructions - """ - - useful = set() - - for node in self.graph.nodes(): - if node not in self.blocks: - continue - - block = self.blocks[node] - successors = self.graph.successors(node) - has_son = bool(successors) - for p_son in successors: - if p_son not in self.blocks: - # Leaf has lost its son: don't remove anything - # reaching this block - for r in self.ira_regs_ids(): - useful.update(block.irs[-1]._cur_reach[r].union( - block.irs[-1].defout[r])) - - # Function call, memory write or IRDst affectation - for idx, assignblk in enumerate(block.irs): - for dst, src in assignblk.iteritems(): - if src.is_function_call(): - # /!\ never remove ir calls - useful.add((block.label, idx, dst)) - if isinstance(dst, ExprMem): - useful.add((block.label, idx, dst)) - useful.update(block.irs[idx].defout[self.IRDst]) - - # Affecting return registers - if not has_son: - for r in self.get_out_regs(block): - useful.update(block.irs[-1].defout[r] - if block.irs[-1].defout[r] else - block.irs[-1]._cur_reach[r]) - - return useful - - def _mark_useful_code(self): - """Mark useful statements using previous reach analysis - - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. Watson Research Division, Algorithm MK - - Return a set of triplets (block, instruction number, instruction) of - useful instructions - PRE: compute_reach(self) - - """ - - useful = self.init_useful_instr() - worklist = useful.copy() - while worklist: - elem = worklist.pop() - useful.add(elem) - irb_label, irs_ind, dst = elem - - assignblk = self.blocks[irb_label].irs[irs_ind] - ins = assignblk.dst2ExprAff(dst) - - # Handle dependencies of used variables in ins - for reg in ins.get_r(True).intersection(self.ira_regs_ids()): - worklist.update( - assignblk._cur_reach[reg].difference(useful).difference( - assignblk._cur_kill[reg] - if not assignblk.defout[reg] else - set())) - for _, _, defout_dst in assignblk.defout[reg]: - # Loop case (dst in defout of current irb) - if defout_dst == dst: - worklist.update( - assignblk._cur_reach[reg].difference(useful)) - return useful - - def remove_dead_code(self): - """Remove dead instructions in each block of the graph using the reach - analysis . - Returns True if a block has been modified - PRE : compute_reach(self) - """ - useful = self._mark_useful_code() - modified = False - for irblock in self.blocks.values(): - modified |= self.remove_dead_instr(irblock, useful) - # Remove useless structures - for assignblk in irblock.irs: - del assignblk._cur_kill - del assignblk._prev_kill - del assignblk._cur_reach - del assignblk._prev_reach - return modified - - def set_dead_regs(self, b): - pass - - def add_unused_regs(self): - pass - - @staticmethod - def print_set(v_set): - """Print each triplet contained in a set - @v_set: set containing triplets elements - """ - for p in v_set: - print ' (%s, %s, %s)' % p - - def dump_bloc_state(self, irb): - print '*' * 80 - for irs in irb.irs: - for assignblk in irs: - print 5 * "-" - print 'instr', assignblk - print 5 * "-" - for v in self.ira_regs_ids(): - if assignblk._cur_reach[v]: - print 'REACH[%d][%s]' % (k, v) - self.print_set(assignblk._cur_reach[v]) - if assignblk._cur_kill[v]: - print 'KILL[%d][%s]' % (k, v) - self.print_set(assignblk._cur_kill[v]) - if assignblk.defout[v]: - print 'DEFOUT[%d][%s]' % (k, v) - self.print_set(assignblk.defout[v]) - - def compute_reach_block(self, irb): - """Variable influence computation for a single block - @irb: irbloc instance - PRE: init_reach() - """ - - reach_block = {key: value.copy() - for key, value in irb.irs[0]._cur_reach.iteritems()} - - # Compute reach from predecessors - for n_pred in self.graph.predecessors(irb.label): - p_block = self.blocks[n_pred] - - # Handle each register definition - for c_reg in self.ira_regs_ids(): - # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p) - pred_through = p_block.irs[-1].defout[c_reg].union( - p_block.irs[-1]._cur_reach[c_reg].difference( - p_block.irs[-1]._cur_kill[c_reg])) - reach_block[c_reg].update(pred_through) - - # If a predecessor has changed - if reach_block != irb.irs[0]._cur_reach: - irb.irs[0]._cur_reach = reach_block - for c_reg in self.ira_regs_ids(): - if irb.irs[0].defout[c_reg]: - # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY - irb.irs[0]._cur_kill[c_reg].update( - reach_block[c_reg].difference(irb.irs[0].defout[c_reg])) - - # Compute reach and kill for block's instructions - for i in xrange(1, len(irb.irs)): - for c_reg in self.ira_regs_ids(): - # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p) - pred_through = irb.irs[i - 1].defout[c_reg].union( - irb.irs[i - 1]._cur_reach[c_reg].difference( - irb.irs[i - 1]._cur_kill[c_reg])) - irb.irs[i]._cur_reach[c_reg].update(pred_through) - if irb.irs[i].defout[c_reg]: - # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY - irb.irs[i]._cur_kill[c_reg].update( - irb.irs[i]._cur_reach[c_reg].difference( - irb.irs[i].defout[c_reg])) - - def _test_kill_reach_fix(self): - """Return True iff a fixed point has been reached during reach - analysis""" - - fixed = True - for node in self.graph.nodes(): - if node in self.blocks: - irb = self.blocks[node] - for assignblk in irb.irs: - if (assignblk._cur_reach != assignblk._prev_reach or - assignblk._cur_kill != assignblk._prev_kill): - fixed = False - # This is not a deepcopy, but cur_reach is assigned to a - # new dictionnary on change in `compute_reach_block` - assignblk._prev_reach = assignblk._cur_reach.copy() - assignblk._prev_kill = assignblk._cur_kill.copy() - return fixed - - def compute_reach(self): - """ - Compute reach, defout and kill sets until a fixed point is reached. - - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. Watson Research Division, page 43 - """ - fixed_point = False - log.debug('iteration...') - while not fixed_point: - for node in self.graph.nodes(): - if node in self.blocks: - self.compute_reach_block(self.blocks[node]) - fixed_point = self._test_kill_reach_fix() - - def dead_simp(self): - """ - This function is used to analyse relation of a * complete function * - This means the blocks under study represent a solid full function graph. - - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. Watson Research Division, page 43 - """ - # Update r/w variables for all irblocks - self.get_rw(self.ira_regs_ids()) - # Liveness step - self.compute_reach() - self.remove_dead_code() - # Simplify expressions - self.simplify_blocs() - def gen_equations(self): for irb in self.blocks.values(): symbols_init = dict(self.arch.regs.all_regs_ids_init) @@ -335,3 +92,8 @@ class ira(IntermediateRepresentation): def sizeof_pointer(self): "Return the size of a void* in bits" raise NotImplementedError("Abstract method") + + def dead_simp(self): + """Deprecated: See miasm2.analysis.data_flow.dead_simp()""" + warnings.warn('DEPRECATION WARNING: Please use miasm2.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') + new_dead_simp_imp(self) diff --git a/test/ir/analysis.py b/test/analysis/data_flow.py index 6209b36b..a40d000a 100644 --- a/test/ir/analysis.py +++ b/test/analysis/data_flow.py @@ -1,6 +1,7 @@ """ Test cases for dead code elimination""" from miasm2.expression.expression import ExprId, ExprInt32, ExprAff, ExprMem from miasm2.core.asmblock import AsmLabel +from miasm2.analysis.data_flow import * from miasm2.ir.analysis import ira from miasm2.ir.ir import IRBlock, AssignBlock @@ -80,11 +81,11 @@ G1_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G1_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) G1_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) +G1_IRA.blocks = {irb.label : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]} + G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) -G1_IRA.blocks = {irb.label : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]} - # Expected output for graph 1 G1_EXP_IRA = IRATest() @@ -103,12 +104,12 @@ G2_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G2_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G2_IRB2 = gen_irblock(LBL2, [[ExprAff(a, r)]]) +G2_IRA.blocks = {irb.label : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]} + G2_IRA.graph.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB1.label) -G2_IRA.blocks = {irb.label : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]} - # Expected output for graph 2 G2_EXP_IRA = IRATest() @@ -127,12 +128,12 @@ G3_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G3_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) +G3_IRA.blocks = {irb.label : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]} + G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB2.label) G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB1.label) -G3_IRA.blocks = {irb.label : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]} - # Expected output for graph 3 G3_EXP_IRA = IRATest() @@ -152,14 +153,14 @@ G4_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G4_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]]) G4_IRB3 = gen_irblock(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) +G4_IRA.blocks = {irb.label : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2, + G4_IRB3]} + G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB2.label) G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB3.label) G4_IRA.graph.add_uniq_edge(G4_IRB2.label, G4_IRB3.label) -G4_IRA.blocks = {irb.label : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2, - G4_IRB3]} - # Expected output for graph 4 G4_EXP_IRA = IRATest() @@ -182,6 +183,9 @@ G5_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST3)]]) G5_IRB4 = gen_irblock(LBL4, [[ExprAff(a, a+CST1)]]) G5_IRB5 = gen_irblock(LBL5, [[ExprAff(a, r)]]) +G5_IRA.blocks = {irb.label : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, + G5_IRB4, G5_IRB5]} + G5_IRA.graph.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB3.label) @@ -190,9 +194,6 @@ G5_IRA.graph.add_uniq_edge(G5_IRB3.label, G5_IRB4.label) G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB5.label) G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB1.label) -G5_IRA.blocks = {irb.label : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, - G5_IRB4, G5_IRB5]} - # Expected output for graph 5 G5_EXP_IRA = IRATest() @@ -217,15 +218,14 @@ G6_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) G6_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]]) G6_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST2)]]) +G6_IRA.blocks = {irb.label : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2, + G6_IRB3]} G6_IRA.graph.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) G6_IRA.graph.add_uniq_edge(G6_IRB1.label, G6_IRB2.label) G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB1.label) G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB3.label) -G6_IRA.blocks = {irb.label : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2, - G6_IRB3]} - # Expected output for graph 6 G6_EXP_IRA = IRATest() @@ -246,6 +246,8 @@ G7_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G7_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]]) G7_IRB3 = gen_irblock(LBL3, [[ExprAff(a, r)]]) +G7_IRA.blocks = {irb.label : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2, + G7_IRB3]} G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) @@ -254,9 +256,6 @@ G7_IRA.graph.add_uniq_edge(G7_IRB2.label, G7_IRB3.label) G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB2.label) -G7_IRA.blocks = {irb.label : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2, - G7_IRB3]} - # Expected output for graph 7 G7_EXP_IRA = IRATest() @@ -278,6 +277,9 @@ G8_IRB2 = gen_irblock(LBL2, [[ExprAff(b, b+CST2)]]) G8_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]]) +G8_IRA.blocks = {irb.label : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2, + G8_IRB3]} + G8_IRA.graph.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB1.label) @@ -285,9 +287,6 @@ G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB3.label) G8_IRA.graph.add_uniq_edge(G8_IRB3.label, G8_IRB2.label) -G8_IRA.blocks = {irb.label : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2, - G8_IRB3]} - # Expected output for graph 8 G8_EXP_IRA = IRATest() @@ -310,6 +309,8 @@ G9_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)], [ExprAff(b, b+CST2)]]) G9_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]]) G9_IRB4 = gen_irblock(LBL4, [[ExprAff(r, a)], [ExprAff(r, b)]]) +G9_IRA.blocks = {irb.label : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2, + G9_IRB3, G9_IRB4]} G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB4.label) G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB1.label) @@ -321,9 +322,6 @@ G9_IRA.graph.add_uniq_edge(G9_IRB2.label, G9_IRB3.label) G9_IRA.graph.add_uniq_edge(G9_IRB3.label, G9_IRB4.label) -G9_IRA.blocks = {irb.label : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2, - G9_IRB3, G9_IRB4]} - # Expected output for graph 9 G9_EXP_IRA = IRATest() @@ -348,15 +346,14 @@ G10_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) G10_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]]) G10_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST1)]]) +G10_IRA.blocks = {irb.label : irb for irb in [G10_IRB0, G10_IRB1, + G10_IRB2, G10_IRB3]} G10_IRA.graph.add_uniq_edge(G10_IRB0.label, G10_IRB1.label) G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB1.label) G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB3.label) -G10_IRA.blocks = {irb.label : irb for irb in [G10_IRB0, G10_IRB1, - G10_IRB2, G10_IRB3]} - # Expected output for graph 10 G10_EXP_IRA = IRATest() @@ -379,13 +376,14 @@ G11_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST1)]]) G11_IRB4 = gen_irblock(LBL4, [[ExprAff(b, b+CST1)]]) +G11_IRA.blocks = {irb.label : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]} + G11_IRA.graph.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) #G11_IRA.graph.add_uniq_edge(G11_IRB3.label, G11_IRB1.label) G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB0.label) #G11_IRA.graph.add_uniq_edge(G11_IRB4.label, G11_IRB0.label) G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) -G11_IRA.blocks = {irb.label : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]} # Expected output for graph 11 G11_EXP_IRA = IRATest() @@ -411,15 +409,15 @@ G12_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST3)]]) G12_IRB4 = gen_irblock(LBL4, [[ExprAff(r, CST2)]]) G12_IRB5 = gen_irblock(LBL5, [[ExprAff(r, b)]]) +G12_IRA.blocks = {irb.label : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2, + G12_IRB3, G12_IRB4, G12_IRB5]} + G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB2.label) G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB3.label) G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB4.label) G12_IRA.graph.add_uniq_edge(G12_IRB4.label, G12_IRB5.label) -G12_IRA.blocks = {irb.label : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2, - G12_IRB3, G12_IRB4, G12_IRB5]} - # Expected output for graph 12 G12_EXP_IRA = IRATest() @@ -446,14 +444,14 @@ G13_IRB2 = gen_irblock(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1), G13_IRB3 = gen_irblock(LBL3, [[]]) # lost son G13_IRB4 = gen_irblock(LBL4, [[ExprAff(b, CST2)]]) +G13_IRA.blocks = {irb.label : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2, + G13_IRB4]} + G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB4.label) G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB3.label) G13_IRA.graph.add_uniq_edge(G13_IRB4.label, G13_IRB2.label) -G13_IRA.blocks = {irb.label : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2, - G13_IRB4]} - # Expected output for graph 13 G13_EXP_IRA = IRATest() @@ -478,10 +476,10 @@ G14_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) G14_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+c)]]) -G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) - G14_IRA.blocks = {irb.label : irb for irb in [G14_IRB0, G14_IRB1]} +G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) + # Expected output for graph 1 G14_EXP_IRA = IRATest() @@ -501,10 +499,10 @@ G15_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST2)], [ExprAff(a, CST1), ExprAff(c, CST1)]]) G15_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]]) -G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) - G15_IRA.blocks = {irb.label : irb for irb in [G15_IRB0, G15_IRB1]} +G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) + # Expected output for graph 1 G15_EXP_IRA = IRATest() @@ -523,6 +521,8 @@ G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), G16_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]]) G16_IRB2 = gen_irblock(LBL2, [[]]) +G16_IRA.blocks = {irb.label : irb for irb in [G16_IRB0, G16_IRB1]} + G16_IRA.graph.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) @@ -642,23 +642,23 @@ G17_EXP_IRA.blocks = {irb.label : irb for irb in [G17_EXP_IRB0]} # Begining of tests for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), - (G2_IRA, G2_EXP_IRA), - (G3_IRA, G3_EXP_IRA), - (G4_IRA, G4_EXP_IRA), - (G5_IRA, G5_EXP_IRA), - (G6_IRA, G6_EXP_IRA), - (G7_IRA, G7_EXP_IRA), - (G8_IRA, G8_EXP_IRA), - (G9_IRA, G9_EXP_IRA), - (G10_IRA, G10_EXP_IRA), - (G11_IRA, G11_EXP_IRA), - (G12_IRA, G12_EXP_IRA), - (G13_IRA, G13_EXP_IRA), - (G14_IRA, G14_EXP_IRA), - (G15_IRA, G15_EXP_IRA), - (G16_IRA, G16_EXP_IRA), - (G17_IRA, G17_EXP_IRA) - ]): + (G2_IRA, G2_EXP_IRA), + (G3_IRA, G3_EXP_IRA), + (G4_IRA, G4_EXP_IRA), + (G5_IRA, G5_EXP_IRA), + (G6_IRA, G6_EXP_IRA), + (G7_IRA, G7_EXP_IRA), + (G8_IRA, G8_EXP_IRA), + (G9_IRA, G9_EXP_IRA), + (G10_IRA, G10_EXP_IRA), + (G11_IRA, G11_EXP_IRA), + (G12_IRA, G12_EXP_IRA), + (G13_IRA, G13_EXP_IRA), + (G14_IRA, G14_EXP_IRA), + (G15_IRA, G15_EXP_IRA), + (G16_IRA, G16_EXP_IRA), + (G17_IRA, G17_EXP_IRA) +]): # Extract test elements g_ira, g_exp_ira = test @@ -667,10 +667,14 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), # Print initial graph, for debug open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) - # Simplify graph - g_ira.dead_simp() + reaching_defs = ReachingDefinitions(g_ira) + defuse = DiGraphDefUse(reaching_defs, deref_mem=True) + #open("defuse_%02d.dot" % (test_nb+1), "w").write(defuse.dot()) + + # # Simplify graph + dead_simp(g_ira) - # Print simplified graph, for debug + # # Print simplified graph, for debug open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) # Same number of blocks diff --git a/test/test_all.py b/test/test_all.py index 4f3ea760..45f5ac97 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -246,11 +246,7 @@ for script in ["modint.py", for script in ["symbexec.py", ]: testset += RegressionTest([script], base_dir="ir") -testset += RegressionTest(["analysis.py"], base_dir="ir", - products=[fname for fnames in ( - ["simp_graph_%02d.dot" % test_nb, "graph_%02d.dot" % test_nb] - for test_nb in xrange(1, 18)) - for fname in fnames]) + testset += RegressionTest(["z3_ir.py"], base_dir="ir/translators", tags=[TAGS["z3"]]) testset += RegressionTest(["smt2.py"], base_dir="ir/translators", @@ -277,6 +273,11 @@ testset += RegressionTest(["modularintervals.py"], base_dir="analysis") testset += RegressionTest(["range.py"], base_dir="analysis", tags=[TAGS["z3"]]) +testset += RegressionTest(["data_flow.py"], base_dir="analysis", + products=[fname for fnames in ( + ["simp_graph_%02d.dot" % test_nb, "graph_%02d.dot" % test_nb] + for test_nb in xrange(1, 18)) + for fname in fnames]) ## Degraph class TestDepgraph(RegressionTest): @@ -464,9 +465,9 @@ class ExampleDisasmFull(ExampleDisassembler): def __init__(self, *args, **kwargs): super(ExampleDisasmFull, self).__init__(*args, **kwargs) - self.command_line = ["full.py", "-g", "-s", "-m"] + self.command_line - self.products += ["graph_execflow.dot", "graph_irflow.dot", - "graph_irflow_raw.dot", "lines.dot"] + self.command_line = ["full.py", "-g", "-s", "-d", "-m"] + self.command_line + self.products += ["graph_defuse.dot", "graph_execflow.dot", + "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot"] testset += ExampleDisasmFull(["arml", Example.get_sample("demo_arm_l.bin"), |