diff options
37 files changed, 1017 insertions, 770 deletions
diff --git a/README.md b/README.md index f4464eaf..4744bca5 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,16 @@ Assembling / Disassembling Import Miasm x86 architecture: ``` >>> from miasm2.arch.x86.arch import mn_x86 +>>> from miasm2.core.locationdb import LocationDB +``` +Get a location db: + +``` +>>> loc_db = LocationDB() ``` Assemble a line: ``` ->>> l = mn_x86.fromstring('XOR ECX, ECX', 32) +>>> l = mn_x86.fromstring('XOR ECX, ECX', loc_db, 32) >>> print l XOR ECX, ECX >>> mn_x86.asm(l) @@ -92,34 +98,37 @@ Create an instruction: ``` >>> machine = Machine('arml') ->>> l = machine.mn.dis('002088e0'.decode('hex'), 'l') ->>> print l +>>> instr = machine.mn.dis('002088e0'.decode('hex'), 'l') +>>> print instr ADD R2, R8, R0 ``` -Create an intermediate representation (IR) object: +Create an intermediate representation object: ``` ->>> ira = machine.ira() +>>> ira = machine.ira(loc_db) +``` +Create an empty ircfg +``` +>>> ircfg = ira.new_ircfg() ``` Add instruction to the pool: ``` ->>> ira.add_instr(l) +>>> ira.add_instr_to_ircfg(instr, ircfg) ``` Print current pool: ``` ->>> for lbl, irblock in ira.blocks.items(): -... print irblock -... -loc_0000000000000000:0x00000000 +>>> for lbl, irblock in ircfg.blocks.items(): +... print irblock.to_string(loc_db) +loc_0: +R2 = R8 + R0 - R2 = (R8+R0) +IRDst = loc_4 - IRDst = loc_0000000000000004:0x00000004 ``` Working with IR, for instance by getting side effects: ``` ->>> for lbl, irblock in ira.blocks.iteritems(): +>>> for lbl, irblock in ircfg.blocks.iteritems(): ... for assignblk in irblock: ... rw = assignblk.get_rw() ... for dst, reads in rw.iteritems(): @@ -130,8 +139,9 @@ Working with IR, for instance by getting side effects: read: ['R8', 'R0'] written: R2 -read: ['loc_0000000000000004:0x00000004'] +read: [] written: IRDst + ``` Emulation @@ -266,12 +276,8 @@ Symbolic execution Initializing the IR pool: ``` ->>> ira = machine.ira(loc_db=mdis.loc_db) ->>> ->>> for block in asmcfg.blocks: -... ira.add_block(block) -... -... +>>> ira = machine.ira() +>>> ircfg = ira.new_ircfg_from_asmcfg(asmcfg) ``` Initializing the engine with default symbolic values: @@ -284,7 +290,7 @@ Initializing the engine with default symbolic values: Launching the execution: ``` ->>> symbolic_pc = sb.run_at(0) +>>> symbolic_pc = sb.run_at(ircfg, 0) >>> print symbolic_pc ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) ``` @@ -292,8 +298,8 @@ Launching the execution: Same, with step logs (only changes are displayed): ``` ->>> sb = SymbolicExecutionEngine(ira) ->>> symbolic_pc = sb.run_at(0, step=True) +>>> sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) +>>> symbolic_pc = sb.run_at(ircfg, 0, step=True) Instr LEA ECX, DWORD PTR [ECX + 0x4] Assignblk: ECX = ECX + 0x4 @@ -349,8 +355,8 @@ Retry execution with a concrete ECX. Here, the symbolic / concolic execution rea ``` >>> from miasm2.expression.expression import ExprInt ->>> sb.symbols[machine.mn.regs.ECX] = ExprInt(-3, 32) ->>> symbolic_pc = sb.run_at(0, step=True) +>>> sb.symbols[machine.mn.regs.ECX] = ExprInt(-3) +>>> symbolic_pc = sb.run_at(ircfg, 0, step=True) Instr LEA ECX, DWORD PTR [ECX + 0x4] Assignblk: ECX = ECX + 0x4 diff --git a/example/disasm/full.py b/example/disasm/full.py index ddf91e29..cfbfc80c 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -191,13 +191,17 @@ if args.gen_ir: ir_arch = ir(mdis.loc_db) ir_arch_a = ira(mdis.loc_db) + + ircfg = ir_arch.new_ircfg() + ircfg_a = ir_arch.new_ircfg() + ir_arch.blocks = {} ir_arch_a.blocks = {} for ad, asmcfg in all_funcs_blocks.items(): log.info("generating IR... %x" % ad) for block in asmcfg.blocks: - ir_arch_a.add_block(block) - ir_arch.add_block(block) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + ir_arch_a.add_asmblock_to_ircfg(block, ircfg_a) log.info("Print blocks (without analyse)") for label, block in ir_arch.blocks.iteritems(): @@ -210,25 +214,25 @@ if args.gen_ir: print block if args.simplify > 0: - dead_simp(ir_arch_a) + dead_simp(ir_arch_a, ircfg_a) if args.defuse: reachings = ReachingDefinitions(ir_arch_a) open('graph_defuse.dot', 'w').write(DiGraphDefUse(reachings).dot()) - out = ir_arch_a.graph.dot() + out = ircfg.dot() open('graph_irflow.dot', 'w').write(out) - out = ir_arch.graph.dot() + out = ircfg_a.dot() open('graph_irflow_raw.dot', 'w').write(out) if args.simplify > 1: - ir_arch_a.simplify(expr_simp) + ircfg_a.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch_a) - modified |= ir_arch_a.remove_empty_assignblks() - modified |= ir_arch_a.remove_jmp_blocks() - modified |= ir_arch_a.merge_blocks() + modified |= dead_simp(ir_arch_a, ircfg_a) + modified |= ircfg_a.remove_empty_assignblks() + modified |= ircfg_a.remove_jmp_blocks() + modified |= ircfg_a.merge_blocks() - open('graph_irflow_reduced.dot', 'w').write(ir_arch_a.graph.dot()) + open('graph_irflow_reduced.dot', 'w').write(ircfg_a.dot()) diff --git a/example/expression/access_c.py b/example/expression/access_c.py index 8e440cc1..e8d5e318 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -54,11 +54,10 @@ from miasm2.core.objc import ExprToAccessC, CHandler from miasm2.core.objc import CTypesManagerNotPacked from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct - -def find_call(ira): +def find_call(ircfg): """Returns (irb, index) which call""" - for irb in ira.blocks.values(): + for irb in ircfg.blocks.values(): out = set() if len(irb) < 2: continue @@ -92,17 +91,17 @@ class MyExprToAccessC(ExprToAccessC): reduction_rules = ExprToAccessC.reduction_rules + [reduce_compose] -def get_funcs_arg0(ctx, ira, lbl_head): +def get_funcs_arg0(ctx, ira, ircfg, lbl_head): """Compute DependencyGraph on the func @lbl_head""" - g_dep = DependencyGraph(ira, follow_call=False) + g_dep = DependencyGraph(ircfg, follow_call=False) element = ira.arch.regs.RSI - for irb, index in find_call(ira): + for irb, index in find_call(ircfg): instr = irb[index].instr print 'Analysing references from:', hex(instr.offset), instr g_list = g_dep.get(irb.loc_key, set([element]), index, set([lbl_head])) for dep in g_list: - emul_result = dep.emul(ctx) + emul_result = dep.emul(ira, ctx) value = emul_result[element] yield value @@ -147,10 +146,9 @@ asmcfg = mdis.dis_multiblock(addr_head) lbl_head = mdis.loc_db.get_offset_location(addr_head) ir_arch_a = ira(mdis.loc_db) -for block in asmcfg.blocks: - ir_arch_a.add_block(block) +ircfg = ir_arch_a.new_ircfg_from_asmcfg(asmcfg) -open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) +open('graph_irflow.dot', 'w').write(ircfg.dot()) # Main function's first argument's type is "struct ll_human*" ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) @@ -161,7 +159,7 @@ expr_types = {arg0: (ptr_llhuman,), mychandler = MyCHandler(types_mngr, expr_types) -for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): +for expr in get_funcs_arg0(ctx, ir_arch_a, ircfg, lbl_head): print "Access:", expr for c_str, ctype in mychandler.expr_to_c_and_types(expr): print '\taccess:', c_str diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 6db07e9b..7036d960 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -7,6 +7,7 @@ from miasm2.core import asmblock from miasm2.arch.x86.ira import ir_a_x86_32 from miasm2.analysis.data_flow import dead_simp + # First, asm code asmcfg, loc_db = parse_asm.parse_txt(mn_x86, 32, ''' main: @@ -35,22 +36,19 @@ patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db) # Translate to IR ir_arch = ir_a_x86_32(loc_db) -for block in asmcfg.blocks: - print 'add block' - print block - ir_arch.add_block(block) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Display IR -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock # Dead propagation -open('graph.dot', 'w').write(ir_arch.graph.dot()) +open('graph.dot', 'w').write(ircfg.dot()) print '*' * 80 -dead_simp(ir_arch) -open('graph2.dot', 'w').write(ir_arch.graph.dot()) +dead_simp(ir_arch, ircfg) +open('graph2.dot', 'w').write(ircfg.dot()) # Display new IR print 'new ir blocks' -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py index b39bcafd..d9c5fe65 100644 --- a/example/expression/constant_propagation.py +++ b/example/expression/constant_propagation.py @@ -31,24 +31,21 @@ mdis = dis_engine(cont.bin_stream) ir_arch = ira(mdis.loc_db) addr = int(args.address, 0) - asmcfg = mdis.dis_multiblock(addr) -for block in asmcfg.blocks: - ir_arch.add_block(block) - +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) init_infos = ir_arch.arch.regs.regs_init -cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) +cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) if args.simplify: - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() -open("%s.propag.dot" % args.filename, 'w').write(ir_arch.graph.dot()) +open("%s.propag.dot" % args.filename, 'w').write(ircfg.dot()) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index 1bacb251..34d0f94a 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -12,14 +12,14 @@ Get read/written registers for a given instruction """ arch = mn_x86 -ir_arch = ir_a_x86_32() - -l = arch.fromstring('LODSB', loc_db, 32) -l.offset, l.l = 0, 15 -ir_arch.add_instr(l) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg() +instr = arch.fromstring('LODSB', loc_db, 32) +instr.offset, instr.l = 0, 15 +ir_arch.add_instr_to_ircfg(instr, ircfg) print '*' * 80 -for lbl, irblock in ir_arch.blocks.iteritems(): +for lbl, irblock in ircfg.blocks.iteritems(): print irblock for assignblk in irblock: rw = assignblk.get_rw() @@ -28,4 +28,4 @@ for lbl, irblock in ir_arch.blocks.iteritems(): print 'written:', dst print -open('graph_instr.dot', 'w').write(ir_arch.graph.dot()) +open('graph_instr.dot', 'w').write(ircfg.dot()) diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index dd9d3e9b..b30bd29f 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -28,7 +28,7 @@ def get_node_name(label, i, n): return n_name -def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): +def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes): symbols_init = ir_arch.arch.regs.regs_init.copy() sb = SymbolicExecutionEngine(ir_arch, symbols_init) sb.eval_updt_irblock(irblock) @@ -87,17 +87,17 @@ def node2str(self, node): return out -def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): - for irblock in ir_arch.blocks.values(): +def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb): + for irblock in ircfg.blocks.values(): print irblock - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) irblock_0 = None - for irblock in ir_arch.blocks.values(): + for irblock in ircfg.blocks.values(): loc_key = irblock.loc_key - offset = ir_arch.loc_db.get_location_offset(loc_key) + offset = ircfg.loc_db.get_location_offset(loc_key) if offset == ad: irblock_0 = irblock break @@ -108,20 +108,20 @@ def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): irb_in_nodes = {} irb_out_nodes = {} - for label in ir_arch.blocks: + for label in ircfg.blocks: irb_in_nodes[label] = {} irb_out_nodes[label] = {} - for label, irblock in ir_arch.blocks.iteritems(): - block_flow_cb(ir_arch, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) + for label, irblock in ircfg.blocks.iteritems(): + block_flow_cb(ir_arch, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) - for label in ir_arch.blocks: + for label in ircfg.blocks: print label print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 - inter_block_flow(ir_arch, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) + inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) @@ -140,11 +140,9 @@ print 'ok' print 'generating dataflow graph for:' ir_arch = ir_a_x86_32(mdis.loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) -for block in asmcfg.blocks: - print block - ir_arch.add_block(block) -for irblock in ir_arch.blocks.values(): +for irblock in ircfg.blocks.values(): print irblock @@ -153,7 +151,7 @@ if args.symb: else: block_flow_cb = intra_block_flow_raw -gen_block_data_flow_graph(ir_arch, ad, block_flow_cb) +gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb) print '*' * 40 print """ diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 3c850445..acb3abf4 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -13,7 +13,6 @@ from miasm2.core import parse_asm from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine from miasm2.ir.translators.translator import Translator - machine = Machine("x86_32") @@ -27,7 +26,7 @@ if not args: sys.exit(0) -def emul_symb(ir_arch, mdis, states_todo, states_done): +def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done): while states_todo: addr, symbols, conds = states_todo.pop() print '*' * 40, "addr", addr, '*' * 40 @@ -39,7 +38,7 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): symbexec.symbols = symbols.copy() if ir_arch.pc in symbexec.symbols: del symbexec.symbols[ir_arch.pc] - irblock = get_block(ir_arch, mdis, addr) + irblock = get_block(ir_arch, ircfg, mdis, addr) print 'Run block:' print irblock @@ -88,7 +87,7 @@ if __name__ == '__main__': ir_arch = machine.ir(mdis.loc_db) - + ircfg = ir_arch.new_ircfg() symbexec = SymbolicExecutionEngine(ir_arch) asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' @@ -127,8 +126,8 @@ if __name__ == '__main__': print block # add fake address and len to parsed instructions - ir_arch.add_block(block) - irb = ir_arch.blocks[init_lbl] + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irb = ircfg.blocks[init_lbl] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) # reset ir_arch blocks @@ -139,7 +138,7 @@ if __name__ == '__main__': states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states - emul_symb(ir_arch, mdis, states_todo, states_done) + emul_symb(ir_arch, ircfg, mdis, states_todo, states_done) all_info = [] diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index f236cf26..e8b52e3e 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -270,12 +270,12 @@ def analyse_function(): asmcfg = mdis.dis_multiblock(addr) # Generate IR - for block in asmcfg.blocks: - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) + cst_propag_link = {} if settings.cUnalias.value: init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp] } - cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) + cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) @@ -317,8 +317,8 @@ def analyse_function(): first_block.lines[0] ) irb_head = IRBlock(lbl_head, [assignblk_head]) - ir_arch.blocks[lbl_head] = irb_head - ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) + ircfg.blocks[lbl_head] = irb_head + ircfg.add_uniq_edge(lbl_head, lbl_real_start) state = TypePropagationEngine.StateEngine(infos_types) states = {lbl_head: state} @@ -331,23 +331,24 @@ def analyse_function(): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue symbexec_engine = TypePropagationEngine(ir_arch, types_mngr, state) - addr = symbexec_engine.run_block_at(lbl) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) - ir_arch._graph = None - sons = ir_arch.graph.successors(lbl) + sons = ircfg.successors(lbl) for son in sons: - add_state(ir_arch, todo, states, son, - symbexec_engine.get_state()) + add_state( + ircfg, todo, states, son, + symbexec_engine.get_state() + ) for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state, cst_propag_link) - addr = symbexec_engine.run_block_at(lbl) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index 1ba7bee7..297877a1 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -19,16 +19,17 @@ from utils import guess_machine class depGraphSettingsForm(ida_kernwin.Form): - def __init__(self, ira): + def __init__(self, ira, ircfg): self.ira = ira + self.ircfg = ircfg self.stk_args = {'ARG%d' % i:i for i in xrange(10)} self.stk_unalias_force = False self.address = idc.ScreenEA() cur_block = None - for block in ira.getby_offset(self.address): - offset = self.ira.loc_db.get_location_offset(block.loc_key) + for block in ircfg.getby_offset(self.address): + offset = self.ircfg.loc_db.get_location_offset(block.loc_key) if offset is not None: # Only one block non-generated assert cur_block is None @@ -40,7 +41,7 @@ class depGraphSettingsForm(ida_kernwin.Form): break assert line_nb is not None cur_loc_key = str(cur_block.loc_key) - loc_keys = sorted(map(str, ira.blocks.keys())) + loc_keys = sorted(map(str, ircfg.blocks.keys())) regs = sorted(ira.arch.regs.all_regs_ids_byname.keys()) regs += self.stk_args.keys() reg_default = regs[0] @@ -97,7 +98,7 @@ Method to use: @property def loc_key(self): value = self.cbBBL.value - for real_loc_key in self.ira.blocks: + for real_loc_key in self.ircfg.blocks: if str(real_loc_key) == value: return real_loc_key raise ValueError("Bad loc_key") @@ -111,13 +112,13 @@ Method to use: elif mode == 1: return value + 1 else: - return len(self.ira.blocks[self.loc_key]) + return len(self.ircfg.blocks[self.loc_key]) @property def elements(self): value = self.cbReg.value if value in self.stk_args: - line = self.ira.blocks[self.loc_key][self.line_nb].instr + line = self.ircfg.blocks[self.loc_key][self.line_nb].instr arg_num = self.stk_args[value] stk_high = m2_expr.ExprInt(idc.GetSpd(line.offset), ir_arch.sp.size) stk_off = m2_expr.ExprInt(self.ira.sp.size/8 * arg_num, ir_arch.sp.size) @@ -135,7 +136,7 @@ Method to use: @property def depgraph(self): value = self.cMethod.value - return DependencyGraph(self.ira, + return DependencyGraph(self.ircfg, implicit=value & 4, follow_mem=value & 1, follow_call=value & 2) @@ -185,7 +186,7 @@ def treat_element(): if graph.has_loop: print 'Graph has dependency loop: symbolic execution is inexact' else: - print "Possible value: %s" % graph.emul().values()[0] + print "Possible value: %s" % graph.emul(self.ira).values()[0] for offset, elements in comments.iteritems(): idc.MakeComm(offset, ", ".join(map(str, elements))) @@ -219,11 +220,10 @@ def launch_depgraph(): asmcfg = mdis.dis_multiblock(func.startEA) # Generate IR - for block in asmcfg.blocks: - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Get settings - settings = depGraphSettingsForm(ir_arch) + settings = depGraphSettingsForm(ir_arch, ircfg) settings.Execute() loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 50895b88..afd00d5c 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -54,29 +54,29 @@ def color_irblock(irblock, ir_arch): class GraphMiasmIR(idaapi.GraphViewer): - def __init__(self, ir_arch, title, result): + def __init__(self, ircfg, title, result): idaapi.GraphViewer.__init__(self, title) - self.ir_arch = ir_arch + self.ircfg = ircfg self.result = result self.names = {} def OnRefresh(self): self.Clear() addr_id = {} - for irblock in self.ir_arch.blocks.values(): - id_irblock = self.AddNode(color_irblock(irblock, self.ir_arch)) + for irblock in self.ircfg.blocks.values(): + id_irblock = self.AddNode(color_irblock(irblock, self.ircfg)) addr_id[irblock] = id_irblock - for irblock in self.ir_arch.blocks.values(): + for irblock in self.ircfg.blocks.values(): if not irblock: continue - all_dst = self.ir_arch.dst_trackback(irblock) + all_dst = self.ircfg.dst_trackback(irblock) for dst in all_dst: if not dst.is_loc(): continue - if not dst.loc_key in self.ir_arch.blocks: + if not dst.loc_key in self.ircfg.blocks: continue - dst_block = self.ir_arch.blocks[dst.loc_key] + dst_block = self.ircfg.blocks[dst.loc_key] node1 = addr_id[irblock] node2 = addr_id[dst_block] self.AddEdge(node1, node2) @@ -137,16 +137,12 @@ def build_graph(verbose=False, simplify=False): print "generating IR... %x" % start_addr - for block in asmcfg.blocks: - if verbose: - print 'ADD' - print block - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) if verbose: print "IR ok... %x" % start_addr - for irb in ir_arch.blocks.itervalues(): + for irb in ircfg.blocks.itervalues(): irs = [] for assignblk in irb: new_assignblk = { @@ -154,27 +150,27 @@ def build_graph(verbose=False, simplify=False): for dst, src in assignblk.iteritems() } irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) - ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) + ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) if verbose: - out = ir_arch.graph.dot() + out = ircfg.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) title = "Miasm IR graph" if simplify: - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() title += " (simplified)" - g = GraphMiasmIR(ir_arch, title, None) + g = GraphMiasmIR(ircfg, title, None) g.Show() diff --git a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index 43100943..ffaa9b27 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -142,12 +142,11 @@ def symbolic_exec(): mdis.dont_dis = [end] asmcfg = mdis.dis_multiblock(start) ira = machine.ira(loc_db=mdis.loc_db) - for block in asmcfg.blocks: - ira.add_block(block) + ircfg = ira.new_ircfg_from_asmcfg(asmcfg) print "Run symbolic execution..." sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) - sb.run_at(start) + sb.run_at(ircfg, start) modified = {} for dst, src in sb.modified(init_state=machine.mn.regs.regs_init): diff --git a/example/symbol_exec/depgraph.py b/example/symbol_exec/depgraph.py index 88540a83..f306e6e3 100644 --- a/example/symbol_exec/depgraph.py +++ b/example/symbol_exec/depgraph.py @@ -62,18 +62,19 @@ if args.rename_args: asmcfg = mdis.dis_multiblock(int(args.func_addr, 0)) # Generate IR -for block in asmcfg.blocks: - ir_arch.add_block(block) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Get the instance -dg = DependencyGraph(ir_arch, implicit=args.implicit, - apply_simp=not args.do_not_simplify, - follow_mem=not args.unfollow_mem, - follow_call=not args.unfollow_call) +dg = DependencyGraph( + ircfg, implicit=args.implicit, + apply_simp=not args.do_not_simplify, + follow_mem=not args.unfollow_mem, + follow_call=not args.unfollow_call +) # Build information target_addr = int(args.target_addr, 0) -current_block = list(ir_arch.getby_offset(target_addr))[0] +current_block = list(ircfg.getby_offset(target_addr))[0] assignblk_index = 0 for assignblk_index, assignblk in enumerate(current_block): if assignblk.instr.offset == target_addr: @@ -86,7 +87,7 @@ for sol_nb, sol in enumerate(dg.get(current_block.loc_key, elements, assignblk_i with open(fname, "w") as fdesc: fdesc.write(sol.graph.dot()) - results = sol.emul(ctx=init_ctx) + results = sol.emul(ir_arch, ctx=init_ctx) tokens = {str(k): str(v) for k, v in results.iteritems()} if not args.json: result = ", ".join("=".join(x) for x in tokens.iteritems()) diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index 6ddd2608..c78f1f7f 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -21,14 +21,15 @@ asm_block = mdis.dis_block(START_ADDR) # Translate ASM -> IR ira = machine.ira(mdis.loc_db) -ira.add_block(asm_block) +ircfg = ira.new_ircfg() +ira.add_asmblock_to_ircfg(asm_block, ircfg) # Instanciate a Symbolic Execution engine with default value for registers symb = SymbolicExecutionEngine(ira) # Emulate one IR basic block ## Emulation of several basic blocks can be done through .emul_ir_blocks -cur_addr = symb.run_at(START_ADDR) +cur_addr = symb.run_at(ircfg, START_ADDR) # Modified elements print 'Modified registers:' diff --git a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py index 4b5d7834..7f74324f 100644 --- a/miasm2/analysis/cst_propag.py +++ b/miasm2/analysis/cst_propag.py @@ -17,21 +17,20 @@ class SymbExecState(SymbolicExecutionEngine): """ State manager for SymbolicExecution """ - def __init__(self, ir_arch, state): + def __init__(self, ir_arch, ircfg, state): super(SymbExecState, self).__init__(ir_arch, {}) self.set_state(state) -def add_state(ir_arch, todo, states, addr, state): +def add_state(ircfg, todo, states, addr, state): """ Add or merge the computed @state for the block at @addr. Update @todo - @ir_arch: IR instance @todo: modified block set @states: dictionnary linking a label to its entering state. @addr: address of the concidered block @state: computed state """ - addr = ir_arch.get_loc_key(addr) + addr = ircfg.get_loc_key(addr) todo.add(addr) if addr not in states: states[addr] = state @@ -67,7 +66,8 @@ class SymbExecStateFix(SymbolicExecutionEngine): # Function used to test if an Expression is considered as a constant is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) - def __init__(self, ir_arch, state, cst_propag_link): + def __init__(self, ir_arch, ircfg, state, cst_propag_link): + self.ircfg = ircfg super(SymbExecStateFix, self).__init__(ir_arch, {}) self.set_state(state) self.cst_propag_link = cst_propag_link @@ -112,10 +112,10 @@ class SymbExecStateFix(SymbolicExecutionEngine): self.eval_updt_assignblk(assignblk) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - self.ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) + self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) -def compute_cst_propagation_states(ir_arch, init_addr, init_infos): +def compute_cst_propagation_states(ir_arch, ircfg, init_addr, init_infos): """ Propagate "constant expressions" in a function. The attribute "constant expression" is true if the expression is based on @@ -128,7 +128,7 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): done = set() state = SymbExecState.StateEngine(init_infos) - lbl = ir_arch.get_loc_key(init_addr) + lbl = ircfg.get_loc_key(init_addr) todo = set([lbl]) states = {lbl: state} @@ -140,11 +140,11 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec_engine = SymbExecState(ir_arch, state) - addr = symbexec_engine.run_block_at(lbl) + symbexec_engine = SymbExecState(ir_arch, ircfg, state) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) for dst in possible_values(addr): @@ -153,14 +153,16 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): LOG_CST_PROPAG.warning('Bad destination: %s', value) continue elif value.is_int(): - value = ir_arch.get_loc_key(value) - add_state(ir_arch, todo, states, value, - symbexec_engine.get_state()) + value = ircfg.get_loc_key(value) + add_state( + ircfg, todo, states, value, + symbexec_engine.get_state() + ) return states -def propagate_cst_expr(ir_arch, addr, init_infos): +def propagate_cst_expr(ir_arch, ircfg, addr, init_infos): """ Propagate "constant expressions" in a @ir_arch. The attribute "constant expression" is true if the expression is based on @@ -172,11 +174,11 @@ def propagate_cst_expr(ir_arch, addr, init_infos): Returns a mapping between replaced Expression and their new values. """ - states = compute_cst_propagation_states(ir_arch, addr, init_infos) + states = compute_cst_propagation_states(ir_arch, ircfg, addr, init_infos) cst_propag_link = {} for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec = SymbExecStateFix(ir_arch, state, cst_propag_link) - symbexec.eval_updt_irblock(ir_arch.blocks[lbl]) + symbexec = SymbExecStateFix(ir_arch, ircfg, state, cst_propag_link) + symbexec.eval_updt_irblock(ircfg.blocks[lbl]) return cst_propag_link diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index aa1c0d1a..9c21fd51 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -8,7 +8,7 @@ def get_node_name(label, i, n): return n_name -def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): +def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes): """ Create data flow for an irbloc using raw IR expressions """ @@ -58,15 +58,16 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): flow_graph.add_uniq_edge(node_n_r, node_n_w) -def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): + +def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): lbl, current_nodes, exec_nodes = todo current_nodes = dict(current_nodes) # link current nodes to bloc in_nodes - if not lbl in ir_arch.blocks: + if not lbl in ircfg.blocks: print "cannot find bloc!!", lbl return set() - irb = ir_arch.blocks[lbl] + irb = ircfg.blocks[lbl] to_del = set() for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_r in current_nodes: @@ -92,7 +93,7 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.graph.successors(irb.loc_key): + for lbl_dst in ircfg.successors(irb.loc_key): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) return todo @@ -128,7 +129,7 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): flow_graph.add_uniq_edge(node_n_r, node_n_w) -def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): +def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): todo = set() done = set() @@ -139,7 +140,7 @@ def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, li if state in done: continue done.add(state) - out = inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) + out = inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) todo.update(out) diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index e780f70c..9e5203a6 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -29,16 +29,16 @@ class ReachingDefinitions(dict): { (block, index): { lvalue: set((block, index)) } } """ - ir_a = None + ircfg = None - def __init__(self, ir_a): + def __init__(self, ircfg): super(ReachingDefinitions, self).__init__() - self.ir_a = ir_a + self.ircfg = ircfg self.compute() def get_definitions(self, block_lbl, assignblk_index): """Returns the dict { lvalue: set((def_block_lbl, def_index)) } - associated with self.ir_a.@block.assignblks[@assignblk_index] + associated with self.ircfg.@block.assignblks[@assignblk_index] or {} if it is not yet computed """ return self.get((block_lbl, assignblk_index), {}) @@ -48,7 +48,7 @@ class ReachingDefinitions(dict): modified = True while modified: modified = False - for block in self.ir_a.blocks.itervalues(): + for block in self.ircfg.blocks.itervalues(): modified |= self.process_block(block) def process_block(self, block): @@ -57,8 +57,8 @@ class ReachingDefinitions(dict): the assignblk in block @block. """ predecessor_state = {} - for pred_lbl in self.ir_a.graph.predecessors(block.loc_key): - pred = self.ir_a.blocks[pred_lbl] + for pred_lbl in self.ircfg.predecessors(block.loc_key): + pred = self.ircfg.blocks[pred_lbl] for lval, definitions in self.get_definitions(pred_lbl, len(pred)).iteritems(): predecessor_state.setdefault(lval, set()).update(definitions) @@ -126,7 +126,7 @@ class DiGraphDefUse(DiGraph): # For dot display self._filter_node = None self._dot_offset = None - self._blocks = reaching_defs.ir_a.blocks + self._blocks = reaching_defs.ircfg.blocks super(DiGraphDefUse, self).__init__(*args, **kwargs) self._compute_def_use(reaching_defs, @@ -189,7 +189,7 @@ class DiGraphDefUse(DiGraph): yield self.DotCellDescription(text="", attr={}) -def dead_simp_useful_assignblks(defuse, reaching_defs): +def dead_simp_useful_assignblks(irarch, defuse, reaching_defs): """Mark useful statements using previous reach analysis and defuse Source : Kennedy, K. (1979). A survey of data flow analysis techniques. @@ -200,13 +200,13 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): PRE: compute_reach(self) """ - ir_a = reaching_defs.ir_a + ircfg = reaching_defs.ircfg useful = set() - for block_lbl, block in ir_a.blocks.iteritems(): - successors = ir_a.graph.successors(block_lbl) + for block_lbl, block in ircfg.blocks.iteritems(): + successors = ircfg.successors(block_lbl) for successor in successors: - if successor not in ir_a.blocks: + if successor not in ircfg.blocks: keep_all_definitions = True break else: @@ -217,7 +217,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): valid_definitions = reaching_defs.get_definitions(block_lbl, len(block)) for lval, definitions in valid_definitions.iteritems(): - if (lval in ir_a.get_out_regs(block) + if (lval in irarch.get_out_regs(block) or keep_all_definitions): for definition in definitions: useful.add(AssignblkNode(definition[0], definition[1], lval)) @@ -226,7 +226,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for index, assignblk in enumerate(block): for lval, rval in assignblk.iteritems(): if (lval.is_mem() - or ir_a.IRDst == lval + or irarch.IRDst == lval or rval.is_function_call()): useful.add(AssignblkNode(block_lbl, index, lval)) @@ -235,7 +235,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for parent in defuse.reachable_parents(node): yield parent -def dead_simp(ir_a): +def dead_simp(irarch, ircfg): """ Remove useless affectations. @@ -245,14 +245,14 @@ def dead_simp(ir_a): Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - @ir_a: IntermediateRepresentation instance + @ircfg: IntermediateRepresentation instance """ modified = False - reaching_defs = ReachingDefinitions(ir_a) + reaching_defs = ReachingDefinitions(ircfg) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - useful = set(dead_simp_useful_assignblks(defuse, reaching_defs)) - for block in ir_a.blocks.itervalues(): + useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) + for block in ircfg.blocks.itervalues(): irs = [] for idx, assignblk in enumerate(block): new_assignblk = dict(assignblk) @@ -261,5 +261,5 @@ def dead_simp(ir_a): del new_assignblk[lval] modified = True irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ir_a.blocks[block.loc_key] = IRBlock(block.loc_key, irs) + ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) return modified diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index 11476f79..93b3edb5 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -194,7 +194,7 @@ class DependencyResult(DependencyState): """Container and methods for DependencyGraph results""" - def __init__(self, ira, initial_state, state, inputs): + def __init__(self, ircfg, initial_state, state, inputs): self.initial_state = initial_state self.loc_key = state.loc_key self.history = state.history @@ -202,7 +202,7 @@ class DependencyResult(DependencyState): self.line_nb = state.line_nb self.inputs = inputs self.links = state.links - self._ira = ira + self._ircfg = ircfg # Init lazy elements self._graph = None @@ -212,7 +212,7 @@ class DependencyResult(DependencyState): def unresolved(self): """Set of nodes whose dependencies weren't found""" return set(element for element in self.pending - if element != self._ira.IRDst) + if element != self._ircfg.IRDst) @property def relevant_nodes(self): @@ -272,9 +272,10 @@ class DependencyResult(DependencyState): return IRBlock(irb.loc_key, assignblks) - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): """Symbolic execution of relevant nodes according to the history Return the values of inputs nodes' elements + @ir_arch: IntermediateRepresentation instance @ctx: (optional) Initial context as dictionnary @step: (optional) Verbose execution Warning: The emulation is not sound if the inputs nodes depend on loop @@ -293,13 +294,13 @@ class DependencyResult(DependencyState): line_nb = self.initial_state.line_nb else: line_nb = None - assignblks += self.irblock_slice(self._ira.blocks[loc_key], + assignblks += self.irblock_slice(self._ircfg.blocks[loc_key], line_nb).assignblks # Eval the block loc_db = LocationDB() temp_loc = loc_db.get_or_create_name_location("Temp") - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) # Return only inputs values (others could be wrongs) @@ -322,10 +323,10 @@ class DependencyResultImplicit(DependencyResult): generated loc_keys """ out = [] - expected = self._ira.loc_db.canonize_to_exprloc(expected) + expected = self._ircfg.loc_db.canonize_to_exprloc(expected) expected_is_loc_key = expected.is_loc() for consval in possible_values(expr): - value = self._ira.loc_db.canonize_to_exprloc(consval.value) + value = self._ircfg.loc_db.canonize_to_exprloc(consval.value) if expected_is_loc_key and value != expected: continue if not expected_is_loc_key and value.is_loc_key(): @@ -350,24 +351,24 @@ class DependencyResultImplicit(DependencyResult): conds = translator.from_expr(self.unsat_expr) return conds - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): # Init ctx_init = {} if ctx is not None: ctx_init.update(ctx) solver = z3.Solver() - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) history = self.history[::-1] history_size = len(history) translator = Translator.to_language("z3") - size = self._ira.IRDst.size + size = self._ircfg.IRDst.size for hist_nb, loc_key in enumerate(history, 1): if hist_nb == history_size and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - irb = self.irblock_slice(self._ira.blocks[loc_key], line_nb) + irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb) # Emul the block and get back destination dst = symb_exec.eval_updt_irblock(irb, step=step) @@ -446,12 +447,12 @@ class DependencyGraph(object): *explicitely* or *implicitely* involved in the equation of given element. """ - def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True, + def __init__(self, ircfg, + implicit=False, apply_simp=True, follow_mem=True, follow_call=True): - """Create a DependencyGraph linked to @ira - The IRA graph must have been computed + """Create a DependencyGraph linked to @ircfg - @ira: IRAnalysis instance + @ircfg: DiGraphIR instance @implicit: (optional) Track IRDst for each block in the resulting path Following arguments define filters used to generate dependencies @@ -460,7 +461,7 @@ class DependencyGraph(object): @follow_call: (optional) Track through "call" """ # Init - self._ira = ira + self._ircfg = ircfg self._implicit = implicit # Create callback filters. The order is relevant. @@ -563,7 +564,7 @@ class DependencyGraph(object): if dst not in state.pending: continue # Track IRDst in implicit mode only - if dst == self._ira.IRDst and not self._implicit: + if dst == self._ircfg.IRDst and not self._implicit: continue assert dst not in node_resolved node_resolved.add(dst) @@ -581,7 +582,7 @@ class DependencyGraph(object): """Follow dependencies tracked in @state in the current irbloc @state: instance of DependencyState""" - irb = self._ira.blocks[state.loc_key] + irb = self._ircfg.blocks[state.loc_key] line_nb = len(irb) if state.line_nb is None else state.line_nb for cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): @@ -589,7 +590,7 @@ class DependencyGraph(object): def get(self, loc_key, elements, line_nb, heads): """Compute the dependencies of @elements at line number @line_nb in - the block named @loc_key in the current IRA, before the execution of + the block named @loc_key in the current DiGraphIR, before the execution of this line. Dependency check stop if one of @heads is reached @loc_key: LocKey instance @element: set of Expr instances @@ -613,17 +614,17 @@ class DependencyGraph(object): done.add(done_state) if (not state.pending or state.loc_key in heads or - not self._ira.graph.predecessors(state.loc_key)): - yield dpResultcls(self._ira, initial_state, state, elements) + not self._ircfg.predecessors(state.loc_key)): + yield dpResultcls(self._ircfg, initial_state, state, elements) if not state.pending: continue if self._implicit: # Force IRDst to be tracked, except in the input block - state.pending[self._ira.IRDst] = set() + state.pending[self._ircfg.IRDst] = set() # Propagate state to parents - for pred in self._ira.graph.predecessors_iter(state.loc_key): + for pred in self._ircfg.predecessors_iter(state.loc_key): todo.add(state.extend(pred)) def get_from_depnodes(self, depnodes, heads): diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py index bb8223e8..d3278cb4 100644 --- a/miasm2/analysis/disasm_cb.py +++ b/miasm2/analysis/disasm_cb.py @@ -26,11 +26,12 @@ def arm_guess_subcall( sp = LocationDB() ir_arch = ira(sp) + ircfg = ira.new_ircfg() print '###' print cur_bloc - ir_arch.add_block(cur_bloc) + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() + ir_blocks = ircfg.blocks.values() to_add = set() for irblock in ir_blocks: pc_val = None @@ -68,9 +69,10 @@ def arm_guess_jump_table( sp = LocationDB() ir_arch = ira(sp) - ir_arch.add_block(cur_bloc) + ircfg = ira.new_ircfg() + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() + ir_blocks = ircfg.blocks.values() for irblock in ir_blocks: pc_val = None for exprs in irblock: diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 87d11e0a..0c01610f 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -63,6 +63,7 @@ from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals +from miasm2.core.locationdb import LocationDB DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -148,10 +149,12 @@ class DSEEngine(object): def __init__(self, machine): self.machine = machine + self.loc_db = LocationDB() self.handler = {} # addr -> callback(DSEEngine instance) self.instrumentation = {} # addr -> callback(DSEEngine instance) self.addr_to_cacheblocks = {} # addr -> {label -> IRBlock} - self.ir_arch = self.machine.ir() # corresponding IR + self.ir_arch = self.machine.ir(loc_db=self.loc_db) # corresponding IR + self.ircfg = self.ir_arch.new_ircfg() # corresponding IR # Defined after attachment self.jitter = None # Jitload (concrete execution) @@ -159,8 +162,6 @@ class DSEEngine(object): self.symb_concrete = None # Concrete SymbExec for path desambiguisation self.mdis = None # DisasmEngine - self.loc_db = self.ir_arch.loc_db - def prepare(self): """Prepare the environment for attachment with a jitter""" # Disassembler @@ -173,13 +174,16 @@ class DSEEngine(object): self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm, self.ir_arch, {}) self.symb.enable_emulated_simplifications() - self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm, - self.ir_arch, {}) + self.symb_concrete = EmulatedSymbExec( + self.jitter.cpu, self.jitter.vm, + self.ir_arch, {} + ) ## Update registers value - self.symb.symbols[self.ir_arch.IRDst] = ExprInt(getattr(self.jitter.cpu, - self.ir_arch.pc.name), - self.ir_arch.IRDst.size) + self.symb.symbols[self.ir_arch.IRDst] = ExprInt( + getattr(self.jitter.cpu, self.ir_arch.pc.name), + self.ir_arch.IRDst.size + ) # Avoid memory write self.symb.func_write = None @@ -316,24 +320,24 @@ class DSEEngine(object): # Get IR blocks if cur_addr in self.addr_to_cacheblocks: - self.ir_arch.blocks.clear() - self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr]) + self.ircfg.blocks.clear() + self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr]) else: ## Reset cache structures - self.ir_arch.blocks.clear()# = {} + self.ircfg.blocks.clear()# = {} ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) - self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) + self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg) + self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks) # Emulate the current instruction self.symb.reset_modified() # Is the symbolic execution going (potentially) to jump on a lbl_gen? - if len(self.ir_arch.blocks) == 1: - self.symb.run_at(cur_addr) + if len(self.ircfg.blocks) == 1: + self.symb.run_at(self.ircfg, cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -344,8 +348,10 @@ class DSEEngine(object): self._update_state_from_concrete_symb(self.symb_concrete) while True: - next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) - self.symb.run_block_at(cur_addr) + next_addr_concrete = self.symb_concrete.run_block_at( + self.ircfg, cur_addr + ) + self.symb.run_block_at(self.ircfg, cur_addr) if not (isinstance(next_addr_concrete, ExprLoc) and self.ir_arch.loc_db.get_location_offset( diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 85335863..00250157 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -1572,8 +1572,10 @@ class ir_arml(IntermediateRepresentation): assignments = [] loc = loc_do - split = self.add_instr_to_irblock(block, instr, assignments, - irblocks, gen_pc_updt) + split = self.add_instr_to_current_state( + instr, block, assignments, + irblocks, gen_pc_updt + ) if split: raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) @@ -1587,7 +1589,7 @@ class ir_arml(IntermediateRepresentation): ir_blocks_all.append(irblocks) return index, ir_blocks_all - def add_block(self, block, gen_pc_updt=False): + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): """ Add a native block to the current IR @block: native assembly block @@ -1613,8 +1615,10 @@ class ir_arml(IntermediateRepresentation): label = None continue - split = self.add_instr_to_irblock(block, instr, assignments, - ir_blocks_all, gen_pc_updt) + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) if split: ir_blocks_all.append(IRBlock(label, assignments)) label = None @@ -1622,7 +1626,7 @@ class ir_arml(IntermediateRepresentation): if label is not None: ir_blocks_all.append(IRBlock(label, assignments)) - new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) for irblock in new_ir_blocks_all: ircfg.add_irblock(irblock) return new_ir_blocks_all diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index 53c2c6b3..3caa8b12 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -10,12 +10,8 @@ class ir_a_mips32l(ir_mips32l, ira): ir_mips32l.__init__(self, loc_db) self.ret_reg = self.arch.regs.V0 - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - # Avoid adding side effects, already done in post_add_bloc - return False - - def post_add_block(self, block, ir_blocks): - IntermediateRepresentation.post_add_block(self, block, ir_blocks) + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): + IntermediateRepresentation.post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks) new_irblocks = [] for irb in ir_blocks: pc_val = None diff --git a/miasm2/arch/ppc/ira.py b/miasm2/arch/ppc/ira.py index 76a979ae..a30f972d 100644 --- a/miasm2/arch/ppc/ira.py +++ b/miasm2/arch/ppc/ira.py @@ -35,14 +35,30 @@ class ir_a_ppc32b(ir_ppc32b, ira): instr )] - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_update): - """Replace function call with corresponding call effects, - inside the IR block""" - if not instr.is_subcall(): - return False - call_effects = self.call_effects(instr.getdstflow(None)[0], instr) - assignments+= call_effects - return True + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_effects = self.call_effects(instr.getdstflow(None)[0], instr) + assignments+= call_effects + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False def sizeof_char(self): return 8 diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index fc0c81c9..962b9889 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -3,11 +3,11 @@ import warnings import logging -from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IntermediateRepresentation, AssignBlock -from miasm2.expression.expression import ExprAff, ExprOp +from miasm2.expression.expression import ExprOp from miasm2.analysis.data_flow import dead_simp as new_dead_simp_imp + log = logging.getLogger("analysis") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -27,6 +27,7 @@ class ira(IntermediateRepresentation): class ira_x86_16(ir_x86_16, ira) """ + ret_reg = None def call_effects(self, addr, instr): """Default modelisation of a function call to @addr. This may be used to: @@ -44,14 +45,36 @@ class ira(IntermediateRepresentation): instr) return [assignblk] - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_update): - """Replace function call with corresponding call effects, - inside the IR block""" - if not instr.is_subcall(): - return False - call_effects = self.call_effects(instr.args[0], instr) - assignments+= call_effects - return True + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + If the instruction is a function call, replace the original IR by a + model of the sub function + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. + + @instr: native instruction + @block: native block source + @assignments: current irbloc + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_effects = self.call_effects(instr.args[0], instr) + assignments+= call_effects + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False def sizeof_char(self): "Return the size of a char in bits" @@ -73,7 +96,7 @@ class ira(IntermediateRepresentation): "Return the size of a void* in bits" raise NotImplementedError("Abstract method") - def dead_simp(self): + def dead_simp(self, ircfg): """Deprecated: See miasm2.analysis.data_flow.dead_simp()""" warnings.warn('DEPRECATION WARNING: Please use miasm2.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') - new_dead_simp_imp(self) + new_dead_simp_imp(self, ircfg) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 5d783c22..d41498b5 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,8 +23,7 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmBlock, AsmConstraint, AsmBlockBad -from miasm2.core.locationdb import LocationDB +from miasm2.core.asmblock import AsmBlock, AsmConstraint from miasm2.core.graph import DiGraph class AssignBlock(object): @@ -387,6 +386,7 @@ class IRBlock(object): return IRBlock(self.loc_key, assignblks) + class irbloc(IRBlock): """ DEPRECATED object @@ -402,7 +402,7 @@ class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, loc_db, blocks=None, *args, **kwargs): + def __init__(self, irdst, loc_db, blocks=None, *args, **kwargs): """Instanciate a DiGraphIR @loc_db: LocationDB instance @blocks: IR blocks @@ -411,9 +411,14 @@ class DiGraphIR(DiGraph): if blocks is None: blocks = {} self._blocks = blocks + self._irdst = irdst super(DiGraphIR, self).__init__(*args, **kwargs) @property + def IRDst(self): + return self._irdst + + @property def blocks(self): return self._blocks @@ -657,7 +662,7 @@ class DiGraphIR(DiGraph): if len(assignblk) > 1: continue assert set(assignblk.keys()) == set([self.IRDst]) - if len(self.graph.successors(block.loc_key)) != 1: + if len(self.successors(block.loc_key)) != 1: continue if not assignblk[self.IRDst].is_loc(): continue @@ -672,7 +677,7 @@ class DiGraphIR(DiGraph): for loc_key in jmp_blocks: block = self.blocks[loc_key] dst_loc_key = block.dst - parents = self.graph.predecessors(block.loc_key) + parents = self.predecessors(block.loc_key) for lbl in parents: parent = self.blocks.get(lbl, None) if parent is None: @@ -681,24 +686,24 @@ class DiGraphIR(DiGraph): if dst.is_id(block.loc_key): dst = m2_expr.ExprLoc(dst_loc_key, dst.size) - self.graph.discard_edge(lbl, block.loc_key) - self.graph.discard_edge(block.loc_key, dst_loc_key) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True elif dst.is_cond(): src1, src2 = dst.src1, dst.src2 if src1.is_id(block.loc_key): dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_loc_key, dst.size), dst.src2) - self.graph.discard_edge(lbl, block.loc_key) - self.graph.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_loc_key) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True if src2.is_id(block.loc_key): dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_loc_key, dst.size)) - self.graph.discard_edge(lbl, block.loc_key) - self.graph.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_loc_key) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True if dst.src1 == dst.src2: dst = dst.src1 @@ -709,9 +714,9 @@ class DiGraphIR(DiGraph): # Remove unlinked useless nodes for loc_key in jmp_blocks: - if (len(self.graph.predecessors(loc_key)) == 0 and - len(self.graph.successors(loc_key)) == 0): - self.graph.del_node(loc_key) + if (len(self.predecessors(loc_key)) == 0 and + len(self.successors(loc_key)) == 0): + self.del_node(loc_key) del self.blocks[loc_key] return modified @@ -721,21 +726,21 @@ class DiGraphIR(DiGraph): parent """ modified = False - todo = set(self.graph.nodes()) + todo = set(self.nodes()) while todo: block = todo.pop() - sons = self.graph.successors(block) + sons = self.successors(block) if len(sons) != 1: continue son = list(sons)[0] - if self.graph.predecessors(son) != [block]: + if self.predecessors(son) != [block]: continue if block not in self.blocks: continue if son not in self.blocks: continue # Block has one son, son has one parent => merge - assignblks =[] + assignblks = [] for assignblk in self.blocks[block]: if self.IRDst not in assignblk: assignblks.append(assignblk) @@ -749,33 +754,19 @@ class DiGraphIR(DiGraph): assignblks += self.blocks[son].assignblks new_block = IRBlock(block, assignblks) - self.graph.discard_edge(block, son) + self.discard_edge(block, son) - for lson in self.graph.successors(son): - self.graph.add_uniq_edge(block, lson) - self.graph.discard_edge(son, lson) + for lson in self.successors(son): + self.add_uniq_edge(block, lson) + self.discard_edge(son, lson) del self.blocks[son] - self.graph.del_node(son) + self.del_node(son) self.blocks[block] = new_block todo.add(block) modified = True return modified - def _gen_graph(self): - """ - Gen irbloc digraph - """ - self._graph = DiGraphIR(self.blocks, self.loc_db) - for lbl, block in self.blocks.iteritems(): - assert isinstance(lbl, m2_expr.LocKey) - self._graph.add_node(lbl) - for dst in self.dst_trackback(block): - if dst.is_int(): - dst_lbl = self.loc_db.get_or_create_offset_location(int(dst)) - dst = m2_expr.ExprLoc(dst_lbl.loc_key, self.pc.size) - if dst.is_loc(): - self._graph.add_edge(lbl, dst.loc_key) class IntermediateRepresentation(object): """ @@ -790,10 +781,28 @@ class IntermediateRepresentation(object): self.arch = arch self.attrib = attrib self.loc_db = loc_db + self.IRDst = None def get_ir(self, instr): raise NotImplementedError("Abstract Method") + def new_ircfg(self, *args, **kwargs): + """ + Return a new instance of DiGraphIR + """ + return DiGraphIR(self.IRDst, self.loc_db, *args, **kwargs) + + def new_ircfg_from_asmcfg(self, asmcfg, *args, **kwargs): + """ + Return a new instance of DiGraphIR from an @asmcfg + @asmcfg: AsmCFG instance + """ + + ircfg = DiGraphIR(self.IRDst, self.loc_db, *args, **kwargs) + for block in asmcfg.blocks: + self.add_asmblock_to_ircfg(block, ircfg) + return ircfg + def instr2ir(self, instr): ir_bloc_cur, extra_irblocks = self.get_ir(instr) for index, irb in enumerate(extra_irblocks): @@ -804,75 +813,56 @@ class IntermediateRepresentation(object): assignblk = AssignBlock(ir_bloc_cur, instr) return assignblk, extra_irblocks - def add_instr(self, ircfg, line, loc_key=None, gen_pc_updt=False): + def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False): + """ + Add the native instruction @instr to the @ircfg + @instr: instruction instance + @ircfg: IRCFG instance + @loc_key: loc_key instance of the instruction destination + @gen_pc_updt: insert PC update effects between instructions + """ + if loc_key is None: - loc_key = self.loc_db.add_location() + offset = getattr(instr, "offset", None) + loc_key = self.loc_db.add_location(offset=offset) block = AsmBlock(loc_key) - block.lines = [line] - self.add_block(ircfg, block, gen_pc_updt) + block.lines = [instr] + self.add_asmblock_to_ircfg(block, ircfg, gen_pc_updt) return loc_key def gen_pc_update(self, assignments, instr): offset = m2_expr.ExprInt(instr.offset, self.pc.size) assignments.append(AssignBlock({self.pc:offset}, instr)) - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - """Function called before adding an instruction from the the native @block to - the current irbloc. - - Returns a couple. The first element is the new irblock. The second the a - bool: - * True if the current irblock must be split - * False in other cases. - - @block: native block source - @instr: native instruction - @irb_cur: current irbloc - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - - """ - - return False - - def add_instr_to_irblock(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): """ - Add the IR effects of an instruction to the current irblock. + Add the IR effects of an instruction to the current state. - Returns a couple. The first element is the new irblock. The second the a - bool: - * True if the current irblock must be split + Returns a bool: + * True if the current assignments list must be split * False in other cases. - @block: native block source @instr: native instruction - @irb_cur: current irbloc + @block: native block source + @assignments: list of current AssignBlocks @ir_blocks_all: list of additional effects @gen_pc_updt: insert PC update effects between instructions """ - - split = self.pre_add_instr( - block,instr, assignments, - ir_blocks_all, gen_pc_updt - ) - if split: - return True - - assignblk, ir_blocks_extra = self.instr2ir(instr) - if gen_pc_updt is not False: self.gen_pc_update(assignments, instr) + assignblk, ir_blocks_extra = self.instr2ir(instr) assignments.append(assignblk) ir_blocks_all += ir_blocks_extra if ir_blocks_extra: return True return False - def add_asmblock_to_ircfg(self, ircfg, block, gen_pc_updt=False): + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): """ Add a native block to the current IR @block: native assembly block + @ircfg: DiGraphIR instance @gen_pc_updt: insert PC update effects between instructions """ @@ -884,8 +874,8 @@ class IntermediateRepresentation(object): if loc_key is None: assignments = [] loc_key = self.get_loc_key_for_instr(instr) - split = self.add_instr_to_irblock( - block, instr, assignments, + split = self.add_instr_to_current_state( + instr, block, assignments, ir_blocks_all, gen_pc_updt ) if split: @@ -895,7 +885,7 @@ class IntermediateRepresentation(object): if loc_key is not None: ir_blocks_all.append(IRBlock(loc_key, assignments)) - new_ir_blocks_all = self.post_add_block(ircfg, block, ir_blocks_all) + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) for irblock in new_ir_blocks_all: ircfg.add_irblock(irblock) return new_ir_blocks_all @@ -979,7 +969,7 @@ class IntermediateRepresentation(object): assignblk = AssignBlock({self.IRDst: dst}, instr) ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) - def post_add_block(self, ircfg, block, ir_blocks): + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): self.set_empty_dst_to_next(block, ir_blocks) new_irblocks = [] diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index d137e71f..9ab455da 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -3,9 +3,8 @@ import logging from collections import MutableMapping from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ - ExprMem, ExprCompose, ExprSlice, ExprCond, ExprAff + ExprMem, ExprCompose, ExprSlice, ExprCond from miasm2.expression.simplifications import expr_simp -from miasm2.core import asmblock from miasm2.ir.ir import AssignBlock log = logging.getLogger("symbexec") @@ -15,14 +14,14 @@ log.addHandler(console_handler) log.setLevel(logging.INFO) -def get_block(ir_arch, mdis, addr): +def get_block(ir_arch, ircfg, mdis, addr): """Get IRBlock at address @addr""" - loc_key = ir_arch.get_or_create_loc_key(addr) - if loc_key not in ir_arch.blocks: + loc_key = ircfg.get_or_create_loc_key(addr) + if not loc_key in ircfg.blocks: offset = mdis.loc_db.get_location_offset(loc_key) block = mdis.dis_block(offset) - ir_arch.add_block(block) - irblock = ir_arch.get_block(loc_key) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irblock = ircfg.get_block(loc_key) if irblock is None: raise LookupError('No block found at that address: %s' % ir_arch.loc_db.pretty_str(loc_key)) return irblock @@ -1055,18 +1054,18 @@ class SymbolicExecutionEngine(object): return dst - def run_block_at(self, addr, step=False): + def run_block_at(self, ircfg, addr, step=False): """ Symbolic execution of the block at @addr @addr: address to execute (int or ExprInt or label) @step: display intermediate steps """ - irblock = self.ir_arch.get_block(addr) + irblock = ircfg.get_block(addr) if irblock is not None: addr = self.eval_updt_irblock(irblock, step=step) return addr - def run_at(self, addr, lbl_stop=None, step=False): + def run_at(self, ircfg, addr, lbl_stop=None, step=False): """ Symbolic execution starting at @addr @addr: address to execute (int or ExprInt or label) @@ -1074,7 +1073,7 @@ class SymbolicExecutionEngine(object): @step: display intermediate steps """ while True: - irblock = self.ir_arch.get_block(addr) + irblock = ircfg.get_block(addr) if irblock is None: break if irblock.loc_key == lbl_stop: diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 5fe12996..f5ecb566 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -2,7 +2,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine from miasm2.expression.simplifications import expr_simp from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ ExprMem, ExprCond, ExprCompose, ExprOp -from miasm2.core import asmblock TOPSTR = "TOP" diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py index fedd25bc..349d55a6 100644 --- a/miasm2/ir/symbexec_types.py +++ b/miasm2/ir/symbexec_types.py @@ -1,9 +1,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine from miasm2.expression.simplifications import expr_simp -from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ - ExprMem, ExprCond, ExprCompose, ExprOp - -from miasm2.core.ctypesmngr import CTypeId +from miasm2.expression.expression import ExprId, ExprMem class SymbolicStateCTypes(StateEngine): @@ -88,7 +85,6 @@ class SymbExecCType(SymbolicExecutionEngine): @assignblk: AssignBlock instance """ pool_out = {} - eval_cache = {} for dst, src in assignblk.iteritems(): objcs = self.chandler.expr_to_types(src, self.symbols) if isinstance(dst, ExprMem): diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index a26d4c9f..fc5cf35e 100644 --- a/miasm2/jitter/jitcore.py +++ b/miasm2/jitter/jitcore.py @@ -24,7 +24,6 @@ from miasm2.core.utils import BoundedDict from miasm2.expression.expression import LocKey from miasm2.jitter.csts import * - class JitCore(object): "JiT management. This is an abstract class" @@ -42,6 +41,7 @@ class JitCore(object): """ # Arch related self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() self.arch_name = "%s%s" % (self.ir_arch.arch.name, self.ir_arch.attrib) # Structures for block tracking @@ -126,7 +126,7 @@ class JitCore(object): """Add a block to JiT and JiT it. @block: asm_bloc to add """ - irblocks = self.ir_arch.add_block(block, gen_pc_updt = True) + irblocks = self.ir_arch.add_asmblock_to_ircfg(block, self.ircfg, gen_pc_updt = True) block.blocks = irblocks self.jit_irblocks(block.loc_key, irblocks) diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index fa751a68..61bd98d0 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -4,7 +4,6 @@ import miasm2.jitter.csts as csts from miasm2.expression.simplifications import ExpressionSimplifier from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec - ################################################################################ # Python jitter Core # ################################################################################ @@ -18,12 +17,16 @@ class JitCore_Python(jitcore.JitCore): def __init__(self, ir_arch, bin_stream): super(JitCore_Python, self).__init__(ir_arch, bin_stream) self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() # CPU & VM (None for now) will be set later expr_simp = ExpressionSimplifier() expr_simp.enable_passes(ExpressionSimplifier.PASS_COMMONS) - self.symbexec = self.SymbExecClass(None, None, self.ir_arch, {}, - sb_expr_simp=expr_simp) + self.symbexec = self.SymbExecClass( + None, None, + self.ir_arch, {}, + sb_expr_simp=expr_simp + ) self.symbexec.enable_emulated_simplifications() def set_cpu_vm(self, cpu, vm): diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index 5f8b4ad6..288e737a 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -205,8 +205,11 @@ class Jitter(object): self.cpu = jcore.JitCpu() self.ir_arch = ir_arch self.bs = bin_stream_vm(self.vm) + self.ircfg = self.ir_arch.new_ircfg() - self.symbexec = EmulatedSymbExec(self.cpu, self.vm, self.ir_arch, {}) + self.symbexec = EmulatedSymbExec( + self.cpu, self.vm, self.ir_arch, {} + ) self.symbexec.reset_regs() try: diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py index 8204d9ce..d0dbbd8d 100644 --- a/test/analysis/data_flow.py +++ b/test/analysis/data_flow.py @@ -77,108 +77,113 @@ class IRATest(ira): def get_out_regs(self, _): return set([self.ret_reg, self.sp]) +IRA = IRATest(loc_db) + # graph 1 : Simple graph with dead and alive variables -G1_IRA = IRATest(loc_db) +G1_IRA = IRA.new_ircfg() G1_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G1_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) G1_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G1_IRA.blocks = {irb.loc_key : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]} +for irb in [G1_IRB0, G1_IRB1, G1_IRB2]: + G1_IRA.add_irblock(irb) -G1_IRA.graph.add_uniq_edge(G1_IRB0.loc_key, G1_IRB1.loc_key) -G1_IRA.graph.add_uniq_edge(G1_IRB1.loc_key, G1_IRB2.loc_key) +G1_IRA.add_uniq_edge(G1_IRB0.loc_key, G1_IRB1.loc_key) +G1_IRA.add_uniq_edge(G1_IRB1.loc_key, G1_IRB2.loc_key) # Expected output for graph 1 -G1_EXP_IRA = IRATest(loc_db) +G1_EXP_IRA = IRA.new_ircfg() G1_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(b, CST2)]]) G1_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) G1_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G1_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G1_EXP_IRB0, G1_EXP_IRB1, - G1_EXP_IRB2]} +for irb in [G1_EXP_IRB0, G1_EXP_IRB1, G1_EXP_IRB2]: + G1_EXP_IRA.add_irblock(irb) # graph 2 : Natural loop with dead variable -G2_IRA = IRATest(loc_db) +G2_IRA = IRA.new_ircfg() G2_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G2_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G2_IRB2 = gen_irblock(LBL2, [[ExprAff(a, r)]]) -G2_IRA.blocks = {irb.loc_key : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]} +for irb in [G2_IRB0, G2_IRB1, G2_IRB2]: + G2_IRA.add_irblock(irb) -G2_IRA.graph.add_uniq_edge(G2_IRB0.loc_key, G2_IRB1.loc_key) -G2_IRA.graph.add_uniq_edge(G2_IRB1.loc_key, G2_IRB2.loc_key) -G2_IRA.graph.add_uniq_edge(G2_IRB1.loc_key, G2_IRB1.loc_key) +G2_IRA.add_uniq_edge(G2_IRB0.loc_key, G2_IRB1.loc_key) +G2_IRA.add_uniq_edge(G2_IRB1.loc_key, G2_IRB2.loc_key) +G2_IRA.add_uniq_edge(G2_IRB1.loc_key, G2_IRB1.loc_key) # Expected output for graph 2 -G2_EXP_IRA = IRATest(loc_db) +G2_EXP_IRA = IRA.new_ircfg() G2_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(r, CST1)]]) G2_EXP_IRB1 = gen_irblock(LBL1, [[]]) G2_EXP_IRB2 = gen_irblock(LBL2, [[]]) -G2_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G2_EXP_IRB0, G2_EXP_IRB1, - G2_EXP_IRB2]} +for irb in [G2_EXP_IRB0, G2_EXP_IRB1, G2_EXP_IRB2]: + G2_EXP_IRA.add_irblock(irb) # graph 3 : Natural loop with alive variables -G3_IRA = IRATest(loc_db) +G3_IRA = IRA.new_ircfg() G3_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G3_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G3_IRA.blocks = {irb.loc_key : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]} +for irb in [G3_IRB0, G3_IRB1, G3_IRB2]: + G3_IRA.add_irblock(irb) -G3_IRA.graph.add_uniq_edge(G3_IRB0.loc_key, G3_IRB1.loc_key) -G3_IRA.graph.add_uniq_edge(G3_IRB1.loc_key, G3_IRB2.loc_key) -G3_IRA.graph.add_uniq_edge(G3_IRB1.loc_key, G3_IRB1.loc_key) +G3_IRA.add_uniq_edge(G3_IRB0.loc_key, G3_IRB1.loc_key) +G3_IRA.add_uniq_edge(G3_IRB1.loc_key, G3_IRB2.loc_key) +G3_IRA.add_uniq_edge(G3_IRB1.loc_key, G3_IRB1.loc_key) # Expected output for graph 3 -G3_EXP_IRA = IRATest(loc_db) +G3_EXP_IRA = IRA.new_ircfg() G3_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G3_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G3_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G3_EXP_IRB0, G3_EXP_IRB1, - G3_EXP_IRB2]} +for irb in [G3_EXP_IRB0, G3_EXP_IRB1, G3_EXP_IRB2]: + G3_EXP_IRA.add_irblock(irb) # graph 4 : If/else with dead variables -G4_IRA = IRATest(loc_db) +G4_IRA = IRA.new_ircfg() G4_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G4_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G4_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]]) G4_IRB3 = gen_irblock(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) -G4_IRA.blocks = {irb.loc_key : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2, - G4_IRB3]} +for irb in [G4_IRB0, G4_IRB1, G4_IRB2, G4_IRB3]: + G4_IRA.add_irblock(irb) -G4_IRA.graph.add_uniq_edge(G4_IRB0.loc_key, G4_IRB1.loc_key) -G4_IRA.graph.add_uniq_edge(G4_IRB0.loc_key, G4_IRB2.loc_key) -G4_IRA.graph.add_uniq_edge(G4_IRB1.loc_key, G4_IRB3.loc_key) -G4_IRA.graph.add_uniq_edge(G4_IRB2.loc_key, G4_IRB3.loc_key) +G4_IRA.add_uniq_edge(G4_IRB0.loc_key, G4_IRB1.loc_key) +G4_IRA.add_uniq_edge(G4_IRB0.loc_key, G4_IRB2.loc_key) +G4_IRA.add_uniq_edge(G4_IRB1.loc_key, G4_IRB3.loc_key) +G4_IRA.add_uniq_edge(G4_IRB2.loc_key, G4_IRB3.loc_key) # Expected output for graph 4 -G4_EXP_IRA = IRATest(loc_db) +G4_EXP_IRA = IRA.new_ircfg() G4_EXP_IRB0 = gen_irblock(LBL0, [[]]) G4_EXP_IRB1 = gen_irblock(LBL1, [[]]) G4_EXP_IRB2 = gen_irblock(LBL2, [[]]) G4_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) -G4_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G4_EXP_IRB0, G4_EXP_IRB1, - G4_EXP_IRB2, G4_EXP_IRB3]} +for irb in [G4_EXP_IRB0, G4_EXP_IRB1, G4_EXP_IRB2, G4_EXP_IRB3]: + G4_EXP_IRA.add_irblock(irb) # graph 5 : Loop and If/else with dead variables -G5_IRA = IRATest(loc_db) +G5_IRA = IRA.new_ircfg() G5_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G5_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -187,19 +192,19 @@ G5_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST3)]]) G5_IRB4 = gen_irblock(LBL4, [[ExprAff(a, a+CST1)]]) G5_IRB5 = gen_irblock(LBL5, [[ExprAff(a, r)]]) -G5_IRA.blocks = {irb.loc_key : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, - G5_IRB4, G5_IRB5]} +for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, G5_IRB4, G5_IRB5]: + G5_IRA.add_irblock(irb) -G5_IRA.graph.add_uniq_edge(G5_IRB0.loc_key, G5_IRB1.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB1.loc_key, G5_IRB2.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB1.loc_key, G5_IRB3.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB2.loc_key, G5_IRB4.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB3.loc_key, G5_IRB4.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB4.loc_key, G5_IRB5.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB4.loc_key, G5_IRB1.loc_key) +G5_IRA.add_uniq_edge(G5_IRB0.loc_key, G5_IRB1.loc_key) +G5_IRA.add_uniq_edge(G5_IRB1.loc_key, G5_IRB2.loc_key) +G5_IRA.add_uniq_edge(G5_IRB1.loc_key, G5_IRB3.loc_key) +G5_IRA.add_uniq_edge(G5_IRB2.loc_key, G5_IRB4.loc_key) +G5_IRA.add_uniq_edge(G5_IRB3.loc_key, G5_IRB4.loc_key) +G5_IRA.add_uniq_edge(G5_IRB4.loc_key, G5_IRB5.loc_key) +G5_IRA.add_uniq_edge(G5_IRB4.loc_key, G5_IRB1.loc_key) # Expected output for graph 5 -G5_EXP_IRA = IRATest(loc_db) +G5_EXP_IRA = IRA.new_ircfg() G5_EXP_IRB0 = gen_irblock(LBL0, [[]]) G5_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -208,72 +213,72 @@ G5_EXP_IRB3 = gen_irblock(LBL3, [[]]) G5_EXP_IRB4 = gen_irblock(LBL4, [[]]) G5_EXP_IRB5 = gen_irblock(LBL5, [[]]) -G5_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G5_EXP_IRB0, G5_EXP_IRB1, - G5_EXP_IRB2, G5_EXP_IRB3, - G5_EXP_IRB4, G5_EXP_IRB5]} +for irb in [G5_EXP_IRB0, G5_EXP_IRB1, G5_EXP_IRB2, + G5_EXP_IRB3, G5_EXP_IRB4, G5_EXP_IRB5]: + G5_EXP_IRA.add_irblock(irb) # graph 6 : Natural loop with dead variables symetric affectation # (a = b <-> b = a ) -G6_IRA = IRATest(loc_db) +G6_IRA = IRA.new_ircfg() G6_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G6_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) G6_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]]) G6_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST2)]]) -G6_IRA.blocks = {irb.loc_key : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2, - G6_IRB3]} +for irb in [G6_IRB0, G6_IRB1, G6_IRB2, G6_IRB3]: + G6_IRA.add_irblock(irb) -G6_IRA.graph.add_uniq_edge(G6_IRB0.loc_key, G6_IRB1.loc_key) -G6_IRA.graph.add_uniq_edge(G6_IRB1.loc_key, G6_IRB2.loc_key) -G6_IRA.graph.add_uniq_edge(G6_IRB2.loc_key, G6_IRB1.loc_key) -G6_IRA.graph.add_uniq_edge(G6_IRB2.loc_key, G6_IRB3.loc_key) +G6_IRA.add_uniq_edge(G6_IRB0.loc_key, G6_IRB1.loc_key) +G6_IRA.add_uniq_edge(G6_IRB1.loc_key, G6_IRB2.loc_key) +G6_IRA.add_uniq_edge(G6_IRB2.loc_key, G6_IRB1.loc_key) +G6_IRA.add_uniq_edge(G6_IRB2.loc_key, G6_IRB3.loc_key) # Expected output for graph 6 -G6_EXP_IRA = IRATest(loc_db) +G6_EXP_IRA = IRA.new_ircfg() G6_EXP_IRB0 = gen_irblock(LBL0, [[]]) G6_EXP_IRB1 = gen_irblock(LBL1, [[]]) G6_EXP_IRB2 = gen_irblock(LBL2, [[]]) G6_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST2)]]) -G6_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G6_EXP_IRB0, G6_EXP_IRB1, - G6_EXP_IRB2, G6_EXP_IRB3]} +for irb in [G6_EXP_IRB0, G6_EXP_IRB1, G6_EXP_IRB2, G6_EXP_IRB3]: + G6_EXP_IRA.add_irblock(irb) # graph 7 : Double entry loop with dead variables -G7_IRA = IRATest(loc_db) +G7_IRA = IRA.new_ircfg() G7_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G7_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G7_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]]) G7_IRB3 = gen_irblock(LBL3, [[ExprAff(a, r)]]) -G7_IRA.blocks = {irb.loc_key : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2, - G7_IRB3]} +for irb in [G7_IRB0, G7_IRB1, G7_IRB2, G7_IRB3]: + G7_IRA.add_irblock(irb) -G7_IRA.graph.add_uniq_edge(G7_IRB0.loc_key, G7_IRB1.loc_key) -G7_IRA.graph.add_uniq_edge(G7_IRB1.loc_key, G7_IRB2.loc_key) -G7_IRA.graph.add_uniq_edge(G7_IRB2.loc_key, G7_IRB1.loc_key) -G7_IRA.graph.add_uniq_edge(G7_IRB2.loc_key, G7_IRB3.loc_key) -G7_IRA.graph.add_uniq_edge(G7_IRB0.loc_key, G7_IRB2.loc_key) +G7_IRA.add_uniq_edge(G7_IRB0.loc_key, G7_IRB1.loc_key) +G7_IRA.add_uniq_edge(G7_IRB1.loc_key, G7_IRB2.loc_key) +G7_IRA.add_uniq_edge(G7_IRB2.loc_key, G7_IRB1.loc_key) +G7_IRA.add_uniq_edge(G7_IRB2.loc_key, G7_IRB3.loc_key) +G7_IRA.add_uniq_edge(G7_IRB0.loc_key, G7_IRB2.loc_key) # Expected output for graph 7 -G7_EXP_IRA = IRATest(loc_db) +G7_EXP_IRA = IRA.new_ircfg() G7_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(r, CST1)]]) G7_EXP_IRB1 = gen_irblock(LBL1, [[]]) G7_EXP_IRB2 = gen_irblock(LBL2, [[]]) G7_EXP_IRB3 = gen_irblock(LBL3, [[]]) -G7_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G7_EXP_IRB0, G7_EXP_IRB1, - G7_EXP_IRB2, G7_EXP_IRB3]} +for irb in [G7_EXP_IRB0, G7_EXP_IRB1, G7_EXP_IRB2, G7_EXP_IRB3]: + G7_EXP_IRA.add_irblock(irb) # graph 8 : Nested loops with dead variables -G8_IRA = IRATest(loc_db) +G8_IRA = IRA.new_ircfg() G8_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST1)]]) G8_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -281,31 +286,31 @@ G8_IRB2 = gen_irblock(LBL2, [[ExprAff(b, b+CST2)]]) G8_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]]) -G8_IRA.blocks = {irb.loc_key : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2, - G8_IRB3]} +for irb in [G8_IRB0, G8_IRB1, G8_IRB2, G8_IRB3]: + G8_IRA.add_irblock(irb) -G8_IRA.graph.add_uniq_edge(G8_IRB0.loc_key, G8_IRB1.loc_key) -G8_IRA.graph.add_uniq_edge(G8_IRB1.loc_key, G8_IRB2.loc_key) -G8_IRA.graph.add_uniq_edge(G8_IRB2.loc_key, G8_IRB1.loc_key) -G8_IRA.graph.add_uniq_edge(G8_IRB2.loc_key, G8_IRB3.loc_key) -G8_IRA.graph.add_uniq_edge(G8_IRB3.loc_key, G8_IRB2.loc_key) +G8_IRA.add_uniq_edge(G8_IRB0.loc_key, G8_IRB1.loc_key) +G8_IRA.add_uniq_edge(G8_IRB1.loc_key, G8_IRB2.loc_key) +G8_IRA.add_uniq_edge(G8_IRB2.loc_key, G8_IRB1.loc_key) +G8_IRA.add_uniq_edge(G8_IRB2.loc_key, G8_IRB3.loc_key) +G8_IRA.add_uniq_edge(G8_IRB3.loc_key, G8_IRB2.loc_key) # Expected output for graph 8 -G8_EXP_IRA = IRATest(loc_db) +G8_EXP_IRA = IRA.new_ircfg() G8_EXP_IRB0 = gen_irblock(LBL0, [[], []]) G8_EXP_IRB1 = gen_irblock(LBL1, [[]]) G8_EXP_IRB2 = gen_irblock(LBL2, [[]]) G8_EXP_IRB3 = gen_irblock(LBL3, [[]]) -G8_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G8_EXP_IRB0, G8_EXP_IRB1, - G8_EXP_IRB2, G8_EXP_IRB3]} +for irb in [G8_EXP_IRB0, G8_EXP_IRB1, G8_EXP_IRB2, G8_EXP_IRB3]: + G8_EXP_IRA.add_irblock(irb) # graph 9 : Miultiple-exits loops with dead variables -G9_IRA = IRATest(loc_db) +G9_IRA = IRA.new_ircfg() G9_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST1)]]) G9_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)], [ExprAff(b, b+CST1)]]) @@ -313,22 +318,22 @@ G9_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)], [ExprAff(b, b+CST2)]]) G9_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]]) G9_IRB4 = gen_irblock(LBL4, [[ExprAff(r, a)], [ExprAff(r, b)]]) -G9_IRA.blocks = {irb.loc_key : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2, - G9_IRB3, G9_IRB4]} +for irb in [G9_IRB0, G9_IRB1, G9_IRB2, G9_IRB3, G9_IRB4]: + G9_IRA.add_irblock(irb) -G9_IRA.graph.add_uniq_edge(G9_IRB0.loc_key, G9_IRB4.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB0.loc_key, G9_IRB1.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB1.loc_key, G9_IRB0.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB1.loc_key, G9_IRB4.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB1.loc_key, G9_IRB2.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB2.loc_key, G9_IRB0.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB2.loc_key, G9_IRB3.loc_key) -G9_IRA.graph.add_uniq_edge(G9_IRB3.loc_key, G9_IRB4.loc_key) +G9_IRA.add_uniq_edge(G9_IRB0.loc_key, G9_IRB4.loc_key) +G9_IRA.add_uniq_edge(G9_IRB0.loc_key, G9_IRB1.loc_key) +G9_IRA.add_uniq_edge(G9_IRB1.loc_key, G9_IRB0.loc_key) +G9_IRA.add_uniq_edge(G9_IRB1.loc_key, G9_IRB4.loc_key) +G9_IRA.add_uniq_edge(G9_IRB1.loc_key, G9_IRB2.loc_key) +G9_IRA.add_uniq_edge(G9_IRB2.loc_key, G9_IRB0.loc_key) +G9_IRA.add_uniq_edge(G9_IRB2.loc_key, G9_IRB3.loc_key) +G9_IRA.add_uniq_edge(G9_IRB3.loc_key, G9_IRB4.loc_key) # Expected output for graph 9 -G9_EXP_IRA = IRATest(loc_db) +G9_EXP_IRA = IRA.new_ircfg() G9_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(b, CST1)]]) G9_EXP_IRB1 = gen_irblock(LBL1, [[], [ExprAff(b, b+CST1)]]) @@ -336,42 +341,42 @@ G9_EXP_IRB2 = gen_irblock(LBL2, [[], [ExprAff(b, b+CST2)]]) G9_EXP_IRB3 = gen_irblock(LBL3, [[]]) G9_EXP_IRB4 = gen_irblock(LBL4, [[], [ExprAff(r, b)]]) -G9_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G9_EXP_IRB0, G9_EXP_IRB1, - G9_EXP_IRB2, G9_EXP_IRB3, - G9_EXP_IRB4]} +for irb in [G9_EXP_IRB0, G9_EXP_IRB1, G9_EXP_IRB2, G9_EXP_IRB3, G9_EXP_IRB4]: + G9_EXP_IRA.add_irblock(irb) # graph 10 : Natural loop with alive variables symetric affectation # (a = b <-> b = a ) -G10_IRA = IRATest(loc_db) +G10_IRA = IRA.new_ircfg() G10_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G10_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) G10_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]]) G10_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST1)]]) -G10_IRA.blocks = {irb.loc_key : irb for irb in [G10_IRB0, G10_IRB1, - G10_IRB2, G10_IRB3]} +for irb in [G10_IRB0, G10_IRB1, G10_IRB2, G10_IRB3]: + G10_IRA.add_irblock(irb) + -G10_IRA.graph.add_uniq_edge(G10_IRB0.loc_key, G10_IRB1.loc_key) -G10_IRA.graph.add_uniq_edge(G10_IRB1.loc_key, G10_IRB2.loc_key) -G10_IRA.graph.add_uniq_edge(G10_IRB2.loc_key, G10_IRB1.loc_key) -G10_IRA.graph.add_uniq_edge(G10_IRB2.loc_key, G10_IRB3.loc_key) +G10_IRA.add_uniq_edge(G10_IRB0.loc_key, G10_IRB1.loc_key) +G10_IRA.add_uniq_edge(G10_IRB1.loc_key, G10_IRB2.loc_key) +G10_IRA.add_uniq_edge(G10_IRB2.loc_key, G10_IRB1.loc_key) +G10_IRA.add_uniq_edge(G10_IRB2.loc_key, G10_IRB3.loc_key) # Expected output for graph 10 -G10_EXP_IRA = IRATest(loc_db) +G10_EXP_IRA = IRA.new_ircfg() G10_EXP_IRB0 = gen_irblock(LBL0, [[]]) G10_EXP_IRB1 = gen_irblock(LBL1, [[]]) G10_EXP_IRB2 = gen_irblock(LBL2, [[]]) G10_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST1)]]) -G10_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G10_EXP_IRB0, G10_EXP_IRB1, - G10_EXP_IRB2, G10_EXP_IRB3]} +for irb in [G10_EXP_IRB0, G10_EXP_IRB1, G10_EXP_IRB2, G10_EXP_IRB3]: + G10_EXP_IRA.add_irblock(irb) # graph 11 : If/Else conditions with alive variables -G11_IRA = IRATest(loc_db) +G11_IRA = IRA.new_ircfg() G11_IRB0 = gen_irblock(LBL0, [[ExprAff(a, b)]]) G11_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -380,17 +385,18 @@ G11_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST1)]]) G11_IRB4 = gen_irblock(LBL4, [[ExprAff(b, b+CST1)]]) -G11_IRA.blocks = {irb.loc_key : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]} +for irb in [G11_IRB0, G11_IRB1, G11_IRB2]: + G11_IRA.add_irblock(irb) -G11_IRA.graph.add_uniq_edge(G11_IRB0.loc_key, G11_IRB1.loc_key) -#G11_IRA.graph.add_uniq_edge(G11_IRB3.loc_key, G11_IRB1.loc_key) -G11_IRA.graph.add_uniq_edge(G11_IRB1.loc_key, G11_IRB0.loc_key) -#G11_IRA.graph.add_uniq_edge(G11_IRB4.loc_key, G11_IRB0.loc_key) -G11_IRA.graph.add_uniq_edge(G11_IRB1.loc_key, G11_IRB2.loc_key) +G11_IRA.add_uniq_edge(G11_IRB0.loc_key, G11_IRB1.loc_key) +#G11_IRA.add_uniq_edge(G11_IRB3.loc_key, G11_IRB1.loc_key) +G11_IRA.add_uniq_edge(G11_IRB1.loc_key, G11_IRB0.loc_key) +#G11_IRA.add_uniq_edge(G11_IRB4.loc_key, G11_IRB0.loc_key) +G11_IRA.add_uniq_edge(G11_IRB1.loc_key, G11_IRB2.loc_key) # Expected output for graph 11 -G11_EXP_IRA = IRATest(loc_db) +G11_EXP_IRA = IRA.new_ircfg() G11_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, b)]]) G11_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -398,13 +404,14 @@ G11_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) #G11_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST1)]]) #G11_EXP_IRB4 = gen_irblock(LBL4, [[ExprAff(b, b+CST1)]]) -G11_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G11_EXP_IRB0, G11_EXP_IRB1, - G11_EXP_IRB2]} +for irb in [G11_EXP_IRB0, G11_EXP_IRB1, + G11_EXP_IRB2]: + G11_EXP_IRA.add_irblock(irb) # graph 12 : Graph with multiple out points and useless definitions # of return register -G12_IRA = IRATest(loc_db) +G12_IRA = IRA.new_ircfg() G12_IRB0 = gen_irblock(LBL0, [[ExprAff(r, CST1)], [ExprAff(a, CST2)]]) G12_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -413,17 +420,17 @@ G12_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST3)]]) G12_IRB4 = gen_irblock(LBL4, [[ExprAff(r, CST2)]]) G12_IRB5 = gen_irblock(LBL5, [[ExprAff(r, b)]]) -G12_IRA.blocks = {irb.loc_key : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2, - G12_IRB3, G12_IRB4, G12_IRB5]} +for irb in [G12_IRB0, G12_IRB1, G12_IRB2, G12_IRB3, G12_IRB4, G12_IRB5]: + G12_IRA.add_irblock(irb) -G12_IRA.graph.add_uniq_edge(G12_IRB0.loc_key, G12_IRB1.loc_key) -G12_IRA.graph.add_uniq_edge(G12_IRB0.loc_key, G12_IRB2.loc_key) -G12_IRA.graph.add_uniq_edge(G12_IRB2.loc_key, G12_IRB3.loc_key) -G12_IRA.graph.add_uniq_edge(G12_IRB2.loc_key, G12_IRB4.loc_key) -G12_IRA.graph.add_uniq_edge(G12_IRB4.loc_key, G12_IRB5.loc_key) +G12_IRA.add_uniq_edge(G12_IRB0.loc_key, G12_IRB1.loc_key) +G12_IRA.add_uniq_edge(G12_IRB0.loc_key, G12_IRB2.loc_key) +G12_IRA.add_uniq_edge(G12_IRB2.loc_key, G12_IRB3.loc_key) +G12_IRA.add_uniq_edge(G12_IRB2.loc_key, G12_IRB4.loc_key) +G12_IRA.add_uniq_edge(G12_IRB4.loc_key, G12_IRB5.loc_key) # Expected output for graph 12 -G12_EXP_IRA = IRATest(loc_db) +G12_EXP_IRA = IRA.new_ircfg() G12_EXP_IRB0 = gen_irblock(LBL0, [[], []]) G12_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -433,13 +440,14 @@ G12_EXP_IRB4 = gen_irblock(LBL4, [[]]) G12_EXP_IRB5 = gen_irblock(LBL5, [[ExprAff(r, b)]]) -G12_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G12_EXP_IRB0, G12_EXP_IRB1, - G12_EXP_IRB2, G12_EXP_IRB3, - G12_EXP_IRB4, G12_EXP_IRB5]} +for irb in [G12_EXP_IRB0, G12_EXP_IRB1, + G12_EXP_IRB2, G12_EXP_IRB3, + G12_EXP_IRB4, G12_EXP_IRB5]: + G12_EXP_IRA.add_irblock(irb) # graph 13 : Graph where a leaf has lost its son -G13_IRA = IRATest(loc_db) +G13_IRA = IRA.new_ircfg() G13_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G13_IRB1 = gen_irblock(LBL1, [[ExprAff(r, b)]]) @@ -448,16 +456,16 @@ G13_IRB2 = gen_irblock(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1), G13_IRB3 = gen_irblock(LBL3, [[]]) # lost son G13_IRB4 = gen_irblock(LBL4, [[ExprAff(b, CST2)]]) -G13_IRA.blocks = {irb.loc_key : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2, - G13_IRB4]} +for irb in [G13_IRB0, G13_IRB1, G13_IRB2, G13_IRB4]: + G13_IRA.add_irblock(irb) -G13_IRA.graph.add_uniq_edge(G13_IRB0.loc_key, G13_IRB1.loc_key) -G13_IRA.graph.add_uniq_edge(G13_IRB0.loc_key, G13_IRB4.loc_key) -G13_IRA.graph.add_uniq_edge(G13_IRB2.loc_key, G13_IRB3.loc_key) -G13_IRA.graph.add_uniq_edge(G13_IRB4.loc_key, G13_IRB2.loc_key) +G13_IRA.add_uniq_edge(G13_IRB0.loc_key, G13_IRB1.loc_key) +G13_IRA.add_uniq_edge(G13_IRB0.loc_key, G13_IRB4.loc_key) +G13_IRA.add_uniq_edge(G13_IRB2.loc_key, G13_IRB3.loc_key) +G13_IRA.add_uniq_edge(G13_IRB4.loc_key, G13_IRB2.loc_key) # Expected output for graph 13 -G13_EXP_IRA = IRATest(loc_db) +G13_EXP_IRA = IRA.new_ircfg() G13_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G13_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, b)]]) @@ -466,58 +474,62 @@ G13_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1), G13_EXP_IRB3 = gen_irblock(LBL3, [[]]) G13_EXP_IRB4 = gen_irblock(LBL4, [[ExprAff(b, CST2)]]) -G13_EXP_IRA.blocks = {irb.loc_key: irb for irb in [G13_EXP_IRB0, G13_EXP_IRB1, - G13_EXP_IRB2, G13_EXP_IRB4]} +for irb in [G13_EXP_IRB0, G13_EXP_IRB1, G13_EXP_IRB2, G13_EXP_IRB4]: + G13_EXP_IRA.add_irblock(irb) #G13_EXP_IRA = G13_IRA # graph 14 : Graph where variable assigned multiple times in a block but still # useful in the end -G14_IRA = IRATest(loc_db) +G14_IRA = IRA.new_ircfg() G14_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) G14_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+c)]]) -G14_IRA.blocks = {irb.loc_key : irb for irb in [G14_IRB0, G14_IRB1]} +for irb in [G14_IRB0, G14_IRB1]: + G14_IRA.add_irblock(irb) -G14_IRA.graph.add_uniq_edge(G14_IRB0.loc_key, G14_IRB1.loc_key) +G14_IRA.add_uniq_edge(G14_IRB0.loc_key, G14_IRB1.loc_key) # Expected output for graph 1 -G14_EXP_IRA = IRATest(loc_db) +G14_EXP_IRA = IRA.new_ircfg() G14_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) G14_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+c)]]) -G14_EXP_IRA.blocks = {irb.loc_key: irb for irb in [G14_EXP_IRB0, G14_EXP_IRB1]} +for irb in [G14_EXP_IRB0, G14_EXP_IRB1]: + G14_EXP_IRA.add_irblock(irb) # graph 15 : Graph where variable assigned multiple and read at the same time, # but useless -G15_IRA = IRATest(loc_db) +G15_IRA = IRA.new_ircfg() G15_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST2)], [ExprAff(a, CST1), ExprAff(b, a+CST2), ExprAff(c, CST1)]]) G15_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]]) -G15_IRA.blocks = {irb.loc_key : irb for irb in [G15_IRB0, G15_IRB1]} +for irb in [G15_IRB0, G15_IRB1]: + G15_IRA.add_irblock(irb) -G15_IRA.graph.add_uniq_edge(G15_IRB0.loc_key, G15_IRB1.loc_key) +G15_IRA.add_uniq_edge(G15_IRB0.loc_key, G15_IRB1.loc_key) # Expected output for graph 1 -G15_EXP_IRA = IRATest(loc_db) +G15_EXP_IRA = IRA.new_ircfg() G15_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(a, CST1)]]) G15_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]]) -G15_EXP_IRA.blocks = {irb.loc_key: irb for irb in [G15_EXP_IRB0, G15_EXP_IRB1]} +for irb in [G15_EXP_IRB0, G15_EXP_IRB1]: + G15_EXP_IRA.add_irblock(irb) # graph 16 : Graph where variable assigned multiple times in the same bloc -G16_IRA = IRATest(loc_db) +G16_IRA = IRA.new_ircfg() G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), ExprAff(c, CST3)], [ExprAff(a, c+CST1), @@ -525,25 +537,28 @@ G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), G16_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]]) G16_IRB2 = gen_irblock(LBL2, [[]]) -G16_IRA.blocks = {irb.loc_key : irb for irb in [G16_IRB0, G16_IRB1]} +for irb in [G16_IRB0, G16_IRB1]: + G16_IRA.add_irblock(irb) -G16_IRA.graph.add_uniq_edge(G16_IRB0.loc_key, G16_IRB1.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB1.loc_key, G16_IRB2.loc_key) +G16_IRA.add_uniq_edge(G16_IRB0.loc_key, G16_IRB1.loc_key) +G16_IRA.add_uniq_edge(G16_IRB1.loc_key, G16_IRB2.loc_key) -G16_IRA.blocks = {irb.loc_key : irb for irb in [G16_IRB0, G16_IRB1]} +for irb in [G16_IRB0, G16_IRB1]: + G16_IRA.add_irblock(irb) # Expected output for graph 1 -G16_EXP_IRA = IRATest(loc_db) +G16_EXP_IRA = IRA.new_ircfg() G16_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(c, CST3)], [ExprAff(a, c + CST1), ExprAff(b, c + CST2)]]) G16_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]]) -G16_EXP_IRA.blocks = {irb.loc_key: irb for irb in [G16_EXP_IRB0, G16_EXP_IRB1]} +for irb in [G16_EXP_IRB0, G16_EXP_IRB1]: + G16_EXP_IRA.add_irblock(irb) # graph 17 : parallel ir -G17_IRA = IRATest(loc_db) +G17_IRA = IRA.new_ircfg() G17_IRB0 = gen_irblock(LBL0, [[ExprAff(a, a*b), ExprAff(b, c), @@ -599,12 +614,13 @@ G17_IRB0 = gen_irblock(LBL0, [[ExprAff(a, a*b), ]) -G17_IRA.blocks = {irb.loc_key : irb for irb in [G17_IRB0]} +for irb in [G17_IRB0]: + G17_IRA.add_irblock(irb) -G17_IRA.graph.add_node(G17_IRB0.loc_key) +#G17_IRA.graph.add_node(G17_IRB0.loc_key) # Expected output for graph 17 -G17_EXP_IRA = IRATest(loc_db) +G17_EXP_IRA = IRA.new_ircfg() G17_EXP_IRB0 = gen_irblock(LBL0, [[], @@ -641,7 +657,8 @@ G17_EXP_IRB0 = gen_irblock(LBL0, [[], # Trick because a+b+c != ((a+b)+c) ]) -G17_EXP_IRA.blocks = {irb.loc_key : irb for irb in [G17_EXP_IRB0]} +for irb in [G17_EXP_IRB0]: + G17_EXP_IRA.add_irblock(irb) # Begining of tests @@ -669,16 +686,16 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), print "[+] Test", test_nb+1 # Print initial graph, for debug - open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) + open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.dot()) reaching_defs = ReachingDefinitions(g_ira) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) # # Simplify graph - dead_simp(g_ira) + dead_simp(IRA, g_ira) # # Print simplified graph, for debug - open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) + open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.dot()) # Same number of blocks assert len(g_ira.blocks) == len(g_exp_ira.blocks) diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index f0772cca..2ba5f044 100644 --- a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -24,6 +24,7 @@ B = ExprId("b", 32) C = ExprId("c", 32) D = ExprId("d", 32) R = ExprId("r", 32) +COND = ExprId("cond", 32) A_INIT = ExprId("a_init", 32) B_INIT = ExprId("b_init", 32) @@ -93,13 +94,84 @@ class IRATest(ira): def __init__(self, loc_db=None): arch = Arch() super(IRATest, self).__init__(arch, 32, loc_db) - self.IRDst = PC + self.IRDst = ExprId("IRDst", 32) self.ret_reg = R def get_out_regs(self, _): return set([self.ret_reg, self.sp]) +def bloc2graph(irgraph, label=False, lines=True): + """Render dot graph of @blocks""" + + escape_chars = re.compile('[' + re.escape('{}') + ']') + label_attr = 'colspan="2" align="center" bgcolor="grey"' + edge_attr = 'label = "%s" color="%s" style="bold"' + td_attr = 'align="left"' + block_attr = 'shape="Mrecord" fontname="Courier New"' + + out = ["digraph asm_graph {"] + fix_chars = lambda x: '\\' + x.group() + + # Generate basic blocks + out_blocks = [] + for label in irgraph.nodes(): + assert isinstance(label, LocKey) + label_names = irgraph.loc_db.get_location_names(label) + label_name = list(label_names)[0] + + if hasattr(irgraph, 'blocks'): + irblock = irgraph.blocks[label] + else: + irblock = None + if isinstance(label, LocKey): + out_block = '%s [\n' % label_name + else: + out_block = '%s [\n' % label + out_block += "%s " % block_attr + out_block += 'label =<<table border="0" cellborder="0" cellpadding="3">' + + block_label = '<tr><td %s>%s</td></tr>' % ( + label_attr, label_name) + block_html_lines = [] + if lines and irblock is not None: + for assignblk in irblock: + for dst, src in assignblk.iteritems(): + if False: + out_render = "%.8X</td><td %s> " % (0, td_attr) + else: + out_render = "" + out_render += escape_chars.sub(fix_chars, "%s = %s" % (dst, src)) + block_html_lines.append(out_render) + block_html_lines.append(" ") + block_html_lines.pop() + block_html_lines = ('<tr><td %s>' % td_attr + + ('</td></tr><tr><td %s>' % td_attr).join(block_html_lines) + + '</td></tr>') + out_block += "%s " % block_label + out_block += block_html_lines + "</table>> ];" + out_blocks.append(out_block) + + out += out_blocks + # Generate links + for src, dst in irgraph.edges(): + assert isinstance(src, LocKey) + src_names = irgraph.loc_db.get_location_names(src) + assert isinstance(dst, LocKey) + dst_names = irgraph.loc_db.get_location_names(dst) + + src_name = list(src_names)[0] + dst_name = list(dst_names)[0] + + edge_color = "black" + out.append('%s -> %s' % (src_name, + dst_name) + + '[' + edge_attr % ("", edge_color) + '];') + + out.append("}") + return '\n'.join(out) + + def dg2graph(graph, label=False, lines=True): """Render dot graph of @blocks""" @@ -157,287 +229,393 @@ DNC2 = DependencyNode(LBL1, C, 0) DNB3 = DependencyNode(LBL1, B, 1) DNC3 = DependencyNode(LBL1, C, 0) +IRA = IRATest(loc_db) +IRDst = IRA.IRDst +END = ExprId("END", IRDst.size) # graph 1 -G1_IRA = IRATest(loc_db) +G1_IRA = IRA.new_ircfg() -G1_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G1_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)]]) -G1_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) +G1_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G1_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C), ExprAff(IRDst, ExprLoc(LBL2, 32))]]) +G1_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) -G1_IRA.graph.add_uniq_edge(G1_IRB0.loc_key, G1_IRB1.loc_key) -G1_IRA.graph.add_uniq_edge(G1_IRB1.loc_key, G1_IRB2.loc_key) - -G1_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G1_IRB0, G1_IRB1, G1_IRB2]]) +for irb in [G1_IRB0, G1_IRB1, G1_IRB2]: + G1_IRA.add_irblock(irb) # graph 2 -G2_IRA = IRATest(loc_db) - -G2_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G2_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2)]]) -G2_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B + C)]]) +G2_IRA = IRA.new_ircfg() -G2_IRA.graph.add_uniq_edge(G2_IRB0.loc_key, G2_IRB1.loc_key) -G2_IRA.graph.add_uniq_edge(G2_IRB1.loc_key, G2_IRB2.loc_key) +G2_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G2_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2), ExprAff(IRDst, ExprLoc(LBL2, 32))]]) +G2_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B + C), ExprAff(IRDst, END)]]) -G2_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G2_IRB0, G2_IRB1, G2_IRB2]]) +for irb in [G2_IRB0, G2_IRB1, G2_IRB2]: + G2_IRA.add_irblock(irb) # graph 3 -G3_IRA = IRATest(loc_db) - -G3_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G3_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2)]]) -G3_IRB2 = gen_irblock(LBL2, [[ExprAff(B, CST3)]]) -G3_IRB3 = gen_irblock(LBL3, [[ExprAff(A, B + C)]]) - -G3_IRA.graph.add_uniq_edge(G3_IRB0.loc_key, G3_IRB1.loc_key) -G3_IRA.graph.add_uniq_edge(G3_IRB0.loc_key, G3_IRB2.loc_key) -G3_IRA.graph.add_uniq_edge(G3_IRB1.loc_key, G3_IRB3.loc_key) -G3_IRA.graph.add_uniq_edge(G3_IRB2.loc_key, G3_IRB3.loc_key) - -G3_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G3_IRB0, G3_IRB1, - G3_IRB2, G3_IRB3]]) +G3_IRA = IRA.new_ircfg() + +G3_IRB0 = gen_irblock( + LBL0, + [ + [ExprAff(C, CST1), ExprAff( + IRDst, ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) + +G3_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2), ExprAff(IRDst, ExprLoc(LBL3, 32))]]) +G3_IRB2 = gen_irblock(LBL2, [[ExprAff(B, CST3), ExprAff(IRDst, ExprLoc(LBL3, 32))]]) +G3_IRB3 = gen_irblock(LBL3, [[ExprAff(A, B + C), ExprAff(IRDst, END)]]) + +for irb in [G3_IRB0, G3_IRB1, G3_IRB2, G3_IRB3]: + G3_IRA.add_irblock(irb) # graph 4 -G4_IRA = IRATest(loc_db) - -G4_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G4_IRB1 = gen_irblock(LBL1, [[ExprAff(C, C + CST2)], - [ExprAff(G4_IRA.IRDst, - ExprCond(C, ExprLoc(LBL2, 32), - ExprLoc(LBL1, 32)))]]) +G4_IRA = IRA.new_ircfg() -G4_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) +G4_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G4_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(C, C + CST2)], + [ExprAff(IRDst, + ExprCond( + C, + ExprLoc(LBL2, 32), + ExprLoc(LBL1, 32)) + ) + ]] +) -G4_IRA.graph.add_uniq_edge(G4_IRB0.loc_key, G4_IRB1.loc_key) -G4_IRA.graph.add_uniq_edge(G4_IRB1.loc_key, G4_IRB2.loc_key) -G4_IRA.graph.add_uniq_edge(G4_IRB1.loc_key, G4_IRB1.loc_key) +G4_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) -G4_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G4_IRB0, G4_IRB1, G4_IRB2]]) +for irb in [G4_IRB0, G4_IRB1, G4_IRB2]: + G4_IRA.add_irblock(irb) # graph 5 -G5_IRA = IRATest(loc_db) - -G5_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) -G5_IRB1 = gen_irblock(LBL1, [[ExprAff(B, B + CST2)], - [ExprAff(G5_IRA.IRDst, - ExprCond(B, ExprLoc(LBL2, 32), - ExprLoc(LBL1, 32)))]]) - -G5_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) - -G5_IRA.graph.add_uniq_edge(G5_IRB0.loc_key, G5_IRB1.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB1.loc_key, G5_IRB2.loc_key) -G5_IRA.graph.add_uniq_edge(G5_IRB1.loc_key, G5_IRB1.loc_key) - -G5_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G5_IRB0, G5_IRB1, G5_IRB2]]) +G5_IRA = IRA.new_ircfg() + +G5_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G5_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, B + CST2)], + [ExprAff( + IRDst, + ExprCond( + B, + ExprLoc(LBL2, 32), + ExprLoc(LBL1, 32) + ) + ) + ] + ] +) + +G5_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) + +for irb in [G5_IRB0, G5_IRB1, G5_IRB2]: + G5_IRA.add_irblock(irb) # graph 6 -G6_IRA = IRATest(loc_db) +G6_IRA = IRA.new_ircfg() -G6_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) -G6_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B)]]) +G6_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G6_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) -G6_IRA.graph.add_uniq_edge(G6_IRB0.loc_key, G6_IRB1.loc_key) -G6_IRA.graph.add_uniq_edge(G6_IRB1.loc_key, G6_IRB1.loc_key) - -G6_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G6_IRB0, G6_IRB1]]) +for irb in [G6_IRB0, G6_IRB1]: + G6_IRA.add_irblock(irb) # graph 7 -G7_IRA = IRATest(loc_db) - -G7_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G7_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)], [ExprAff(A, B)]]) -G7_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)]]) - -G7_IRA.graph.add_uniq_edge(G7_IRB0.loc_key, G7_IRB1.loc_key) -G7_IRA.graph.add_uniq_edge(G7_IRB1.loc_key, G7_IRB1.loc_key) -G7_IRA.graph.add_uniq_edge(G7_IRB1.loc_key, G7_IRB2.loc_key) - -G7_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G7_IRB0, G7_IRB1, G7_IRB2]]) +G7_IRA = IRA.new_ircfg() + +G7_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G7_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, C)], + [ExprAff(A, B)], + [ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) + +G7_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A), ExprAff(IRDst, END)]]) + +for irb in [G7_IRB0, G7_IRB1, G7_IRB2]: + G7_IRA.add_irblock(irb) # graph 8 -G8_IRA = IRATest(loc_db) - -G8_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G8_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)], [ExprAff(C, D)]]) -G8_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) - -G8_IRA.graph.add_uniq_edge(G8_IRB0.loc_key, G8_IRB1.loc_key) -G8_IRA.graph.add_uniq_edge(G8_IRB1.loc_key, G8_IRB1.loc_key) -G8_IRA.graph.add_uniq_edge(G8_IRB1.loc_key, G8_IRB2.loc_key) - -G8_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G8_IRB0, G8_IRB1, G8_IRB2]]) +G8_IRA = IRA.new_ircfg() + +G8_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G8_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, C)], + [ExprAff(C, D), + ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) +G8_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) + +for irb in [G8_IRB0, G8_IRB1, G8_IRB2]: + G8_IRA.add_irblock(irb) # graph 9 is graph 8 # graph 10 -G10_IRA = IRATest(loc_db) - -G10_IRB1 = gen_irblock(LBL1, [[ExprAff(B, B + CST2)]]) -G10_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) - -G10_IRA.graph.add_uniq_edge(G10_IRB1.loc_key, G10_IRB2.loc_key) -G10_IRA.graph.add_uniq_edge(G10_IRB1.loc_key, G10_IRB1.loc_key) - -G10_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G10_IRB1, G10_IRB2]]) +G10_IRA = IRA.new_ircfg() + +G10_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, B + CST2), + ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) + +G10_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) + +for irb in [G10_IRB1, G10_IRB2]: + G10_IRA.add_irblock(irb) # graph 11 -G11_IRA = IRATest(loc_db) - -G11_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), - ExprAff(B, CST2)]]) -G11_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B), - ExprAff(B, A)]]) -G11_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B)]]) - -G11_IRA.graph.add_uniq_edge(G11_IRB0.loc_key, G11_IRB1.loc_key) -G11_IRA.graph.add_uniq_edge(G11_IRB1.loc_key, G11_IRB2.loc_key) - -G11_IRA.blocks = dict([(irb.loc_key, irb) - for irb in [G11_IRB0, G11_IRB1, G11_IRB2]]) +G11_IRA = IRA.new_ircfg() + +G11_IRB0 = gen_irblock( + LBL0, + [ + [ExprAff(A, CST1), + ExprAff(B, CST2), + ExprAff(IRDst, ExprLoc(LBL1, 32)) + ] + ] +) + +G11_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(A, B), + ExprAff(B, A), + ExprAff(IRDst, ExprLoc(LBL2, 32)) + ] + ] +) + +G11_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B), ExprAff(IRDst, END)]]) + +for irb in [G11_IRB0, G11_IRB1, G11_IRB2]: + G11_IRA.add_irblock(irb) # graph 12 -G12_IRA = IRATest(loc_db) +G12_IRA = IRA.new_ircfg() -G12_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) -G12_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B)], [ExprAff(B, B + CST2)]]) -G12_IRB2 = gen_irblock(LBL2, [[ExprAff(B, A)]]) +G12_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G12_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(A, B)], + [ExprAff(B, B + CST2), + ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) -G12_IRA.graph.add_uniq_edge(G12_IRB0.loc_key, G12_IRB1.loc_key) -G12_IRA.graph.add_uniq_edge(G12_IRB1.loc_key, G12_IRB2.loc_key) -G12_IRA.graph.add_uniq_edge(G12_IRB1.loc_key, G12_IRB1.loc_key) +G12_IRB2 = gen_irblock(LBL2, [[ExprAff(B, A), ExprAff(IRDst, END)]]) -G12_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G12_IRB0, G12_IRB1, - G12_IRB2]]) +for irb in [G12_IRB0, G12_IRB1, G12_IRB2]: + G12_IRA.add_irblock(irb) # graph 13 -G13_IRA = IRATest(loc_db) +G13_IRA = IRA.new_ircfg() G13_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)], #[ExprAff(B, A)], - [ExprAff(G13_IRA.IRDst, + [ExprAff(IRDst, ExprLoc(LBL1, 32))]]) G13_IRB1 = gen_irblock(LBL1, [[ExprAff(C, A)], #[ExprAff(A, A + CST1)], - [ExprAff(G13_IRA.IRDst, - ExprCond(R, ExprLoc(LBL2, 32), - ExprLoc(LBL1, 32)))]]) + [ExprAff(IRDst, + ExprCond( + R, + ExprLoc(LBL2, 32), + ExprLoc(LBL3, 32) + ) + )]]) G13_IRB2 = gen_irblock(LBL2, [[ExprAff(B, A + CST3)], [ExprAff(A, B + CST3)], - [ExprAff(G13_IRA.IRDst, + [ExprAff(IRDst, ExprLoc(LBL1, 32))]]) -G13_IRB3 = gen_irblock(LBL3, [[ExprAff(R, C)]]) - -G13_IRA.graph.add_uniq_edge(G13_IRB0.loc_key, G13_IRB1.loc_key) -G13_IRA.graph.add_uniq_edge(G13_IRB1.loc_key, G13_IRB2.loc_key) -G13_IRA.graph.add_uniq_edge(G13_IRB2.loc_key, G13_IRB1.loc_key) -G13_IRA.graph.add_uniq_edge(G13_IRB1.loc_key, G13_IRB3.loc_key) +G13_IRB3 = gen_irblock(LBL3, [[ExprAff(R, C), ExprAff(IRDst, END)]]) -G13_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G13_IRB0, G13_IRB1, - G13_IRB2, G13_IRB3]]) +for irb in [G13_IRB0, G13_IRB1, G13_IRB2, G13_IRB3]: + G13_IRA.add_irblock(irb) # graph 14 -G14_IRA = IRATest(loc_db) +G14_IRA = IRA.new_ircfg() G14_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)], - [ExprAff(G14_IRA.IRDst, + [ExprAff(IRDst, ExprLoc(LBL1, 32))] ]) G14_IRB1 = gen_irblock(LBL1, [[ExprAff(B, A)], - [ExprAff(G14_IRA.IRDst, - ExprCond(C, ExprLoc(LBL2, 32), - ExprLoc(LBL3, 32)))] + [ExprAff(IRDst, + ExprCond( + C, + ExprLoc(LBL2, 32), + ExprLoc(LBL3, 32) + ) + ) + ] ]) G14_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)], [ExprAff(A, D + CST1)], - [ExprAff(G14_IRA.IRDst, + [ExprAff(IRDst, ExprLoc(LBL1, 32))] ]) -G14_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D + B)]]) +G14_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D + B), ExprAff(IRDst, END)]]) -G14_IRA.graph.add_uniq_edge(G14_IRB0.loc_key, G14_IRB1.loc_key) -G14_IRA.graph.add_uniq_edge(G14_IRB1.loc_key, G14_IRB2.loc_key) -G14_IRA.graph.add_uniq_edge(G14_IRB2.loc_key, G14_IRB1.loc_key) -G14_IRA.graph.add_uniq_edge(G14_IRB1.loc_key, G14_IRB3.loc_key) - -G14_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G14_IRB0, G14_IRB1, - G14_IRB2, G14_IRB3]]) +for irb in [G14_IRB0, G14_IRB1, G14_IRB2, G14_IRB3]: + G14_IRA.add_irblock(irb) # graph 16 -G15_IRA = IRATest(loc_db) +G15_IRA = IRA.new_ircfg() -G15_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)]]) +G15_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) G15_IRB1 = gen_irblock(LBL1, [[ExprAff(D, A + B)], [ExprAff(C, D)], - [ExprAff(B, C)]]) -G15_IRB2 = gen_irblock(LBL2, [[ExprAff(R, B)]]) - -G15_IRA.graph.add_uniq_edge(G15_IRB0.loc_key, G15_IRB1.loc_key) -G15_IRA.graph.add_uniq_edge(G15_IRB1.loc_key, G15_IRB2.loc_key) -G15_IRA.graph.add_uniq_edge(G15_IRB1.loc_key, G15_IRB1.loc_key) - -G15_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G15_IRB0, G15_IRB1, - G15_IRB2]]) + [ExprAff(B, C), + ExprAff(IRDst, + ExprCond( + C, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + )]]) +G15_IRB2 = gen_irblock(LBL2, [[ExprAff(R, B), ExprAff(IRDst, END)]]) + +for irb in [G15_IRB0, G15_IRB1, G15_IRB2]: + G15_IRA.add_irblock(irb) # graph 16 -G16_IRA = IRATest(loc_db) - -G16_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)]]) -G16_IRB1 = gen_irblock(LBL1, [[ExprAff(R, D)]]) -G16_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)]]) -G16_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D)]]) -G16_IRB4 = gen_irblock(LBL4, [[ExprAff(R, A)]]) -G16_IRB5 = gen_irblock(LBL5, [[ExprAff(R, A)]]) - -G16_IRA.graph.add_uniq_edge(G16_IRB0.loc_key, G16_IRB1.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB1.loc_key, G16_IRB2.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB2.loc_key, G16_IRB1.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB1.loc_key, G16_IRB3.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB3.loc_key, G16_IRB1.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB1.loc_key, G16_IRB4.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB4.loc_key, G16_IRB1.loc_key) -G16_IRA.graph.add_uniq_edge(G16_IRB1.loc_key, G16_IRB5.loc_key) - -G16_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G16_IRB0, G16_IRB1, - G16_IRB2, G16_IRB3, - G16_IRB4, G16_IRB5]]) +G16_IRA = IRA.new_ircfg() + +G16_IRB0 = gen_irblock( + LBL0, [ + [ExprAff(A, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))] + ] +) + +G16_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(R, D), + ExprAff( + IRDst, + ExprCond( + C, + ExprCond( + C, + ExprCond( + C, + ExprLoc(LBL2, 32), + ExprLoc(LBL3, 32) + ), + ExprLoc(LBL4, 32) + ), + ExprLoc(LBL5, 32) + ) + ) + ] + ] +) + + + +G16_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G16_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G16_IRB4 = gen_irblock(LBL4, [[ExprAff(R, A), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G16_IRB5 = gen_irblock(LBL5, [[ExprAff(R, A), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) + +for irb in [G16_IRB0, G16_IRB1, G16_IRB2, G16_IRB3, G16_IRB4, G16_IRB5]: + G16_IRA.add_irblock(irb) # graph 17 -G17_IRA = IRATest(loc_db) +G17_IRA = IRA.new_ircfg() G17_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), - ExprAff(D, CST2)]]) + ExprAff(D, CST2), + ExprAff(IRDst, ExprLoc(LBL1, 32))]]) G17_IRB1 = gen_irblock(LBL1, [[ExprAff(A, D), - ExprAff(B, D)]]) -G17_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B)]]) + ExprAff(B, D), + ExprAff(IRDst, ExprLoc(LBL2, 32))]]) +G17_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B), + ExprAff(IRDst, END)]]) -G17_IRA.graph.add_uniq_edge(G17_IRB0.loc_key, G17_IRB1.loc_key) -G17_IRA.graph.add_uniq_edge(G17_IRB1.loc_key, G17_IRB2.loc_key) +G17_IRA.add_uniq_edge(G17_IRB0.loc_key, G17_IRB1.loc_key) +G17_IRA.add_uniq_edge(G17_IRB1.loc_key, G17_IRB2.loc_key) -G17_IRA.blocks = dict([(irb.loc_key, irb) for irb in [G17_IRB0, G17_IRB1, - G17_IRB2]]) +for irb in [G17_IRB0, G17_IRB1, G17_IRB2]: + G17_IRA.add_irblock(irb) # Test graph 1 G1_TEST1_DN1 = DependencyNode( @@ -950,20 +1128,23 @@ for test_nb, test in enumerate([(G1_IRA, G1_INPUT), # Extract test elements print "[+] Test", test_nb + 1 - g_ira, (depnodes, heads) = test + ircfg, (depnodes, heads) = test - open("graph_%02d.dot" % (test_nb + 1), "w").write(g_ira.graph.dot()) + open("graph_%02d.dot" % (test_nb + 1), "w").write(ircfg.dot()) + open("graph_%02d.dot" % (test_nb + 1), "w").write(bloc2graph(ircfg)) # Different options suffix_key_list = ["", "_nosimp", "_nomem", "_nocall", "_implicit"] # Test classes - for g_ind, g_dep in enumerate([DependencyGraph(g_ira), - DependencyGraph(g_ira, apply_simp=False), - DependencyGraph(g_ira, follow_mem=False), - DependencyGraph(g_ira, follow_mem=False, - follow_call=False), - # DependencyGraph(g_ira, implicit=True), + for g_ind, g_dep in enumerate([DependencyGraph(ircfg), + DependencyGraph(ircfg, apply_simp=False), + DependencyGraph(ircfg, follow_mem=False), + DependencyGraph( + ircfg, follow_mem=False, + follow_call=False + ), + # DependencyGraph(ircfg, implicit=True), ]): # if g_ind == 4: # TODO: Implicit specifications @@ -991,7 +1172,6 @@ for test_nb, test in enumerate([(G1_IRA, G1_INPUT), flat_depnodes = get_flat_init_depnodes(depnodes) if not match_results(all_results, test_results[test_nb], flat_depnodes): FAILED.add(test_nb) - # fds continue if FAILED: diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index 57dd2b77..64cda610 100755 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -15,23 +15,24 @@ from pdb import pm logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_arch().IRDst]) -loc_db = LocationDB() def M(addr): return ExprMem(ExprInt(addr, 16), 16) def compute(asm, inputstate={}, debug=False): + loc_db = LocationDB() sympool = dict(regs_init) sympool.update({k: ExprInt(v, k.size) for k, v in inputstate.iteritems()}) - interm = ir_arch(loc_db) - symexec = SymbolicExecutionEngine(interm, sympool) + ir_tmp = ir_arch(loc_db) + ircfg = ir_tmp.new_ircfg() + symexec = SymbolicExecutionEngine(ir_tmp, sympool) instr = mn.fromstring(asm, loc_db, "l") code = mn.asm(instr)[0] instr = mn.dis(code, "l") instr.offset = inputstate.get(PC, 0) - lbl = interm.add_instr(instr) - symexec.run_at(lbl) + lbl = ir_tmp.add_instr_to_ircfg(instr, ircfg) + symexec.run_at(ircfg, lbl) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/msp430/sem.py b/test/arch/msp430/sem.py index 6693a6f0..10e57e36 100755 --- a/test/arch/msp430/sem.py +++ b/test/arch/msp430/sem.py @@ -9,25 +9,29 @@ from miasm2.arch.msp430.arch import mn_msp430 as mn, mode_msp430 as mode from miasm2.arch.msp430.sem import ir_msp430 as ir_arch from miasm2.arch.msp430.regs import * from miasm2.expression.expression import * +from miasm2.core.locationdb import LocationDB logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([res, ir_arch().IRDst]) + def M(addr): return ExprMem(ExprInt(addr, 16), 16) def compute(asm, inputstate={}, debug=False): + loc_db = LocationDB() sympool = dict(regs_init) sympool.update({k: ExprInt(v, k.size) for k, v in inputstate.iteritems()}) - interm = ir_arch() - symexec = SymbolicExecutionEngine(interm, sympool) + ir_tmp = ir_arch(loc_db) + ircfg = ir_tmp.new_ircfg() + symexec = SymbolicExecutionEngine(ir_tmp, sympool) instr = mn.fromstring(asm, mode) code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(PC, 0) - loc_key = interm.add_instr(instr) - symexec.run_at(loc_key) + loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg) + symexec.run_at(ircfg, loc_key) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index a2493d12..0783089d 100755 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -12,7 +12,7 @@ from miasm2.arch.x86.arch import mn_x86 as mn from miasm2.arch.x86.sem import ir_x86_32 as ir_32, ir_x86_64 as ir_64 from miasm2.arch.x86.regs import * from miasm2.expression.expression import * -from miasm2.expression.simplifications import expr_simp +from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm, asmblock from miasm2.core.locationdb import LocationDB @@ -20,16 +20,15 @@ from miasm2.core.locationdb import LocationDB logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_32().IRDst, ir_64().IRDst]) -loc_db = LocationDB() m32 = 32 m64 = 64 -def symb_exec(lbl, interm, inputstate, debug): +def symb_exec(lbl, ir_arch, ircfg, inputstate, debug): sympool = dict(regs_init) sympool.update(inputstate) - symexec = SymbolicExecutionEngine(interm, sympool) - symexec.run_at(lbl) + symexec = SymbolicExecutionEngine(ir_arch, sympool) + symexec.run_at(ircfg, lbl) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: @@ -38,24 +37,25 @@ def symb_exec(lbl, interm, inputstate, debug): if k not in EXCLUDE_REGS and regs_init.get(k, None) != v} def compute(ir, mode, asm, inputstate={}, debug=False): + loc_db = LocationDB() instr = mn.fromstring(asm, loc_db, mode) code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(EIP, 0) - interm = ir() - lbl = interm.add_instr(instr) - return symb_exec(lbl, interm, inputstate, debug) + ir_arch = ir(loc_db) + ircfg = ir_arch.new_ircfg() + lbl = ir_arch.add_instr_to_ircfg(instr, ircfg) + return symb_exec(lbl, ir_arch, ircfg, inputstate, debug) def compute_txt(ir, mode, txt, inputstate={}, debug=False): asmcfg, loc_db = parse_asm.parse_txt(mn, mode, txt) loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) patches = asmblock.asm_resolve_final(mn, asmcfg, loc_db) - interm = ir(loc_db) + ir_arch = ir(loc_db) lbl = loc_db.get_name_location("main") - for bbl in asmcfg.blocks: - interm.add_block(bbl) - return symb_exec(lbl, interm, inputstate, debug) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) + return symb_exec(lbl, ir_arch, ircfg, inputstate, debug) op_add = lambda a, b: a+b op_sub = lambda a, b: a-b diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index d57bcba0..3158be60 100755 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -10,10 +10,15 @@ class TestSymbExec(unittest.TestCase): from miasm2.expression.expression import ExprInt, ExprId, ExprMem, \ ExprCompose, ExprAff from miasm2.arch.x86.sem import ir_x86_32 + from miasm2.core.locationdb import LocationDB from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import AssignBlock + loc_db = LocationDB() + ira = ir_x86_32(loc_db) + ircfg = ira.new_ircfg() + id_x = ExprId('x', 32) id_a = ExprId('a', 32) id_b = ExprId('b', 32) @@ -21,7 +26,7 @@ class TestSymbExec(unittest.TestCase): id_d = ExprId('d', 32) id_e = ExprId('e', 64) - sb = SymbolicExecutionEngine(ir_x86_32(), + sb = SymbolicExecutionEngine(ira, { ExprMem(ExprInt(0x4, 32), 8): ExprInt(0x44, 8), ExprMem(ExprInt(0x5, 32), 8): ExprInt(0x33, 8), @@ -222,13 +227,14 @@ class TestSymbExec(unittest.TestCase): assert found - sb_empty = SymbolicExecutionEngine(ir_x86_32()) + sb_empty = SymbolicExecutionEngine(ira) sb_empty.dump() # Test memory full print 'full' - arch_addr8 = ir_x86_32() + arch_addr8 = ir_x86_32(loc_db) + ircfg = arch_addr8.new_ircfg() # Hack to obtain tiny address space arch_addr8.addrsize = 5 sb_addr8 = SymbolicExecutionEngine(arch_addr8) |