diff options
Diffstat (limited to 'example')
27 files changed, 411 insertions, 374 deletions
diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 0c08a8a3..9be5b517 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -8,6 +8,7 @@ from elfesteem.strpatchwork import StrPatchwork from miasm2.core import parse_asm, asmblock from miasm2.analysis.machine import Machine from miasm2.core.interval import interval +from miasm2.core.locationdb import LocationDB parser = ArgumentParser("Multi-arch (32 bits) assembler") parser.add_argument('architecture', help="architecture: " + @@ -65,31 +66,34 @@ with open(args.source) as fstream: source = fstream.read() -symbol_pool = asmblock.AsmSymbolPool() +loc_db = LocationDB() -blocks, symbol_pool = parse_asm.parse_txt(machine.mn, attrib, source, symbol_pool) +asmcfg, loc_db = parse_asm.parse_txt(machine.mn, attrib, source, loc_db) # Fix shellcode addrs -symbol_pool.set_offset(symbol_pool.getby_name("main"), addr_main) +loc_db.set_location_offset(loc_db.get_name_location("main"), addr_main) if args.PE: - symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), - pe.DirImport.get_funcvirt('USER32.dll', 'MessageBoxA')) + loc_db.set_location_offset(loc_db.get_or_create_name_location("MessageBoxA"), + pe.DirImport.get_funcvirt('USER32.dll', + 'MessageBoxA')) # Print and graph firsts blocks before patching it -for block in blocks: +for block in asmcfg.blocks: print block -open("graph.dot", "w").write(blocks.dot()) +open("graph.dot", "w").write(asmcfg.dot()) # Apply patches patches = asmblock.asm_resolve_final(machine.mn, - blocks, - symbol_pool, + asmcfg, + loc_db, dst_interval) if args.encrypt: # Encrypt code - ad_start = symbol_pool.getby_name_create(args.encrypt[0]).offset - ad_stop = symbol_pool.getby_name_create(args.encrypt[1]).offset + loc_start = loc_db.get_or_create_name_location(args.encrypt[0]) + loc_stop = loc_db.get_or_create_name_location(args.encrypt[1]) + ad_start = loc_db.get_location_offset(loc_start) + ad_stop = loc_db.get_location_offset(loc_stop) new_patches = dict(patches) for ad, val in patches.items(): diff --git a/example/asm/simple.py b/example/asm/simple.py index 62d2ff80..5480e2f5 100644 --- a/example/asm/simple.py +++ b/example/asm/simple.py @@ -6,7 +6,7 @@ from miasm2.core import parse_asm, asmblock # Assemble code -blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +asmcfg, loc_db = parse_asm.parse_txt(mn_x86, 32, ''' main: MOV EAX, 1 MOV EBX, 2 @@ -21,14 +21,14 @@ loop: RET ''') -# Set 'main' label's offset -symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +# Set 'main' loc_key's offset +loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) # Spread information and resolve instructions offset -patches = asmblock.asm_resolve_final(mn_x86, blocks, symbol_pool) +patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db) -# Show resolved blocks -for block in blocks: +# Show resolved asmcfg +for block in asmcfg.blocks: print block # Print offset -> bytes diff --git a/example/disasm/callback.py b/example/disasm/callback.py index a9bef20b..b9a09c09 100644 --- a/example/disasm/callback.py +++ b/example/disasm/callback.py @@ -1,9 +1,9 @@ from miasm2.core.bin_stream import bin_stream_str -from miasm2.core.asmblock import AsmLabel, AsmConstraint, expr_is_label +from miasm2.core.asmblock import AsmConstraint from miasm2.arch.x86.disasm import dis_x86_32, cb_x86_funcs -def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): +def cb_x86_callpop(cur_bloc, loc_db, *args, **kwargs): """ 1000: call 1005 1005: pop @@ -21,12 +21,15 @@ def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): last_instr = cur_bloc.lines[-1] if last_instr.name != 'CALL': return - ## The destination must be a label + ## The destination must be a location dst = last_instr.args[0] - if not expr_is_label(dst): + if not dst.is_loc(): return + + loc_key = dst.loc_key + offset = loc_db.get_location_offset(loc_key) ## The destination must be the next instruction - if dst.name.offset != last_instr.offset + last_instr.l: + if offset != last_instr.offset + last_instr.l: return # Update instruction instance @@ -34,7 +37,7 @@ def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): # Update next blocks to process in the disassembly engine cur_bloc.bto.clear() - cur_bloc.add_cst(dst.name.offset, AsmConstraint.c_next, symbol_pool) + cur_bloc.add_cst(loc_key, AsmConstraint.c_next) # Prepare a tiny shellcode @@ -46,8 +49,8 @@ bin_stream = bin_stream_str(shellcode) mdis = dis_x86_32(bin_stream) print "Without callback:\n" -blocks = mdis.dis_multiblock(0) -print "\n".join(str(block) for block in blocks) +asmcfg = mdis.dis_multiblock(0) +print "\n".join(str(block) for block in asmcfg.blocks) # Enable callback cb_x86_funcs.append(cb_x86_callpop) @@ -56,9 +59,9 @@ cb_x86_funcs.append(cb_x86_callpop) print "=" * 40 print "With callback:\n" -blocks_after = mdis.dis_multiblock(0) -print "\n".join(str(block) for block in blocks_after) +asmcfg_after = mdis.dis_multiblock(0) +print "\n".join(str(block) for block in asmcfg_after.blocks) # Ensure the callback has been called -assert blocks.heads()[0].lines[0].name == "CALL" -assert blocks_after.heads()[0].lines[0].name == "PUSH" +assert asmcfg.loc_key_to_block(asmcfg.heads()[0]).lines[0].name == "CALL" +assert asmcfg_after.loc_key_to_block(asmcfg_after.heads()[0]).lines[0].name == "PUSH" diff --git a/example/disasm/file.py b/example/disasm/file.py index 88ba6162..196e1b1a 100644 --- a/example/disasm/file.py +++ b/example/disasm/file.py @@ -13,6 +13,6 @@ cont = Container.from_stream(open(sys.argv[1])) mdis = dis_x86_32(cont.bin_stream) # Inform the engine to avoid disassembling null instructions mdis.dont_dis_nulstart_bloc = True -blocks = mdis.dis_multiblock(addr) +asmcfg = mdis.dis_multiblock(addr) -open('graph.dot', 'w').write(blocks.dot()) +open('graph.dot', 'w').write(asmcfg.dot()) diff --git a/example/disasm/full.py b/example/disasm/full.py index 84c856e1..cfbfc80c 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -3,7 +3,7 @@ from argparse import ArgumentParser from pdb import pm from miasm2.analysis.binary import Container -from miasm2.core.asmblock import log_asmblock, AsmLabel, AsmCFG +from miasm2.core.asmblock import log_asmblock, AsmCFG from miasm2.expression.expression import ExprId from miasm2.core.interval import interval from miasm2.analysis.machine import Machine @@ -85,7 +85,7 @@ mn, dis_engine = machine.mn, machine.dis_engine ira, ir = machine.ira, machine.ir log.info('ok') -mdis = dis_engine(bs, symbol_pool=cont.symbol_pool) +mdis = dis_engine(bs, loc_db=cont.loc_db) # configure disasm engine mdis.dontdis_retcall = args.dontdis_retcall mdis.blocs_wd = args.blockwatchdog @@ -99,7 +99,9 @@ for addr in args.address: addrs.append(int(addr, 0)) except ValueError: # Second chance, try with symbol - addrs.append(mdis.symbol_pool.getby_name(addr).offset) + loc_key = mdis.loc_db.get_name_location(addr) + offset = mdis.loc_db.get_location_offset(loc_key) + addrs.append(offset) if len(addrs) == 0 and default_addr is not None: addrs.append(default_addr) @@ -121,27 +123,28 @@ while not finish and todo: if ad in done: continue done.add(ad) - allblocks = mdis.dis_multiblock(ad) + asmcfg = mdis.dis_multiblock(ad) log.info('func ok %.16x (%d)' % (ad, len(all_funcs))) all_funcs.add(ad) - all_funcs_blocks[ad] = allblocks - for block in allblocks: + all_funcs_blocks[ad] = asmcfg + for block in asmcfg.blocks: for l in block.lines: done_interval += interval([(l.offset, l.offset + l.l)]) if args.funcswatchdog is not None: args.funcswatchdog -= 1 if args.recurfunctions: - for block in allblocks: + for block in asmcfg.blocks: instr = block.get_subcall_instr() if not instr: continue - for dest in instr.getdstflow(mdis.symbol_pool): - if not (isinstance(dest, ExprId) and isinstance(dest.name, AsmLabel)): + for dest in instr.getdstflow(mdis.loc_db): + if not dest.is_loc(): continue - todo.append((mdis, instr, dest.name.offset)) + offset = mdis.loc_db.get_location_offset(dest.loc_key) + todo.append((mdis, instr, offset)) if args.funcswatchdog is not None and args.funcswatchdog <= 0: finish = True @@ -155,13 +158,13 @@ while not finish and todo: # Generate dotty graph -all_blocks = AsmCFG() +all_asmcfg = AsmCFG(mdis.loc_db) for blocks in all_funcs_blocks.values(): - all_blocks += blocks + all_asmcfg += blocks log.info('generate graph file') -open('graph_execflow.dot', 'w').write(all_blocks.dot(offset=True)) +open('graph_execflow.dot', 'w').write(all_asmcfg.dot(offset=True)) log.info('generate intervals') @@ -186,15 +189,19 @@ log.info('total lines %s' % total_l) if args.gen_ir: log.info("generating IR and IR analysis") - ir_arch = ir(mdis.symbol_pool) - ir_arch_a = ira(mdis.symbol_pool) + ir_arch = ir(mdis.loc_db) + ir_arch_a = ira(mdis.loc_db) + + ircfg = ir_arch.new_ircfg() + ircfg_a = ir_arch.new_ircfg() + ir_arch.blocks = {} ir_arch_a.blocks = {} - for ad, all_block in all_funcs_blocks.items(): + for ad, asmcfg in all_funcs_blocks.items(): log.info("generating IR... %x" % ad) - for block in all_block: - ir_arch_a.add_block(block) - ir_arch.add_block(block) + for block in asmcfg.blocks: + ir_arch.add_asmblock_to_ircfg(block, ircfg) + ir_arch_a.add_asmblock_to_ircfg(block, ircfg_a) log.info("Print blocks (without analyse)") for label, block in ir_arch.blocks.iteritems(): @@ -207,25 +214,25 @@ if args.gen_ir: print block if args.simplify > 0: - dead_simp(ir_arch_a) + dead_simp(ir_arch_a, ircfg_a) if args.defuse: reachings = ReachingDefinitions(ir_arch_a) open('graph_defuse.dot', 'w').write(DiGraphDefUse(reachings).dot()) - out = ir_arch_a.graph.dot() + out = ircfg.dot() open('graph_irflow.dot', 'w').write(out) - out = ir_arch.graph.dot() + out = ircfg_a.dot() open('graph_irflow_raw.dot', 'w').write(out) if args.simplify > 1: - ir_arch_a.simplify(expr_simp) + ircfg_a.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch_a) - modified |= ir_arch_a.remove_empty_assignblks() - modified |= ir_arch_a.remove_jmp_blocks() - modified |= ir_arch_a.merge_blocks() + modified |= dead_simp(ir_arch_a, ircfg_a) + modified |= ircfg_a.remove_empty_assignblks() + modified |= ircfg_a.remove_jmp_blocks() + modified |= ircfg_a.merge_blocks() - open('graph_irflow_reduced.dot', 'w').write(ir_arch_a.graph.dot()) + open('graph_irflow_reduced.dot', 'w').write(ircfg_a.dot()) diff --git a/example/disasm/function.py b/example/disasm/function.py index 89f65abb..10495dbc 100644 --- a/example/disasm/function.py +++ b/example/disasm/function.py @@ -8,9 +8,9 @@ from miasm2.arch.x86.disasm import dis_x86_32 # RET shellcode = '\xb8\xef\xbe7\x13\xb9\x04\x00\x00\x00\xc1\xc0\x08\xe2\xfb\xc3' mdis = dis_x86_32(shellcode) -blocks = mdis.dis_multiblock(0) +asmcfg = mdis.dis_multiblock(0) -for block in blocks: +for block in asmcfg.blocks: print block -open('graph.dot', 'w').write(blocks.dot()) +open('graph.dot', 'w').write(asmcfg.dot()) diff --git a/example/disasm/single_instr.py b/example/disasm/single_instr.py index 59b81de7..d17e303f 100644 --- a/example/disasm/single_instr.py +++ b/example/disasm/single_instr.py @@ -1,9 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.arch.x86.regs import EDX -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() -l = mn_x86.fromstring('MOV EAX, EBX', symbol_pool, 32) +loc_db = LocationDB() +l = mn_x86.fromstring('MOV EAX, EBX', loc_db, 32) print "instruction:", l print "arg:", l.args[0] x = mn_x86.asm(l) diff --git a/example/expression/access_c.py b/example/expression/access_c.py index de158730..e8d5e318 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -54,11 +54,10 @@ from miasm2.core.objc import ExprToAccessC, CHandler from miasm2.core.objc import CTypesManagerNotPacked from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct - -def find_call(ira): +def find_call(ircfg): """Returns (irb, index) which call""" - for irb in ira.blocks.values(): + for irb in ircfg.blocks.values(): out = set() if len(irb) < 2: continue @@ -92,17 +91,17 @@ class MyExprToAccessC(ExprToAccessC): reduction_rules = ExprToAccessC.reduction_rules + [reduce_compose] -def get_funcs_arg0(ctx, ira, lbl_head): +def get_funcs_arg0(ctx, ira, ircfg, lbl_head): """Compute DependencyGraph on the func @lbl_head""" - g_dep = DependencyGraph(ira, follow_call=False) + g_dep = DependencyGraph(ircfg, follow_call=False) element = ira.arch.regs.RSI - for irb, index in find_call(ira): + for irb, index in find_call(ircfg): instr = irb[index].instr print 'Analysing references from:', hex(instr.offset), instr - g_list = g_dep.get(irb.label, set([element]), index, set([lbl_head])) + g_list = g_dep.get(irb.loc_key, set([element]), index, set([lbl_head])) for dep in g_list: - emul_result = dep.emul(ctx) + emul_result = dep.emul(ira, ctx) value = emul_result[element] yield value @@ -141,16 +140,15 @@ cont = Container.fallback_container(data, None, addr=0) machine = Machine("x86_64") dis_engine, ira = machine.dis_engine, machine.ira -mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) +mdis = dis_engine(cont.bin_stream, loc_db=cont.loc_db) addr_head = 0 -blocks = mdis.dis_multiblock(addr_head) -lbl_head = mdis.symbol_pool.getby_offset(addr_head) +asmcfg = mdis.dis_multiblock(addr_head) +lbl_head = mdis.loc_db.get_offset_location(addr_head) -ir_arch_a = ira(mdis.symbol_pool) -for block in blocks: - ir_arch_a.add_block(block) +ir_arch_a = ira(mdis.loc_db) +ircfg = ir_arch_a.new_ircfg_from_asmcfg(asmcfg) -open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) +open('graph_irflow.dot', 'w').write(ircfg.dot()) # Main function's first argument's type is "struct ll_human*" ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) @@ -161,7 +159,7 @@ expr_types = {arg0: (ptr_llhuman,), mychandler = MyCHandler(types_mngr, expr_types) -for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): +for expr in get_funcs_arg0(ctx, ir_arch_a, ircfg, lbl_head): print "Access:", expr for c_str, ctype in mychandler.expr_to_c_and_types(expr): print '\taccess:', c_str diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 786b860e..7036d960 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -7,8 +7,9 @@ from miasm2.core import asmblock from miasm2.arch.x86.ira import ir_a_x86_32 from miasm2.analysis.data_flow import dead_simp + # First, asm code -blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +asmcfg, loc_db = parse_asm.parse_txt(mn_x86, 32, ''' main: MOV EAX, 1 MOV EBX, 2 @@ -24,33 +25,30 @@ loop: ''') -symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) -for block in blocks: +loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) +for block in asmcfg.blocks: print block print "symbols:" -print symbol_pool -patches = asmblock.asm_resolve_final(mn_x86, blocks, symbol_pool) +print loc_db +patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db) # Translate to IR -ir_arch = ir_a_x86_32(symbol_pool) -for block in blocks: - print 'add block' - print block - ir_arch.add_block(block) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Display IR -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock # Dead propagation -open('graph.dot', 'w').write(ir_arch.graph.dot()) +open('graph.dot', 'w').write(ircfg.dot()) print '*' * 80 -dead_simp(ir_arch) -open('graph2.dot', 'w').write(ir_arch.graph.dot()) +dead_simp(ir_arch, ircfg) +open('graph2.dot', 'w').write(ircfg.dot()) # Display new IR print 'new ir blocks' -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py index 70394580..d9c5fe65 100644 --- a/example/expression/constant_propagation.py +++ b/example/expression/constant_propagation.py @@ -28,27 +28,24 @@ machine = Machine("x86_32") cont = Container.from_stream(open(args.filename)) ira, dis_engine = machine.ira, machine.dis_engine mdis = dis_engine(cont.bin_stream) -ir_arch = ira(mdis.symbol_pool) +ir_arch = ira(mdis.loc_db) addr = int(args.address, 0) - -blocks = mdis.dis_multiblock(addr) -for block in blocks: - ir_arch.add_block(block) - +asmcfg = mdis.dis_multiblock(addr) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) init_infos = ir_arch.arch.regs.regs_init -cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) +cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) if args.simplify: - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() -open("%s.propag.dot" % args.filename, 'w').write(ir_arch.graph.dot()) +open("%s.propag.dot" % args.filename, 'w').write(ircfg.dot()) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index 9e3b5caf..34d0f94a 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -1,9 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.expression.expression import get_rw from miasm2.arch.x86.ira import ir_a_x86_32 -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() print """ @@ -12,14 +12,14 @@ Get read/written registers for a given instruction """ arch = mn_x86 -ir_arch = ir_a_x86_32() - -l = arch.fromstring('LODSB', symbol_pool, 32) -l.offset, l.l = 0, 15 -ir_arch.add_instr(l) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg() +instr = arch.fromstring('LODSB', loc_db, 32) +instr.offset, instr.l = 0, 15 +ir_arch.add_instr_to_ircfg(instr, ircfg) print '*' * 80 -for lbl, irblock in ir_arch.blocks.iteritems(): +for lbl, irblock in ircfg.blocks.iteritems(): print irblock for assignblk in irblock: rw = assignblk.get_rw() @@ -28,4 +28,4 @@ for lbl, irblock in ir_arch.blocks.iteritems(): print 'written:', dst print -open('graph_instr.dot', 'w').write(ir_arch.graph.dot()) +open('graph_instr.dot', 'w').write(ircfg.dot()) diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index 26fdd2ec..b30bd29f 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -24,11 +24,11 @@ def node_x_2_id(n, x): def get_node_name(label, i, n): - n_name = (label.name, i, n) + n_name = (label, i, n) return n_name -def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): +def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes): symbols_init = ir_arch.arch.regs.regs_init.copy() sb = SymbolicExecutionEngine(ir_arch, symbols_init) sb.eval_updt_irblock(irblock) @@ -47,7 +47,7 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): all_mems.update(get_expr_mem(n)) for n in all_mems: - node_n_w = get_node_name(irblock.label, 0, n) + node_n_w = get_node_name(irblock.loc_key, 0, n) if not n == src: continue o_r = n.arg.get_r(mem_read=False, cst_read=True) @@ -55,7 +55,7 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irblock.label, i, n_r) + node_n_r = get_node_name(irblock.loc_key, i, n_r) if not n_r in in_nodes: in_nodes[n_r] = node_n_r flow_graph.add_uniq_edge(node_n_r, node_n_w) @@ -69,13 +69,13 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irblock.label, 0, n_r) + node_n_r = get_node_name(irblock.loc_key, 0, n_r) if not n_r in in_nodes: in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) for n_w in nodes_w: - node_n_w = get_node_name(irblock.label, 1, n_w) + node_n_w = get_node_name(irblock.loc_key, 1, n_w) out_nodes[n_w] = node_n_w flow_graph.add_node(node_n_w) @@ -87,15 +87,18 @@ def node2str(self, node): return out -def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): - for irblock in ir_arch.blocks.values(): +def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb): + for irblock in ircfg.blocks.values(): print irblock - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) + irblock_0 = None - for irblock in ir_arch.blocks.values(): - if irblock.label.offset == ad: + for irblock in ircfg.blocks.values(): + loc_key = irblock.loc_key + offset = ircfg.loc_db.get_location_offset(loc_key) + if offset == ad: irblock_0 = irblock break assert(irblock_0 is not None) @@ -105,20 +108,20 @@ def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): irb_in_nodes = {} irb_out_nodes = {} - for label in ir_arch.blocks: + for label in ircfg.blocks: irb_in_nodes[label] = {} irb_out_nodes[label] = {} - for label, irblock in ir_arch.blocks.iteritems(): - block_flow_cb(ir_arch, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) + for label, irblock in ircfg.blocks.iteritems(): + block_flow_cb(ir_arch, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) - for label in ir_arch.blocks: + for label in ircfg.blocks: print label print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 - inter_block_flow(ir_arch, flow_graph, irblock_0.label, irb_in_nodes, irb_out_nodes) + inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) @@ -131,21 +134,16 @@ ad = int(args.addr, 16) print 'disasm...' mdis = dis_x86_32(data) mdis.follow_call = True -ab = mdis.dis_multiblock(ad) +asmcfg = mdis.dis_multiblock(ad) print 'ok' print 'generating dataflow graph for:' -ir_arch = ir_a_x86_32(mdis.symbol_pool) +ir_arch = ir_a_x86_32(mdis.loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) -blocks = ab -for block in blocks: - print block - ir_arch.add_block(block) -for irblock in ir_arch.blocks.values(): +for irblock in ircfg.blocks.values(): print irblock - if irblock.label.offset != 0: - continue if args.symb: @@ -153,7 +151,7 @@ if args.symb: else: block_flow_cb = intra_block_flow_raw -gen_block_data_flow_graph(ir_arch, ad, block_flow_cb) +gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb) print '*' * 40 print """ diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index 7c15b3e7..cb062fb3 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -32,7 +32,6 @@ x = ExprMem(a + b + ExprInt(0x42, 32), 32) def replace_expr(e): - # print 'visit', e dct = {c + ExprInt(0x42, 32): d, a + b: c, } if e in dct: diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 201d9f26..acb3abf4 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -5,16 +5,14 @@ from pdb import pm from miasm2.analysis.machine import Machine from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ - get_expr_ids, ExprAff + get_expr_ids, ExprAff, ExprLoc from miasm2.core.bin_stream import bin_stream_str -from miasm2.core import asmblock from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine from miasm2.ir.translators.translator import Translator - machine = Machine("x86_32") @@ -28,7 +26,7 @@ if not args: sys.exit(0) -def emul_symb(ir_arch, mdis, states_todo, states_done): +def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done): while states_todo: addr, symbols, conds = states_todo.pop() print '*' * 40, "addr", addr, '*' * 40 @@ -36,11 +34,11 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): print 'Known state, skipping', addr continue states_done.add((addr, symbols, conds)) - symbexec = SymbolicExecutionEngine(ir_arch, {}) + symbexec = SymbolicExecutionEngine(ir_arch) symbexec.symbols = symbols.copy() if ir_arch.pc in symbexec.symbols: del symbexec.symbols[ir_arch.pc] - irblock = get_block(ir_arch, mdis, addr) + irblock = get_block(ir_arch, ircfg, mdis, addr) print 'Run block:' print irblock @@ -55,8 +53,8 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)} addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {})) addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {})) - if not (addr_a.is_int() or asmblock.expr_is_label(addr_a) and - addr_b.is_int() or asmblock.expr_is_label(addr_b)): + if not (addr_a.is_int() or addr_a.is_loc() and + addr_b.is_int() or addr_b.is_loc()): print str(addr_a), str(addr_b) raise ValueError("Unsupported condition") if isinstance(addr_a, ExprInt): @@ -68,11 +66,10 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): elif addr == ret_addr: print 'Return address reached' continue - elif isinstance(addr, ExprInt): + elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) - elif asmblock.expr_is_label(addr): - addr = addr.name + elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") @@ -88,39 +85,51 @@ if __name__ == '__main__': addr = int(options.address, 16) - symbols_init = dict(machine.mn.regs.regs_init) - - ir_arch = machine.ir(mdis.symbol_pool) - symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + ir_arch = machine.ir(mdis.loc_db) + ircfg = ir_arch.new_ircfg() + symbexec = SymbolicExecutionEngine(ir_arch) - blocks, symbol_pool = parse_asm.parse_txt(machine.mn, 32, ''' + asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' + init: PUSH argv PUSH argc PUSH ret_addr ''', - symbol_pool=mdis.symbol_pool) + loc_db=mdis.loc_db) + + + argc_lbl = loc_db.get_name_location('argc') + argv_lbl = loc_db.get_name_location('argv') + ret_addr_lbl = loc_db.get_name_location('ret_addr') + init_lbl = loc_db.get_name_location('init') + argc_loc = ExprLoc(argc_lbl, 32) + argv_loc = ExprLoc(argv_lbl, 32) + ret_addr_loc = ExprLoc(ret_addr_lbl, 32) - argc_lbl = symbol_pool.getby_name('argc') - argv_lbl = symbol_pool.getby_name('argv') - ret_addr_lbl = symbol_pool.getby_name('ret_addr') - argc = ExprId(argc_lbl, 32) - argv = ExprId(argv_lbl, 32) - ret_addr = ExprId(ret_addr_lbl, 32) + ret_addr = ExprId("ret_addr", ret_addr_loc.size) + fix_args = { + argc_loc: ExprId("argc", argc_loc.size), + argv_loc: ExprId("argv", argv_loc.size), + ret_addr_loc: ret_addr, + } + + + + block = asmcfg.loc_key_to_block(init_lbl) + for instr in block.lines: + for i, arg in enumerate(instr.args): + instr.args[i]= arg.replace_expr(fix_args) + print block - b = list(blocks)[0] - print b # add fake address and len to parsed instructions - for i, line in enumerate(b.lines): - line.offset, line.l = i, 1 - ir_arch.add_block(b) - irb = get_block(ir_arch, mdis, 0) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irb = ircfg.blocks[init_lbl] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) - # reset ir_arch blocks ir_arch.blocks = {} @@ -129,7 +138,7 @@ if __name__ == '__main__': states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states - emul_symb(ir_arch, mdis, states_todo, states_done) + emul_symb(ir_arch, ircfg, mdis, states_todo, states_done) all_info = [] @@ -144,7 +153,7 @@ if __name__ == '__main__': all_cases = set() - symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + symbexec = SymbolicExecutionEngine(ir_arch) for addr, reqs_cond in all_info: out = ['(set-logic QF_ABV)', '(set-info :smt-lib-version 2.0)'] diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index 9b9c2e95..e8b52e3e 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -10,7 +10,7 @@ from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk from miasm2.arch.msp430.ctype import CTypeMSP430_unk from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler from miasm2.core.ctypesmngr import CAstTypes -from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprAff +from miasm2.expression.expression import ExprLoc, ExprInt, ExprOp, ExprAff from miasm2.ir.symbexec_types import SymbExecCType from miasm2.expression.parser import str_to_expr from miasm2.analysis.cst_propag import add_state, propagate_cst_expr @@ -19,9 +19,7 @@ from utils import guess_machine class TypePropagationForm(ida_kernwin.Form): - def __init__(self, ira): - - self.ira = ira + def __init__(self): default_types_info = r"""ExprId("RDX", 64): char *""" archs = ["AMD64_unk", "X86_32_unk", "msp430_unk"] @@ -201,10 +199,9 @@ class SymbExecCTypeFix(SymbExecCType): if expr.is_int(): continue for c_str, c_type in self.chandler.expr_to_c_and_types(expr, self.symbols): - expr = self.cst_propag_link.get((irb.label, index), {}).get(expr, expr) + expr = self.cst_propag_link.get((irb.loc_key, index), {}).get(expr, expr) offset2cmt.setdefault(instr.offset, set()).add( "\n%s: %s\n%s" % (expr, c_str, c_type)) - self.eval_updt_assignblk(assignblk) for offset, value in offset2cmt.iteritems(): idc.MakeComm(offset, '\n'.join(value)) @@ -243,42 +240,42 @@ def get_ira_call_fixer(ira): def analyse_function(): - - # Init - machine = guess_machine() - mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira - - bs = bin_stream_ida() - mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) - - - iraCallStackFixer = get_ira_call_fixer(ira) - ir_arch = iraCallStackFixer(mdis.symbol_pool) - - # Get settings - settings = TypePropagationForm(ir_arch) + settings = TypePropagationForm() ret = settings.Execute() if not ret: return + + end = None if settings.cScope.value == 0: addr = settings.functionAddr.value else: addr = settings.startAddr.value if settings.cScope.value == 2: end = settings.endAddr - mdis.dont_dis = [end] - blocks = mdis.dis_multiblock(addr) + # Init + machine = guess_machine(addr=addr) + mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira + + bs = bin_stream_ida() + mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) + if end is not None: + mdis.dont_dis = [end] + + + iraCallStackFixer = get_ira_call_fixer(ira) + ir_arch = iraCallStackFixer(mdis.loc_db) + + asmcfg = mdis.dis_multiblock(addr) # Generate IR - for block in blocks: - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) cst_propag_link = {} if settings.cUnalias.value: init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp] } - cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) + cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) @@ -298,7 +295,8 @@ def analyse_function(): expr_str, ctype_str = expr_str.strip(), ctype_str.strip() expr = str_to_expr(expr_str) ast = mychandler.types_mngr.types_ast.parse_c_type( - ctype_str) + ctype_str + ) ctype = mychandler.types_mngr.types_ast.ast_parse_declaration(ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 @@ -306,18 +304,21 @@ def analyse_function(): infos_types[expr] = set([objc]) # Add fake head - lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) - lbl_head = ir_arch.symbol_pool.getby_name_create("start") - - first_block = blocks.label2block(lbl_real_start) - - assignblk_head = AssignBlock([ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)), - ExprAff( - ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) - ], first_block.lines[0]) + lbl_real_start = ir_arch.loc_db.get_offset_location(addr) + lbl_head = ir_arch.loc_db.get_or_create_name_location("start") + + first_block = asmcfg.label2block(lbl_real_start) + + assignblk_head = AssignBlock( + [ + ExprAff(ir_arch.IRDst, ExprLoc(lbl_real_start, ir_arch.IRDst.size)), + ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) + ], + first_block.lines[0] + ) irb_head = IRBlock(lbl_head, [assignblk_head]) - ir_arch.blocks[lbl_head] = irb_head - ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) + ircfg.blocks[lbl_head] = irb_head + ircfg.add_uniq_edge(lbl_head, lbl_real_start) state = TypePropagationEngine.StateEngine(infos_types) states = {lbl_head: state} @@ -330,24 +331,24 @@ def analyse_function(): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec_engine = TypePropagationEngine(ir_arch, types_mngr, state) - addr = symbexec_engine.run_block_at(lbl) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) - ir_arch._graph = None - sons = ir_arch.graph.successors(lbl) + sons = ircfg.successors(lbl) for son in sons: - add_state(ir_arch, todo, states, son, - symbexec_engine.get_state()) + add_state( + ircfg, todo, states, son, + symbexec_engine.get_state() + ) for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state, cst_propag_link) - addr = symbexec_engine.run_block_at(lbl) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index 5342313a..297877a1 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -19,16 +19,18 @@ from utils import guess_machine class depGraphSettingsForm(ida_kernwin.Form): - def __init__(self, ira): + def __init__(self, ira, ircfg): self.ira = ira + self.ircfg = ircfg self.stk_args = {'ARG%d' % i:i for i in xrange(10)} self.stk_unalias_force = False self.address = idc.ScreenEA() cur_block = None - for block in ira.getby_offset(self.address): - if block.label.offset is not None: + for block in ircfg.getby_offset(self.address): + offset = self.ircfg.loc_db.get_location_offset(block.loc_key) + if offset is not None: # Only one block non-generated assert cur_block is None cur_block = block @@ -38,8 +40,8 @@ class depGraphSettingsForm(ida_kernwin.Form): if assignblk.instr.offset == self.address: break assert line_nb is not None - cur_label = str(cur_block.label) - labels = sorted(map(str, ira.blocks.keys())) + cur_loc_key = str(cur_block.loc_key) + loc_keys = sorted(map(str, ircfg.blocks.keys())) regs = sorted(ira.arch.regs.all_regs_ids_byname.keys()) regs += self.stk_args.keys() reg_default = regs[0] @@ -85,21 +87,21 @@ Method to use: tp=ida_kernwin.Form.FT_RAWHEX, value=line_nb), 'cbBBL': ida_kernwin.Form.DropdownListControl( - items=labels, + items=loc_keys, readonly=False, - selval=cur_label), + selval=cur_loc_key), 'cColor': ida_kernwin.Form.ColorInput(value=0xc0c020), }) self.Compile() @property - def label(self): + def loc_key(self): value = self.cbBBL.value - for real_label in self.ira.blocks: - if str(real_label) == value: - return real_label - raise ValueError("Bad label") + for real_loc_key in self.ircfg.blocks: + if str(real_loc_key) == value: + return real_loc_key + raise ValueError("Bad loc_key") @property def line_nb(self): @@ -110,13 +112,13 @@ Method to use: elif mode == 1: return value + 1 else: - return len(self.ira.blocks[self.label]) + return len(self.ircfg.blocks[self.loc_key]) @property def elements(self): value = self.cbReg.value if value in self.stk_args: - line = self.ira.blocks[self.label][self.line_nb].instr + line = self.ircfg.blocks[self.loc_key][self.line_nb].instr arg_num = self.stk_args[value] stk_high = m2_expr.ExprInt(idc.GetSpd(line.offset), ir_arch.sp.size) stk_off = m2_expr.ExprInt(self.ira.sp.size/8 * arg_num, ir_arch.sp.size) @@ -134,7 +136,7 @@ Method to use: @property def depgraph(self): value = self.cMethod.value - return DependencyGraph(self.ira, + return DependencyGraph(self.ircfg, implicit=value & 4, follow_mem=value & 1, follow_call=value & 2) @@ -174,7 +176,7 @@ def treat_element(): for node in graph.relevant_nodes: try: - offset = ir_arch.blocks[node.label][node.line_nb].instr.offset + offset = ir_arch.blocks[node.loc_key][node.line_nb].instr.offset except IndexError: print "Unable to highlight %s" % node continue @@ -184,7 +186,7 @@ def treat_element(): if graph.has_loop: print 'Graph has dependency loop: symbolic execution is inexact' else: - print "Possible value: %s" % graph.emul().values()[0] + print "Possible value: %s" % graph.emul(self.ira).values()[0] for offset, elements in comments.iteritems(): idc.MakeComm(offset, ", ".join(map(str, elements))) @@ -197,38 +199,39 @@ def next_element(): def launch_depgraph(): global graphs, comments, sol_nb, settings, addr, ir_arch + # Get the current function + addr = idc.ScreenEA() + func = ida_funcs.get_func(addr) + # Init - machine = guess_machine() + machine = guess_machine(addr=func.startEA) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) - ir_arch = ira(mdis.symbol_pool) + ir_arch = ira(mdis.loc_db) # Populate symbols with ida names for ad, name in idautils.Names(): if name is None: continue - mdis.symbol_pool.add_label(name, ad) + mdis.loc_db.add_location(name, ad) - # Get the current function - addr = idc.ScreenEA() - func = ida_funcs.get_func(addr) - blocks = mdis.dis_multiblock(func.startEA) + asmcfg = mdis.dis_multiblock(func.startEA) # Generate IR - for block in blocks: - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Get settings - settings = depGraphSettingsForm(ir_arch) + settings = depGraphSettingsForm(ir_arch, ircfg) settings.Execute() - label, elements, line_nb = settings.label, settings.elements, settings.line_nb + loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb # Simplify affectations for irb in ir_arch.blocks.values(): irs = [] - fix_stack = irb.label.offset is not None and settings.unalias_stack + offset = ir_arch.loc_db.get_location_offset(irb.loc_key) + fix_stack = offset is not None and settings.unalias_stack for assignblk in irb: if fix_stack: stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset), ir_arch.sp.size) @@ -243,12 +246,12 @@ def launch_depgraph(): dst, src = expr_simp(dst), expr_simp(src) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) - ir_arch.blocks[irb.label] = IRBlock(irb.label, irs) + ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) # Get dependency graphs dg = settings.depgraph - graphs = dg.get(label, elements, line_nb, - set([ir_arch.symbol_pool.getby_offset(func.startEA)])) + graphs = dg.get(loc_key, elements, line_nb, + set([ir_arch.loc_db.get_offset_location(func.startEA)])) # Display the result comments = {} diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 6dfa1f7d..afd00d5c 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -6,7 +6,7 @@ import idc import idautils from miasm2.core.bin_stream_ida import bin_stream_ida -from miasm2.core.asmblock import expr_is_label, AsmLabel, is_int +from miasm2.core.asmblock import is_int from miasm2.expression.simplifications import expr_simp from miasm2.analysis.data_flow import dead_simp from miasm2.ir.ir import AssignBlock, IRBlock @@ -33,17 +33,15 @@ def label_str(self): else: return "%s:%s" % (self.name, str(self.offset)) -AsmLabel.__init__ = label_init -AsmLabel.__str__ = label_str def color_irblock(irblock, ir_arch): out = [] - lbl = idaapi.COLSTR(str(irblock.label), idaapi.SCOLOR_INSN) + lbl = idaapi.COLSTR(ir_arch.loc_db.pretty_str(irblock.loc_key), idaapi.SCOLOR_INSN) out.append(lbl) for assignblk in irblock: for dst, src in sorted(assignblk.iteritems()): - dst_f = expr2colorstr(ir_arch.arch.regs.all_regs_ids, dst) - src_f = expr2colorstr(ir_arch.arch.regs.all_regs_ids, src) + dst_f = expr2colorstr(dst, loc_db=ir_arch.loc_db) + src_f = expr2colorstr(src, loc_db=ir_arch.loc_db) line = idaapi.COLSTR("%s = %s" % (dst_f, src_f), idaapi.SCOLOR_INSN) out.append(' %s' % line) out.append("") @@ -56,31 +54,29 @@ def color_irblock(irblock, ir_arch): class GraphMiasmIR(idaapi.GraphViewer): - def __init__(self, ir_arch, title, result): + def __init__(self, ircfg, title, result): idaapi.GraphViewer.__init__(self, title) - self.ir_arch = ir_arch + self.ircfg = ircfg self.result = result self.names = {} def OnRefresh(self): self.Clear() addr_id = {} - for irblock in self.ir_arch.blocks.values(): - id_irblock = self.AddNode(color_irblock(irblock, self.ir_arch)) + for irblock in self.ircfg.blocks.values(): + id_irblock = self.AddNode(color_irblock(irblock, self.ircfg)) addr_id[irblock] = id_irblock - for irblock in self.ir_arch.blocks.values(): + for irblock in self.ircfg.blocks.values(): if not irblock: continue - all_dst = self.ir_arch.dst_trackback(irblock) + all_dst = self.ircfg.dst_trackback(irblock) for dst in all_dst: - if not expr_is_label(dst): + if not dst.is_loc(): continue - - dst = dst.name - if not dst in self.ir_arch.blocks: + if not dst.loc_key in self.ircfg.blocks: continue - dst_block = self.ir_arch.blocks[dst] + dst_block = self.ircfg.blocks[dst.loc_key] node1 = addr_id[irblock] node2 = addr_id[dst_block] self.AddEdge(node1, node2) @@ -102,7 +98,9 @@ class GraphMiasmIR(idaapi.GraphViewer): def build_graph(verbose=False, simplify=False): - machine = guess_machine() + start_addr = idc.ScreenEA() + + machine = guess_machine(addr=start_addr) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira if verbose: @@ -114,43 +112,37 @@ def build_graph(verbose=False, simplify=False): bs = bin_stream_ida() mdis = dis_engine(bs) - ir_arch = ira(mdis.symbol_pool) + ir_arch = ira(mdis.loc_db) # populate symbols with ida names for addr, name in idautils.Names(): - # print hex(ad), repr(name) if name is None: continue - if (mdis.symbol_pool.getby_offset(addr) or - mdis.symbol_pool.getby_name(name)): + if (mdis.loc_db.get_offset_location(addr) or + mdis.loc_db.get_name_location(name)): # Symbol alias continue - mdis.symbol_pool.add_label(name, addr) + mdis.loc_db.add_location(name, addr) if verbose: print "start disasm" - addr = idc.ScreenEA() if verbose: print hex(addr) - blocks = mdis.dis_multiblock(addr) + asmcfg = mdis.dis_multiblock(start_addr) if verbose: print "generating graph" - open('asm_flow.dot', 'w').write(blocks.dot()) + open('asm_flow.dot', 'w').write(asmcfg.dot()) - print "generating IR... %x" % addr + print "generating IR... %x" % start_addr - for block in blocks: - if verbose: - print 'ADD' - print block - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) if verbose: - print "IR ok... %x" % addr + print "IR ok... %x" % start_addr - for irb in ir_arch.blocks.itervalues(): + for irb in ircfg.blocks.itervalues(): irs = [] for assignblk in irb: new_assignblk = { @@ -158,27 +150,27 @@ def build_graph(verbose=False, simplify=False): for dst, src in assignblk.iteritems() } irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) - ir_arch.blocks[irb.label] = IRBlock(irb.label, irs) + ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) if verbose: - out = ir_arch.graph.dot() + out = ircfg.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) title = "Miasm IR graph" if simplify: - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() title += " (simplified)" - g = GraphMiasmIR(ir_arch, title, None) + g = GraphMiasmIR(ircfg, title, None) g.Show() diff --git a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index f019f77d..ffaa9b27 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -34,8 +34,16 @@ class ActionHandlerTranslate(ActionHandler): class symbolicexec_t(idaapi.simplecustviewer_t): def add(self, key, value): - self.AddLine("%s = %s" % (expr2colorstr(self.machine.mn.regs.all_regs_ids, key), - expr2colorstr(self.machine.mn.regs.all_regs_ids, value))) + self.AddLine("%s = %s" % ( + expr2colorstr( + key, + loc_db=self.loc_db + ), + expr2colorstr( + value, + loc_db=self.loc_db + ) + )) def expand(self, linenum): element = self.line2eq[linenum] @@ -61,11 +69,12 @@ class symbolicexec_t(idaapi.simplecustviewer_t): form.Compile() form.Execute() - def Create(self, equations, machine, *args, **kwargs): + def Create(self, equations, machine, loc_db, *args, **kwargs): if not super(symbolicexec_t, self).Create(*args, **kwargs): return False self.machine = machine + self.loc_db = loc_db self.line2eq = sorted(equations.items(), key=operator.itemgetter(0)) self.lines_expanded = set() @@ -119,21 +128,25 @@ def symbolic_exec(): from utils import guess_machine + start, end = idc.SelStart(), idc.SelEnd() + bs = bin_stream_ida() - machine = guess_machine() + machine = guess_machine(addr=start) mdis = machine.dis_engine(bs) - start, end = idc.SelStart(), idc.SelEnd() + + if start == idc.BADADDR and end == idc.BADADDR: + start = idc.ScreenEA() + end = idc.next_head(start) # Get next instruction address mdis.dont_dis = [end] - blocks = mdis.dis_multiblock(start) - ira = machine.ira() - for block in blocks: - ira.add_block(block) + asmcfg = mdis.dis_multiblock(start) + ira = machine.ira(loc_db=mdis.loc_db) + ircfg = ira.new_ircfg_from_asmcfg(asmcfg) print "Run symbolic execution..." sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) - sb.run_at(start) + sb.run_at(ircfg, start) modified = {} for dst, src in sb.modified(init_state=machine.mn.regs.regs_init): @@ -141,8 +154,9 @@ def symbolic_exec(): view = symbolicexec_t() all_views.append(view) - if not view.Create(modified, machine, - "Symbolic Execution - 0x%x to 0x%x" % (start, end)): + if not view.Create(modified, machine, mdis.loc_db, + "Symbolic Execution - 0x%x to 0x%x" + % (start, idc.prev_head(end))): return view.Show() diff --git a/example/ida/utils.py b/example/ida/utils.py index e026f2fc..c66475f2 100644 --- a/example/ida/utils.py +++ b/example/ida/utils.py @@ -5,7 +5,7 @@ from miasm2.analysis.machine import Machine from miasm2.ir.translators import Translator import miasm2.expression.expression as m2_expr -def guess_machine(): +def guess_machine(addr=None): "Return an instance of Machine corresponding to the IDA guessed processor" processor_name = GetLongPrm(INF_PROCNAME) @@ -39,7 +39,14 @@ def guess_machine(): (False, 64, True): "aarch64b", (False, 64, False): "aarch64l", } - is_armt = globals().get('armt', False) + + # Get T reg to detect arm/thumb function + # Default is arm + is_armt = False + if addr is not None: + t_reg = GetReg(addr, "T") + is_armt = t_reg == 1 + is_bigendian = info.is_be() infos = (is_armt, size, is_bigendian) if not infos in info2machine: @@ -72,22 +79,29 @@ class TranslatorIDA(Translator): # Implemented language __LANG__ = "ida_w_color" - def __init__(self, regs_ids=None, **kwargs): + def __init__(self, loc_db=None, **kwargs): super(TranslatorIDA, self).__init__(**kwargs) - if regs_ids is None: - regs_ids = {} - self.regs_ids = regs_ids + self.loc_db = loc_db def str_protected_child(self, child, parent): - return ("(%s)" % self.from_expr(child)) if m2_expr.should_parenthesize_child(child, parent) else self.from_expr(child) + return ("(%s)" % ( + self.from_expr(child)) if m2_expr.should_parenthesize_child(child, parent) + else self.from_expr(child) + ) def from_ExprInt(self, expr): return idaapi.COLSTR(str(expr), idaapi.SCOLOR_NUMBER) def from_ExprId(self, expr): - out = str(expr) - if expr in self.regs_ids: - out = idaapi.COLSTR(out, idaapi.SCOLOR_REG) + out = idaapi.COLSTR(str(expr), idaapi.SCOLOR_REG) + return out + + def from_ExprLoc(self, expr): + if self.loc_db is not None: + out = self.loc_db.pretty_str(expr.loc_key) + else: + out = str(expr) + out = idaapi.COLSTR(out, idaapi.SCOLOR_REG) return out def from_ExprMem(self, expr): @@ -126,20 +140,23 @@ class TranslatorIDA(Translator): return (' ' + expr._op + ' ').join([self.str_protected_child(arg, expr) for arg in expr._args]) return (expr._op + '(' + - ', '.join([self.from_expr(arg) for arg in expr._args]) + ')') + ', '.join( + self.from_expr(arg) + for arg in expr._args + ) + ')') def from_ExprAff(self, expr): return "%s = %s" % tuple(map(expr.from_expr, (expr.dst, expr.src))) -def expr2colorstr(regs_ids, expr): +def expr2colorstr(expr, loc_db): """Colorize an Expr instance for IDA - @regs_ids: list of ExprId corresponding to available registers @expr: Expr instance to colorize + @loc_db: LocationDB instance """ - translator = TranslatorIDA(regs_ids) + translator = TranslatorIDA(loc_db=loc_db) return translator.from_expr(expr) diff --git a/example/jitter/mips32.py b/example/jitter/mips32.py index c5b2f7f5..31ab03c8 100755 --- a/example/jitter/mips32.py +++ b/example/jitter/mips32.py @@ -5,16 +5,11 @@ from miasm2.analysis import debugging from miasm2.jitter.csts import * from miasm2.analysis.machine import Machine -from pdb import pm - parser = ArgumentParser( description="""Sandbox raw binary with mips32 engine (ex: jit_mips32.py example/mips32_sc_l.bin 0)""") -parser.add_argument("-r", "--log-regs", - help="Log registers value for each instruction", - action="store_true") -parser.add_argument("-m", "--log-mn", - help="Log desassembly conversion for each instruction", +parser.add_argument("-t", "--trace", + help="Log instructions/registers values", action="store_true") parser.add_argument("-n", "--log-newbloc", help="Log basic blocks processed by the Jitter", @@ -43,9 +38,11 @@ def jit_mips32_binary(args): myjit.init_stack() # Log level (if available with jitter engine) - myjit.jit.log_regs = args.log_regs - myjit.jit.log_mn = args.log_mn - myjit.jit.log_newbloc = args.log_newbloc + myjit.set_trace_log( + trace_instr=args.trace, + trace_regs=args.trace, + trace_new_blocks=args.log_newbloc + ) myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath).read()) myjit.add_breakpoint(0x1337BEEF, code_sentinelle) diff --git a/example/jitter/msp430.py b/example/jitter/msp430.py index 6dd67542..2f9b8649 100755 --- a/example/jitter/msp430.py +++ b/example/jitter/msp430.py @@ -8,11 +8,8 @@ from miasm2.analysis.machine import Machine parser = ArgumentParser( description="""Sandbox raw binary with msp430 engine (ex: jit_msp430.py example/msp430_sc.bin 0)""") -parser.add_argument("-r", "--log-regs", - help="Log registers value for each instruction", - action="store_true") -parser.add_argument("-m", "--log-mn", - help="Log desassembly conversion for each instruction", +parser.add_argument("-t", "--trace", + help="Log instructions/registers values", action="store_true") parser.add_argument("-n", "--log-newbloc", help="Log basic blocks processed by the Jitter", @@ -36,9 +33,11 @@ def jit_msp430_binary(args): myjit.init_stack() # Log level (if available with jitter engine) - myjit.jit.log_regs = args.log_regs - myjit.jit.log_mn = args.log_mn - myjit.jit.log_newbloc = args.log_newbloc + myjit.set_trace_log( + trace_instr=args.trace, + trace_regs=args.trace, + trace_new_blocks=args.log_newbloc + ) myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath, "rb").read()) myjit.add_breakpoint(0x1337, lambda _: exit(0)) diff --git a/example/jitter/sandbox_call.py b/example/jitter/sandbox_call.py index dc64af15..3eb0b86e 100644 --- a/example/jitter/sandbox_call.py +++ b/example/jitter/sandbox_call.py @@ -15,7 +15,8 @@ sb = Sandbox_Linux_arml(options.filename, options, globals()) with open(options.filename, "rb") as fdesc: cont = Container.from_stream(fdesc) - addr_to_call = cont.symbol_pool.getby_name("md5_starts").offset + loc_key = cont.loc_db.get_name_location("md5_starts") + addr_to_call = cont.loc_db.get_location_offset(loc_key) # Calling md5_starts(malloc(0x64)) addr = linobjs.heap.alloc(sb.jitter, 0x64) diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index f9b0aed1..665fa15a 100644 --- a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -53,20 +53,21 @@ if options.verbose is True: # Ensure there is one and only one leave (for OEP discovering) mdis = sb.machine.dis_engine(sb.jitter.bs) mdis.dont_dis_nulstart_bloc = True -ab = mdis.dis_multiblock(sb.entry_point) +asmcfg = mdis.dis_multiblock(sb.entry_point) -leaves = list(ab.get_bad_blocks_predecessors()) +leaves = list(asmcfg.get_bad_blocks_predecessors()) assert(len(leaves) == 1) l = leaves.pop() logging.info(l) -end_label = l.label.offset -logging.info('final label') -logging.info(end_label) +end_offset = mdis.loc_db.get_location_offset(l) + +logging.info('final offset') +logging.info(hex(end_offset)) # Export CFG graph (dot format) if options.graph is True: - open("graph.dot", "w").write(ab.graph.dot()) + open("graph.dot", "w").write(asmcfg.dot()) if options.verbose is True: @@ -85,7 +86,7 @@ def update_binary(jitter): return False # Set callbacks -sb.jitter.add_breakpoint(end_label, update_binary) +sb.jitter.add_breakpoint(end_offset, update_binary) # Run sb.run() diff --git a/example/jitter/x86_32.py b/example/jitter/x86_32.py index 1409d7aa..5272f732 100644 --- a/example/jitter/x86_32.py +++ b/example/jitter/x86_32.py @@ -24,8 +24,7 @@ data = open(args.filename).read() run_addr = 0x40000000 myjit.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) -myjit.jit.log_regs = True -myjit.jit.log_mn = True +myjit.set_trace_log() myjit.push_uint32_t(0x1337beef) myjit.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/example/symbol_exec/depgraph.py b/example/symbol_exec/depgraph.py index b8d838ae..f306e6e3 100644 --- a/example/symbol_exec/depgraph.py +++ b/example/symbol_exec/depgraph.py @@ -47,7 +47,7 @@ for element in args.element: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) -ir_arch = machine.ira(mdis.symbol_pool) +ir_arch = machine.ira(mdis.loc_db) # Common argument forms init_ctx = {} @@ -59,21 +59,22 @@ if args.rename_args: init_ctx[e_mem] = ExprId("arg%d" % i, 32) # Disassemble the targeted function -blocks = mdis.dis_multiblock(int(args.func_addr, 0)) +asmcfg = mdis.dis_multiblock(int(args.func_addr, 0)) # Generate IR -for block in blocks: - ir_arch.add_block(block) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Get the instance -dg = DependencyGraph(ir_arch, implicit=args.implicit, - apply_simp=not args.do_not_simplify, - follow_mem=not args.unfollow_mem, - follow_call=not args.unfollow_call) +dg = DependencyGraph( + ircfg, implicit=args.implicit, + apply_simp=not args.do_not_simplify, + follow_mem=not args.unfollow_mem, + follow_call=not args.unfollow_call +) # Build information target_addr = int(args.target_addr, 0) -current_block = list(ir_arch.getby_offset(target_addr))[0] +current_block = list(ircfg.getby_offset(target_addr))[0] assignblk_index = 0 for assignblk_index, assignblk in enumerate(current_block): if assignblk.instr.offset == target_addr: @@ -81,12 +82,12 @@ for assignblk_index, assignblk in enumerate(current_block): # Enumerate solutions json_solutions = [] -for sol_nb, sol in enumerate(dg.get(current_block.label, elements, assignblk_index, set())): +for sol_nb, sol in enumerate(dg.get(current_block.loc_key, elements, assignblk_index, set())): fname = "sol_%d.dot" % sol_nb with open(fname, "w") as fdesc: fdesc.write(sol.graph.dot()) - results = sol.emul(ctx=init_ctx) + results = sol.emul(ir_arch, ctx=init_ctx) tokens = {str(k): str(v) for k, v in results.iteritems()} if not args.json: result = ", ".join("=".join(x) for x in tokens.iteritems()) diff --git a/example/symbol_exec/dse_strategies.py b/example/symbol_exec/dse_strategies.py index a981853a..5a4be321 100644 --- a/example/symbol_exec/dse_strategies.py +++ b/example/symbol_exec/dse_strategies.py @@ -67,7 +67,7 @@ jitter.init_run(run_addr) # Init a DSE instance with a given strategy dse = DSEPathConstraint(machine, produce_solution=strategy) dse.attach(jitter) -# Concretize everything exept the argument +# Concretize everything except the argument dse.update_state_from_concrete() regs = jitter.ir_arch.arch.regs arg = ExprId("ARG", 32) diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index 22a48fc6..c78f1f7f 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -2,35 +2,34 @@ from miasm2.core.bin_stream import bin_stream_str from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.analysis.machine import Machine -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB START_ADDR = 0 machine = Machine("x86_32") - -symbol_pool = AsmSymbolPool() - +loc_db = LocationDB() # Assemble and disassemble a MOV ## Ensure that attributes 'offset' and 'l' are set -line = machine.mn.fromstring("MOV EAX, EBX", symbol_pool, 32) +line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32) asm = machine.mn.asm(line)[0] # Get back block bin_stream = bin_stream_str(asm) -mdis = machine.dis_engine(bin_stream, symbol_pool=symbol_pool) +mdis = machine.dis_engine(bin_stream, loc_db=loc_db) mdis.lines_wd = 1 asm_block = mdis.dis_block(START_ADDR) # Translate ASM -> IR -ira = machine.ira(mdis.symbol_pool) -ira.add_block(asm_block) +ira = machine.ira(mdis.loc_db) +ircfg = ira.new_ircfg() +ira.add_asmblock_to_ircfg(asm_block, ircfg) # Instanciate a Symbolic Execution engine with default value for registers -symb = SymbolicExecutionEngine(ira, {}) +symb = SymbolicExecutionEngine(ira) # Emulate one IR basic block ## Emulation of several basic blocks can be done through .emul_ir_blocks -cur_addr = symb.run_at(START_ADDR) +cur_addr = symb.run_at(ircfg, START_ADDR) # Modified elements print 'Modified registers:' |