diff options
Diffstat (limited to 'example/expression')
| -rw-r--r-- | example/expression/access_c.py | 30 | ||||
| -rw-r--r-- | example/expression/asm_to_ir.py | 28 | ||||
| -rw-r--r-- | example/expression/constant_propagation.py | 23 | ||||
| -rw-r--r-- | example/expression/get_read_write.py | 18 | ||||
| -rw-r--r-- | example/expression/graph_dataflow.py | 50 | ||||
| -rw-r--r-- | example/expression/simplification_tools.py | 1 | ||||
| -rw-r--r-- | example/expression/solve_condition_stp.py | 73 |
7 files changed, 111 insertions, 112 deletions
diff --git a/example/expression/access_c.py b/example/expression/access_c.py index de158730..e8d5e318 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -54,11 +54,10 @@ from miasm2.core.objc import ExprToAccessC, CHandler from miasm2.core.objc import CTypesManagerNotPacked from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct - -def find_call(ira): +def find_call(ircfg): """Returns (irb, index) which call""" - for irb in ira.blocks.values(): + for irb in ircfg.blocks.values(): out = set() if len(irb) < 2: continue @@ -92,17 +91,17 @@ class MyExprToAccessC(ExprToAccessC): reduction_rules = ExprToAccessC.reduction_rules + [reduce_compose] -def get_funcs_arg0(ctx, ira, lbl_head): +def get_funcs_arg0(ctx, ira, ircfg, lbl_head): """Compute DependencyGraph on the func @lbl_head""" - g_dep = DependencyGraph(ira, follow_call=False) + g_dep = DependencyGraph(ircfg, follow_call=False) element = ira.arch.regs.RSI - for irb, index in find_call(ira): + for irb, index in find_call(ircfg): instr = irb[index].instr print 'Analysing references from:', hex(instr.offset), instr - g_list = g_dep.get(irb.label, set([element]), index, set([lbl_head])) + g_list = g_dep.get(irb.loc_key, set([element]), index, set([lbl_head])) for dep in g_list: - emul_result = dep.emul(ctx) + emul_result = dep.emul(ira, ctx) value = emul_result[element] yield value @@ -141,16 +140,15 @@ cont = Container.fallback_container(data, None, addr=0) machine = Machine("x86_64") dis_engine, ira = machine.dis_engine, machine.ira -mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) +mdis = dis_engine(cont.bin_stream, loc_db=cont.loc_db) addr_head = 0 -blocks = mdis.dis_multiblock(addr_head) -lbl_head = mdis.symbol_pool.getby_offset(addr_head) +asmcfg = mdis.dis_multiblock(addr_head) +lbl_head = mdis.loc_db.get_offset_location(addr_head) -ir_arch_a = ira(mdis.symbol_pool) -for block in blocks: - ir_arch_a.add_block(block) +ir_arch_a = ira(mdis.loc_db) +ircfg = ir_arch_a.new_ircfg_from_asmcfg(asmcfg) -open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) +open('graph_irflow.dot', 'w').write(ircfg.dot()) # Main function's first argument's type is "struct ll_human*" ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) @@ -161,7 +159,7 @@ expr_types = {arg0: (ptr_llhuman,), mychandler = MyCHandler(types_mngr, expr_types) -for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): +for expr in get_funcs_arg0(ctx, ir_arch_a, ircfg, lbl_head): print "Access:", expr for c_str, ctype in mychandler.expr_to_c_and_types(expr): print '\taccess:', c_str diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 786b860e..7036d960 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -7,8 +7,9 @@ from miasm2.core import asmblock from miasm2.arch.x86.ira import ir_a_x86_32 from miasm2.analysis.data_flow import dead_simp + # First, asm code -blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +asmcfg, loc_db = parse_asm.parse_txt(mn_x86, 32, ''' main: MOV EAX, 1 MOV EBX, 2 @@ -24,33 +25,30 @@ loop: ''') -symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) -for block in blocks: +loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) +for block in asmcfg.blocks: print block print "symbols:" -print symbol_pool -patches = asmblock.asm_resolve_final(mn_x86, blocks, symbol_pool) +print loc_db +patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db) # Translate to IR -ir_arch = ir_a_x86_32(symbol_pool) -for block in blocks: - print 'add block' - print block - ir_arch.add_block(block) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Display IR -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock # Dead propagation -open('graph.dot', 'w').write(ir_arch.graph.dot()) +open('graph.dot', 'w').write(ircfg.dot()) print '*' * 80 -dead_simp(ir_arch) -open('graph2.dot', 'w').write(ir_arch.graph.dot()) +dead_simp(ir_arch, ircfg) +open('graph2.dot', 'w').write(ircfg.dot()) # Display new IR print 'new ir blocks' -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py index 70394580..d9c5fe65 100644 --- a/example/expression/constant_propagation.py +++ b/example/expression/constant_propagation.py @@ -28,27 +28,24 @@ machine = Machine("x86_32") cont = Container.from_stream(open(args.filename)) ira, dis_engine = machine.ira, machine.dis_engine mdis = dis_engine(cont.bin_stream) -ir_arch = ira(mdis.symbol_pool) +ir_arch = ira(mdis.loc_db) addr = int(args.address, 0) - -blocks = mdis.dis_multiblock(addr) -for block in blocks: - ir_arch.add_block(block) - +asmcfg = mdis.dis_multiblock(addr) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) init_infos = ir_arch.arch.regs.regs_init -cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) +cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) if args.simplify: - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() -open("%s.propag.dot" % args.filename, 'w').write(ir_arch.graph.dot()) +open("%s.propag.dot" % args.filename, 'w').write(ircfg.dot()) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index 9e3b5caf..34d0f94a 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -1,9 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.expression.expression import get_rw from miasm2.arch.x86.ira import ir_a_x86_32 -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() print """ @@ -12,14 +12,14 @@ Get read/written registers for a given instruction """ arch = mn_x86 -ir_arch = ir_a_x86_32() - -l = arch.fromstring('LODSB', symbol_pool, 32) -l.offset, l.l = 0, 15 -ir_arch.add_instr(l) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg() +instr = arch.fromstring('LODSB', loc_db, 32) +instr.offset, instr.l = 0, 15 +ir_arch.add_instr_to_ircfg(instr, ircfg) print '*' * 80 -for lbl, irblock in ir_arch.blocks.iteritems(): +for lbl, irblock in ircfg.blocks.iteritems(): print irblock for assignblk in irblock: rw = assignblk.get_rw() @@ -28,4 +28,4 @@ for lbl, irblock in ir_arch.blocks.iteritems(): print 'written:', dst print -open('graph_instr.dot', 'w').write(ir_arch.graph.dot()) +open('graph_instr.dot', 'w').write(ircfg.dot()) diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index 26fdd2ec..b30bd29f 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -24,11 +24,11 @@ def node_x_2_id(n, x): def get_node_name(label, i, n): - n_name = (label.name, i, n) + n_name = (label, i, n) return n_name -def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): +def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes): symbols_init = ir_arch.arch.regs.regs_init.copy() sb = SymbolicExecutionEngine(ir_arch, symbols_init) sb.eval_updt_irblock(irblock) @@ -47,7 +47,7 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): all_mems.update(get_expr_mem(n)) for n in all_mems: - node_n_w = get_node_name(irblock.label, 0, n) + node_n_w = get_node_name(irblock.loc_key, 0, n) if not n == src: continue o_r = n.arg.get_r(mem_read=False, cst_read=True) @@ -55,7 +55,7 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irblock.label, i, n_r) + node_n_r = get_node_name(irblock.loc_key, i, n_r) if not n_r in in_nodes: in_nodes[n_r] = node_n_r flow_graph.add_uniq_edge(node_n_r, node_n_w) @@ -69,13 +69,13 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irblock.label, 0, n_r) + node_n_r = get_node_name(irblock.loc_key, 0, n_r) if not n_r in in_nodes: in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) for n_w in nodes_w: - node_n_w = get_node_name(irblock.label, 1, n_w) + node_n_w = get_node_name(irblock.loc_key, 1, n_w) out_nodes[n_w] = node_n_w flow_graph.add_node(node_n_w) @@ -87,15 +87,18 @@ def node2str(self, node): return out -def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): - for irblock in ir_arch.blocks.values(): +def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb): + for irblock in ircfg.blocks.values(): print irblock - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) + irblock_0 = None - for irblock in ir_arch.blocks.values(): - if irblock.label.offset == ad: + for irblock in ircfg.blocks.values(): + loc_key = irblock.loc_key + offset = ircfg.loc_db.get_location_offset(loc_key) + if offset == ad: irblock_0 = irblock break assert(irblock_0 is not None) @@ -105,20 +108,20 @@ def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): irb_in_nodes = {} irb_out_nodes = {} - for label in ir_arch.blocks: + for label in ircfg.blocks: irb_in_nodes[label] = {} irb_out_nodes[label] = {} - for label, irblock in ir_arch.blocks.iteritems(): - block_flow_cb(ir_arch, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) + for label, irblock in ircfg.blocks.iteritems(): + block_flow_cb(ir_arch, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) - for label in ir_arch.blocks: + for label in ircfg.blocks: print label print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 - inter_block_flow(ir_arch, flow_graph, irblock_0.label, irb_in_nodes, irb_out_nodes) + inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) @@ -131,21 +134,16 @@ ad = int(args.addr, 16) print 'disasm...' mdis = dis_x86_32(data) mdis.follow_call = True -ab = mdis.dis_multiblock(ad) +asmcfg = mdis.dis_multiblock(ad) print 'ok' print 'generating dataflow graph for:' -ir_arch = ir_a_x86_32(mdis.symbol_pool) +ir_arch = ir_a_x86_32(mdis.loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) -blocks = ab -for block in blocks: - print block - ir_arch.add_block(block) -for irblock in ir_arch.blocks.values(): +for irblock in ircfg.blocks.values(): print irblock - if irblock.label.offset != 0: - continue if args.symb: @@ -153,7 +151,7 @@ if args.symb: else: block_flow_cb = intra_block_flow_raw -gen_block_data_flow_graph(ir_arch, ad, block_flow_cb) +gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb) print '*' * 40 print """ diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index 7c15b3e7..cb062fb3 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -32,7 +32,6 @@ x = ExprMem(a + b + ExprInt(0x42, 32), 32) def replace_expr(e): - # print 'visit', e dct = {c + ExprInt(0x42, 32): d, a + b: c, } if e in dct: diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 201d9f26..acb3abf4 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -5,16 +5,14 @@ from pdb import pm from miasm2.analysis.machine import Machine from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ - get_expr_ids, ExprAff + get_expr_ids, ExprAff, ExprLoc from miasm2.core.bin_stream import bin_stream_str -from miasm2.core import asmblock from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine from miasm2.ir.translators.translator import Translator - machine = Machine("x86_32") @@ -28,7 +26,7 @@ if not args: sys.exit(0) -def emul_symb(ir_arch, mdis, states_todo, states_done): +def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done): while states_todo: addr, symbols, conds = states_todo.pop() print '*' * 40, "addr", addr, '*' * 40 @@ -36,11 +34,11 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): print 'Known state, skipping', addr continue states_done.add((addr, symbols, conds)) - symbexec = SymbolicExecutionEngine(ir_arch, {}) + symbexec = SymbolicExecutionEngine(ir_arch) symbexec.symbols = symbols.copy() if ir_arch.pc in symbexec.symbols: del symbexec.symbols[ir_arch.pc] - irblock = get_block(ir_arch, mdis, addr) + irblock = get_block(ir_arch, ircfg, mdis, addr) print 'Run block:' print irblock @@ -55,8 +53,8 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)} addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {})) addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {})) - if not (addr_a.is_int() or asmblock.expr_is_label(addr_a) and - addr_b.is_int() or asmblock.expr_is_label(addr_b)): + if not (addr_a.is_int() or addr_a.is_loc() and + addr_b.is_int() or addr_b.is_loc()): print str(addr_a), str(addr_b) raise ValueError("Unsupported condition") if isinstance(addr_a, ExprInt): @@ -68,11 +66,10 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): elif addr == ret_addr: print 'Return address reached' continue - elif isinstance(addr, ExprInt): + elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) - elif asmblock.expr_is_label(addr): - addr = addr.name + elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") @@ -88,39 +85,51 @@ if __name__ == '__main__': addr = int(options.address, 16) - symbols_init = dict(machine.mn.regs.regs_init) - - ir_arch = machine.ir(mdis.symbol_pool) - symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + ir_arch = machine.ir(mdis.loc_db) + ircfg = ir_arch.new_ircfg() + symbexec = SymbolicExecutionEngine(ir_arch) - blocks, symbol_pool = parse_asm.parse_txt(machine.mn, 32, ''' + asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' + init: PUSH argv PUSH argc PUSH ret_addr ''', - symbol_pool=mdis.symbol_pool) + loc_db=mdis.loc_db) + + + argc_lbl = loc_db.get_name_location('argc') + argv_lbl = loc_db.get_name_location('argv') + ret_addr_lbl = loc_db.get_name_location('ret_addr') + init_lbl = loc_db.get_name_location('init') + argc_loc = ExprLoc(argc_lbl, 32) + argv_loc = ExprLoc(argv_lbl, 32) + ret_addr_loc = ExprLoc(ret_addr_lbl, 32) - argc_lbl = symbol_pool.getby_name('argc') - argv_lbl = symbol_pool.getby_name('argv') - ret_addr_lbl = symbol_pool.getby_name('ret_addr') - argc = ExprId(argc_lbl, 32) - argv = ExprId(argv_lbl, 32) - ret_addr = ExprId(ret_addr_lbl, 32) + ret_addr = ExprId("ret_addr", ret_addr_loc.size) + fix_args = { + argc_loc: ExprId("argc", argc_loc.size), + argv_loc: ExprId("argv", argv_loc.size), + ret_addr_loc: ret_addr, + } + + + + block = asmcfg.loc_key_to_block(init_lbl) + for instr in block.lines: + for i, arg in enumerate(instr.args): + instr.args[i]= arg.replace_expr(fix_args) + print block - b = list(blocks)[0] - print b # add fake address and len to parsed instructions - for i, line in enumerate(b.lines): - line.offset, line.l = i, 1 - ir_arch.add_block(b) - irb = get_block(ir_arch, mdis, 0) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irb = ircfg.blocks[init_lbl] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) - # reset ir_arch blocks ir_arch.blocks = {} @@ -129,7 +138,7 @@ if __name__ == '__main__': states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states - emul_symb(ir_arch, mdis, states_todo, states_done) + emul_symb(ir_arch, ircfg, mdis, states_todo, states_done) all_info = [] @@ -144,7 +153,7 @@ if __name__ == '__main__': all_cases = set() - symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + symbexec = SymbolicExecutionEngine(ir_arch) for addr, reqs_cond in all_info: out = ['(set-logic QF_ABV)', '(set-info :smt-lib-version 2.0)'] |