diff options
| author | serpilliere <devnull@localhost> | 2014-06-03 10:27:56 +0200 |
|---|---|---|
| committer | serpilliere <devnull@localhost> | 2014-06-03 10:27:56 +0200 |
| commit | ed5c3668cc9f545b52674ad699fc2b0ed1ccb575 (patch) | |
| tree | 07faf97d7e4d083173a1f7e1bfd249baed2d74f9 /example/expression | |
| parent | a183e1ebd525453710306695daa8c410fd0cb2af (diff) | |
| download | miasm-ed5c3668cc9f545b52674ad699fc2b0ed1ccb575.tar.gz miasm-ed5c3668cc9f545b52674ad699fc2b0ed1ccb575.zip | |
Miasm v2
* API has changed, so old scripts need updates * See example for API usage * Use tcc or llvm for jit emulation * Go to test and run test_all.py to check install Enjoy !
Diffstat (limited to 'example/expression')
| -rw-r--r-- | example/expression/manip_expression1.py | 31 | ||||
| -rw-r--r-- | example/expression/manip_expression2.py | 26 | ||||
| -rw-r--r-- | example/expression/manip_expression3.py | 20 | ||||
| -rw-r--r-- | example/expression/manip_expression4.py | 215 | ||||
| -rw-r--r-- | example/expression/manip_expression5.py | 73 | ||||
| -rw-r--r-- | example/expression/manip_expression6.py | 67 | ||||
| -rw-r--r-- | example/expression/manip_expression7.py | 21 | ||||
| -rw-r--r-- | example/expression/sc_connect_back.bin | bin | 0 -> 290 bytes | |||
| -rw-r--r-- | example/expression/simple_test.bin | bin | 0 -> 141 bytes | |||
| -rw-r--r-- | example/expression/simple_test.c | 26 | ||||
| -rw-r--r-- | example/expression/solve_condition_stp.py | 245 |
11 files changed, 724 insertions, 0 deletions
diff --git a/example/expression/manip_expression1.py b/example/expression/manip_expression1.py new file mode 100644 index 00000000..a9ed00e3 --- /dev/null +++ b/example/expression/manip_expression1.py @@ -0,0 +1,31 @@ +from miasm2.expression.expression import * + +print """ +Simple expression manipulation demo +""" + +# define 2 ID +a = ExprId('eax', 32) +b = ExprId('ebx', 32) +print a, b +# eax ebx + +# add those ID +c = ExprOp('+', a, b) +print c +# (eax + ebx) + +# + automaticaly generates ExprOp('+', a, b) +c = a + b +print c +# (eax + ebx) + +# ax is a slice of eax +ax = a[:16] +print ax +# eax[0:16] + +# memory deref +d = ExprMem(c, 32) +print d +# @32[(eax + ebx)] diff --git a/example/expression/manip_expression2.py b/example/expression/manip_expression2.py new file mode 100644 index 00000000..4153f875 --- /dev/null +++ b/example/expression/manip_expression2.py @@ -0,0 +1,26 @@ +from miasm2.arch.x86.arch import mn_x86 +from miasm2.expression.expression import get_rw +from miasm2.arch.x86.ira import ir_a_x86_32 +print """ +Simple expression manipulation demo. +Get read/written registers for a given instruction +""" + +arch = mn_x86 +my_ir = ir_a_x86_32() + +l = arch.fromstring('LODSB', 32) +l.offset, l.l = 0, 15 +my_ir.add_instr(l) + +print '*' * 80 +for lbl, b in my_ir.blocs.items(): + print b + for irs in b.irs: + o_r, o_w = get_rw(irs) + print 'read: ', [str(x) for x in o_r] + print 'written:', [str(x) for x in o_w] + print +my_ir.gen_graph() +g = my_ir.graph() +open('graph_instr.txt', 'w').write(g) diff --git a/example/expression/manip_expression3.py b/example/expression/manip_expression3.py new file mode 100644 index 00000000..27c86096 --- /dev/null +++ b/example/expression/manip_expression3.py @@ -0,0 +1,20 @@ +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp + +print """ +Simple expression simplification demo +""" + + +a = ExprId('eax') +b = ExprId('ebx') + +exprs = [a + b - a, + ExprInt32(0x12) + ExprInt32(0x30) - a, + ExprCompose([(a[:8], 0, 8), + (a[8:16], 8, 16)])] + +for e in exprs: + print '*' * 40 + print 'original expression:', e + print "simplified:", expr_simp(e) diff --git a/example/expression/manip_expression4.py b/example/expression/manip_expression4.py new file mode 100644 index 00000000..f4a55a3c --- /dev/null +++ b/example/expression/manip_expression4.py @@ -0,0 +1,215 @@ +import os +import sys +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.arch.x86.arch import mn_x86 +from miasm2.core import asmbloc +from miasm2.core.bin_stream import bin_stream_str +from elfesteem import pe_init +from optparse import OptionParser +from pdb import pm +from miasm2.ir.ir import ir +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.disasm import dis_x86_32 + +from miasm2.analysis.data_analysis import intra_bloc_flow_raw, inter_bloc_flow + +from miasm2.core.graph import DiGraph +from miasm2.ir.symbexec import symbexec + +from pprint import pprint as pp + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +print """ +Simple expression use for generating dataflow graph +Exemple: +python manip_expression4.py sc_connect_back.bin 0x2e +""" + + +parser = OptionParser(usage="usage: %prog [options] sc_connect_back.bin") + +(options, args) = parser.parse_args(sys.argv[1:]) +if len(args) != 2: + parser.print_help() + sys.exit(0) + + +def node_x_2_id(n, x): + return hash(str(n) + str(x)) & 0xffffffffffffffff + + +def get_node_name(label, i, n): + # n_name = "%s_%d_%s"%(label.name, i, n) + n_name = (label.name, i, n) + return n_name + + +def get_modified_symbols(sb): + # get modified IDS + ids = sb.symbols.symbols_id.keys() + ids.sort() + out = {} + for i in ids: + if i in sb.arch.regs.regs_init and \ + i in sb.symbols.symbols_id and \ + sb.symbols.symbols_id[i] == sb.arch.regs.regs_init[i]: + continue + # print i, sb.symbols.symbols_id[i] + out[i] = sb.symbols.symbols_id[i] + + # get mem IDS + mems = sb.symbols.symbols_mem.values() + for m, v in mems: + print m, v + out[m] = v + pp([(str(x[0]), str(x[1])) for x in out.items()]) + return out + + +def intra_bloc_flow_symb(my_ir, flow_graph, irbloc): + symbols_init = {} + for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + sb = symbexec(mn_x86, symbols_init) + sb.emulbloc(irbloc) + print '*' * 40 + print irbloc + # sb.dump_mem() + # sb.dump_id() + in_nodes = {} + out_nodes = {} + + out = get_modified_symbols(sb) + current_nodes = {} + # gen mem arg to mem node links + for dst, src in out.items(): + for n in [dst, src]: + + all_mems = set() + all_mems.update(get_expr_mem(n)) + + for n in all_mems: + node_n_w = get_node_name(irbloc.label, 0, n) + if not n == src: + continue + o_r = n.arg.get_r(mem_read=False, cst_read=True) + for n_r in o_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irbloc.label, i, n_r) + if not n_r in in_nodes: + in_nodes[n_r] = node_n_r + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + # gen data flow links + for dst, src in out.items(): + nodes_r = src.get_r(mem_read=False, cst_read=True) + nodes_w = set([dst]) + for n_r in nodes_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irbloc.label, 0, n_r) + if not n_r in in_nodes: + in_nodes[n_r] = node_n_r + + flow_graph.add_node(node_n_r) + for n_w in nodes_w: + node_n_w = get_node_name(irbloc.label, 1, n_w) + out_nodes[n_w] = node_n_w + + flow_graph.add_node(node_n_w) + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + irbloc.in_nodes = in_nodes + irbloc.out_nodes = out_nodes + + +def node2str(self, n): + label, i, node = n + # print n + out = "%s,%s\\l\\\n%s" % n + return out + + +def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): + out_str = "" + + # my_ir = ir_x86_32(symbol_pool) + + for irbloc in my_ir.blocs.values(): + print irbloc + + my_ir.gen_graph() + my_ir.dead_simp() + + irbloc_0 = None + for irbloc in my_ir.blocs.values(): + if irbloc.label.offset == ad: + irbloc_0 = irbloc + break + assert(irbloc_0 is not None) + flow_graph = DiGraph() + flow_graph.node2str = lambda n: node2str(flow_graph, n) + done = set() + todo = set([irbloc_0.label]) + + bloc2w = {} + + for irbloc in my_ir.blocs.values(): + intra_bloc_flow_raw(my_ir, flow_graph, irbloc) + # intra_bloc_flow_symb(my_ir, flow_graph, irbloc) + + for irbloc in my_ir.blocs.values(): + print irbloc + print 'IN', [str(x) for x in irbloc.in_nodes] + print 'OUT', [str(x) for x in irbloc.out_nodes] + + print '*' * 20, 'interbloc', '*' * 20 + inter_bloc_flow(my_ir, flow_graph, irbloc_0.label) + + # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core') + # from graph_qt import graph_qt + # graph_qt(flow_graph) + open('data.txt', 'w').write(flow_graph.dot()) + + +data = open(args[0]).read() +ad = int(args[1], 16) + +print 'disasm...' +mdis = dis_x86_32(data) +mdis.follow_call = True +ab = mdis.dis_multibloc(ad) +print 'ok' + + +print 'generating dataflow graph for:' +my_ir = ir_a_x86_32(mdis.symbol_pool) + +blocs = ab +for bloc in blocs: + print bloc + my_ir.add_bloc(bloc) +for irbloc in my_ir.blocs.values(): + print irbloc + if irbloc.label.offset != 0: + continue + + +out_str = gen_bloc_data_flow_graph(my_ir, mdis.bs, ad) + +print '*' * 40 +print """ + View with: +dotty dataflow.txt + or + Generate ps with pdf: +dot -Tps dataflow_xx.txt -o graph.ps +""" diff --git a/example/expression/manip_expression5.py b/example/expression/manip_expression5.py new file mode 100644 index 00000000..ed147c04 --- /dev/null +++ b/example/expression/manip_expression5.py @@ -0,0 +1,73 @@ +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from pdb import pm +import os + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +print """ +Expression simplification demo. +(and regression test) +""" + + +a = ExprId('a') +b = ExprId('b') +c = ExprId('c') +d = ExprId('d') +e = ExprId('e') + +m = ExprMem(a) +s = a[:8] + +i1 = ExprInt(uint32(0x1)) +i2 = ExprInt(uint32(0x2)) +cc = ExprCond(a, b, c) + +o = ExprCompose([(a[:8], 8, 16), + (a[8:16], 0, 8)]) + +o2 = ExprCompose([(a[8:16], 0, 8), + (a[:8], 8, 16)]) + +l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] +l2 = l[::-1] + + +x = ExprMem(a + b + ExprInt32(0x42)) + + +def replace_expr(e): + # print 'visit', e + dct = {c + ExprInt32(0x42): d, + a + b: c, } + if e in dct: + return dct[e] + return e + + +print x +y = x.visit(replace_expr) +print y +print x.copy() +print y.copy() +print y == y.copy() +print repr(y), repr(y.copy()) + + +z = ExprCompose([(a[5:5 + 8], 0, 8), (b[:16], 8, 24), (x[:8], 24, 32)]) +print z +print z.copy() +print z[:31].copy().visit(replace_expr) + +print 'replace' +print x.replace_expr({c + ExprInt32(0x42): d, + a + b: c, }) +print z.replace_expr({c + ExprInt32(0x42): d, + a + b: c, }) + + +u = z.copy() +print u diff --git a/example/expression/manip_expression6.py b/example/expression/manip_expression6.py new file mode 100644 index 00000000..45a6c8c1 --- /dev/null +++ b/example/expression/manip_expression6.py @@ -0,0 +1,67 @@ +from miasm2.core.cpu import parse_ast, ast_id2expr +from miasm2.arch.x86.arch import mn_x86, base_expr +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from miasm2.arch.x86.ira import ir_a_x86_32 +from pdb import pm + + +def my_ast_int2expr(a): + return ExprInt32(a) + +my_var_parser = parse_ast(ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +# First, asm code +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + MOV EAX, 1 + MOV EBX, 2 + MOV ECX, 2 + MOV DX, 2 + +loop: + INC EBX + CMOVZ EAX, EBX + ADD EAX, ECX + JZ loop + RET +''') + +blocs = blocs[0] + +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +for b in blocs: + print b + + +print "symbols:" +print symbol_pool +resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, 32, blocs, symbol_pool) + +# Translate to IR +my_ir = ir_a_x86_32(symbol_pool) +for b in blocs: + print 'add bloc' + print b + my_ir.add_bloc(b) + +# Display IR +for lbl, b in my_ir.blocs.items(): + print b + +# Dead propagation +my_ir.gen_graph() +out = my_ir.graph() +open('graph.txt', 'w').write(out) +print '*' * 80 +my_ir.dead_simp() +out2 = my_ir.graph() +open('graph2.txt', 'w').write(out2) + +# Display new IR +print 'new ir blocs' +for lbl, b in my_ir.blocs.items(): + print b diff --git a/example/expression/manip_expression7.py b/example/expression/manip_expression7.py new file mode 100644 index 00000000..d1cbb73b --- /dev/null +++ b/example/expression/manip_expression7.py @@ -0,0 +1,21 @@ +from miasm2.core.graph import DiGraph +from miasm2.expression.expression import * + +print "Simple Expression grapher demo" + +a = ExprId("A") +b = ExprId("B") +c = ExprId("C") +d = ExprId("D") +m = ExprMem(a + b + c + a) + +e1 = ExprCompose([(a + b - (c * a) / m | b, 0, 32), (a + m, 32, 64)]) +e2 = ExprInt64(15) +e = ExprCond(d, e1, e2)[0:32] + +print "[+] Expression:" +print e + +g = e.graph() +print "[+] Graph:" +print g.dot() diff --git a/example/expression/sc_connect_back.bin b/example/expression/sc_connect_back.bin new file mode 100644 index 00000000..9e9c80a5 --- /dev/null +++ b/example/expression/sc_connect_back.bin Binary files differdiff --git a/example/expression/simple_test.bin b/example/expression/simple_test.bin new file mode 100644 index 00000000..60f4e768 --- /dev/null +++ b/example/expression/simple_test.bin Binary files differdiff --git a/example/expression/simple_test.c b/example/expression/simple_test.c new file mode 100644 index 00000000..8e344f18 --- /dev/null +++ b/example/expression/simple_test.c @@ -0,0 +1,26 @@ +int test(unsigned int argc, char** argv) +{ + unsigned int ret; + if (argc == 0) + ret = 0x1001; + else if (argc < 2) + ret = 0x1002; + else if (argc <= 5) + ret = 0x1003; + else if (argc != 7 && argc*2 == 14) + ret = 0x1004; + else if (argc*2 == 14) + ret = 0x1005; + else if (argc & 0x30) + ret = 0x1006; + else if (argc + 3 == 0x45) + ret = 0x1007; + else + ret = 0x1008; + return ret; +} + +int main(int argc, char** argv) +{ + return test(argc, argv); +} diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py new file mode 100644 index 00000000..828629fc --- /dev/null +++ b/example/expression/solve_condition_stp.py @@ -0,0 +1,245 @@ +import os +import sys +from miasm2.arch.x86.arch import * +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.sem import * +from miasm2.core.bin_stream import bin_stream_str +from miasm2.core import asmbloc +from miasm2.expression.expression import get_rw +from miasm2.ir.symbexec import symbexec +from miasm2.expression.simplifications import expr_simp +from miasm2.expression import stp +from collections import defaultdict +from optparse import OptionParser +import subprocess +from miasm2.core import parse_asm +from elfesteem.strpatchwork import StrPatchwork + +from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +mn = mn_x86 + +parser = OptionParser(usage="usage: %prog [options] file") +parser.add_option('-a', "--address", dest="address", metavar="ADDRESS", + help="address to disasemble", default="0") + +(options, args) = parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) + + +def get_bloc(my_ir, mdis, ad): + if isinstance(ad, asmbloc.asm_label): + l = ad + else: + l = mdis.symbol_pool.getby_offset_create(ad) + if not l in my_ir.blocs: + ad = l.offset + b = mdis.dis_bloc(ad) + my_ir.add_bloc(b) + b = my_ir.get_bloc(l) + if b is None: + raise LookupError('no bloc found at that address: %s' % l) + return b + + +def emul_symb(my_ir, mdis, states_todo, states_done): + while states_todo: + ad, symbols, conds = states_todo.pop() + print '*' * 40, "addr", ad, '*' * 40 + if (ad, symbols, conds) in states_done: + print 'skip', ad + continue + states_done.add((ad, symbols, conds)) + sb = symbexec(mn, {}) + sb.symbols = symbols.copy() + if my_ir.pc in sb.symbols: + del(sb.symbols[my_ir.pc]) + b = get_bloc(my_ir, mdis, ad) + + print 'run bloc' + print b + # print blocs[ad] + ad = sb.emulbloc(b) + print 'final state' + sb.dump_id() + print 'dataflow' + # data_flow_graph_from_expr(sb) + + assert(ad is not None) + print "DST", ad + + if isinstance(ad, ExprCond): + # Create 2 states, each including complementary conditions + p1 = sb.symbols.copy() + p2 = sb.symbols.copy() + c1 = {ad.cond: ExprInt_from(ad.cond, 0)} + c2 = {ad.cond: ExprInt_from(ad.cond, 1)} + print ad.cond + p1[ad.cond] = ExprInt_from(ad.cond, 0) + p2[ad.cond] = ExprInt_from(ad.cond, 1) + ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {})) + ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {})) + if not (isinstance(ad1, ExprInt) or (isinstance(ad1, ExprId) and isinstance(ad1.name, asmbloc.asm_label)) and + isinstance(ad2, ExprInt) or (isinstance(ad2, ExprId) and isinstance(ad2.name, asmbloc.asm_label))): + print str(ad1), str(ad2) + raise ValueError("zarb condition") + conds1 = list(conds) + c1.items() + conds2 = list(conds) + c2.items() + if isinstance(ad1, ExprId): + ad1 = ad1.name + if isinstance(ad2, ExprId): + ad2 = ad2.name + if isinstance(ad1, ExprInt): + ad1 = ad1.arg + if isinstance(ad2, ExprInt): + ad2 = ad2.arg + states_todo.add((ad1, p1, tuple(conds1))) + states_todo.add((ad2, p2, tuple(conds2))) + elif isinstance(ad, ExprInt): + ad = int(ad.arg) + states_todo.add((ad, sb.symbols.copy(), tuple(conds))) + elif isinstance(ad, ExprId) and isinstance(ad.name, asmbloc.asm_label): + if isinstance(ad, ExprId): + ad = ad.name + states_todo.add((ad, sb.symbols.copy(), tuple(conds))) + elif ad == ret_addr: + print 'ret reached' + continue + else: + raise ValueError("zarb eip") + + +if __name__ == '__main__': + + data = open(args[0]).read() + bs = bin_stream_str(data) + + mdis = dis_engine(bs) + + ad = int(options.address, 16) + + symbols_init = {} + for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + + # config parser for 32 bit + reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + def my_ast_int2expr(a): + return ExprInt32(a) + + def my_ast_id2expr(t): + if t in reg_and_id: + r = reg_and_id[t] + else: + r = ExprId(t, size=32) + return r + my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) + base_expr.setParseAction(my_var_parser) + + argc = ExprId('argc', 32) + argv = ExprId('argv', 32) + ret_addr = ExprId('ret_addr') + reg_and_id[argc.name] = argc + reg_and_id[argv.name] = argv + reg_and_id[ret_addr.name] = ret_addr + + my_symbols = [argc, argv, ret_addr] + my_symbols = dict([(x.name, x) for x in my_symbols]) + my_symbols.update(mn_x86.regs.all_regs_ids_byname) + + sb = symbexec(mn, symbols_init) + + blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' + PUSH argv + PUSH argc + PUSH ret_addr + ''') + + my_ir = ir_x86_32(mdis.symbol_pool) + + b = blocs[0][0] + print b + # add fake address and len to parsed instructions + for i, l in enumerate(b.lines): + l.offset, l.l = i, 1 + my_ir.add_bloc(b) + irb = get_bloc(my_ir, mdis, 0) + sb.emulbloc(irb) + sb.dump_mem() + + # reset my_ir blocs + my_ir.blocs = {} + + states_todo = set() + states_done = set() + states_todo.add((uint32(ad), sb.symbols, ())) + + # emul blocs, propagate states + emul_symb(my_ir, mdis, states_todo, states_done) + + all_info = [] + + print '*' * 40, 'conditions to match', '*' * 40 + for ad, symbols, conds in sorted(states_done): + print '*' * 40, ad, '*' * 40 + reqs = [] + for k, v in conds: + print k, v + reqs.append((k, v)) + all_info.append((ad, reqs)) + + all_cases = set() + + sb = symbexec(mn, symbols_init) + for ad, reqs_cond in all_info: + all_ids = set() + for k, v in reqs_cond: + all_ids.update(get_expr_ids(k)) + + out = [] + + # declare variables + for v in all_ids: + out.append(str(v) + ":" + "BITVECTOR(%d);" % v.size) + + all_csts = [] + for k, v in reqs_cond: + cst = k.strcst() + val = v.arg + assert(val in [0, 1]) + inv = "" + if val == 1: + inv = "NOT " + val = "0" * v.size + all_csts.append("(%s%s=0bin%s)" % (inv, cst, val)) + if not all_csts: + continue + rez = " AND ".join(all_csts) + out.append("QUERY(NOT (%s));" % rez) + end = "\n".join(out) + open('out.txt', 'w').write(end) + try: + cases = subprocess.check_output(["/home/serpilliere/tools/stp/stp", + "-p", + "out.txt"]) + except OSError: + print "ERF, cannot find stp" + break + for c in cases.split('\n'): + if c.startswith('ASSERT'): + all_cases.add((ad, c)) + + print '*' * 40, 'ALL COND', '*' * 40 + all_cases = list(all_cases) + all_cases.sort(key=lambda x: (x[0], x[1])) + for ad, val in all_cases: + print 'address', ad, 'is reachable using argc', val |