diff options
Diffstat (limited to 'example')
36 files changed, 2776 insertions, 0 deletions
diff --git a/example/asm_arm.py b/example/asm_arm.py new file mode 100644 index 00000000..a848f22b --- /dev/null +++ b/example/asm_arm.py @@ -0,0 +1,79 @@ +#! /usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.arm.arch import mn_arm, base_expr, variable +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from elfesteem.strpatchwork import StrPatchwork + +my_mn = mn_arm + +reg_and_id = dict(mn_arm.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(my_mn, "arm", ''' +main: + STMFD SP!, {R4, R5, LR} + MOV R0, mystr & 0xffff + ORR R0, R0, mystr & 0xffff0000 + MOV R1, mystrend & 0xffff + ORR R1, R1, mystrend & 0xffff0000 +xxx: + LDR R2, [PC, key-(xxx+8)] +loop: + LDRB R3, [R0] + EOR R3, R3, R2 + STRB R3, [R0], 1 + CMP R0, R1 + BNE loop + EOR R0, R0, R0 + BNE end + EOR R1, R1, R1 + EOR R2, R2, R2 + EORGE R1, R1, R1 + EORGE R2, R2, R2 + ADDLTS R2, R2, R2 + SUBEQ R2, R2, R2 +end: + LDMFD SP!, {R4, R5, PC} +key: +.long 0x11223344 +mystr: +.string "test string" +mystrend: +.long 0 +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) + +for b in blocs[0]: + print b +# graph sc#### +g = asmbloc.bloc2graph(blocs[0]) +open("graph.txt", "w").write(g) + +s = StrPatchwork() + +print "symbols" +print symbol_pool +# dont erase from start to shell code padading +resolved_b, patches = asmbloc.asm_resolve_final( + my_mn, 'arm', blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + s[offset] = raw + +open('demo_arm.bin', 'wb').write(str(s)) diff --git a/example/asm_arm_sc.py b/example/asm_arm_sc.py new file mode 100644 index 00000000..824145fa --- /dev/null +++ b/example/asm_arm_sc.py @@ -0,0 
+1,57 @@ +#! /usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.arm.arch import mn_arm, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem.strpatchwork import StrPatchwork + +from pdb import pm +from miasm2.core import asmbloc +import struct + +reg_and_id = dict(mn_arm.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +st = StrPatchwork() + +blocs, symbol_pool = parse_asm.parse_txt(mn_arm, 'arm', ''' +main: + MOV R1, R0 + MOV R2, 0x100 +loop: + ADD R2, R1, R2 + ADD R1, R1, 1 + CMP R1, 0x10 + BEQ loop + + ADD R0, R1, R2 + BX LR +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0) + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_arm, "arm", blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + st[offset] = raw + +open('arm_sc.bin', 'wb').write(str(st)) diff --git a/example/asm_box_x86_32.py b/example/asm_box_x86_32.py new file mode 100644 index 00000000..2fa63c78 --- /dev/null +++ b/example/asm_box_x86_32.py @@ -0,0 +1,66 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + RET + +title: +.string "Hello!" +msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_enc.py b/example/asm_box_x86_32_enc.py new file mode 100644 index 00000000..7f1ef7ec --- /dev/null +++ b/example/asm_box_x86_32_enc.py @@ -0,0 +1,105 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + CALL cipher_code + CALL msgbox_encrypted_start + CALL cipher_code + RET + +cipher_code: + PUSH EBP + MOV EBP, ESP + + LEA ESI, DWORD PTR [msgbox_encrypted_start] + LEA EDI, DWORD PTR [msgbox_encrypted_stop] + +loop: + XOR BYTE PTR [ESI], 0x42 + INC ESI + CMP ESI, EDI + JBE loop + + MOV ESP, EBP + POP EBP + RET + +msgbox_encrypted_start: + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + RET +.dontsplit +msgbox_encrypted_stop: +.long 0 + +title: +.string "Hello!" +msg: +.string "World!" 
+''') + + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create( + "MessageBoxA"), e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +print "symbols" +print symbol_pool + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +ad_start = symbol_pool.getby_name_create("msgbox_encrypted_start").offset +ad_stop = symbol_pool.getby_name_create("msgbox_encrypted_stop").offset + +# cipher code +new_patches = dict(patches) +for ad, val in patches.items(): + if ad_start <= ad < ad_stop: + new_patches[ad] = "".join([chr(ord(x) ^ 0x42) for x in val]) + +for offset, raw in new_patches.items(): + e.virt[offset] = raw + +open('box_x86_32_enc.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_mod.py b/example/asm_box_x86_32_mod.py new file mode 100644 index 00000000..f9f53f58 --- /dev/null +++ b/example/asm_box_x86_32_mod.py @@ -0,0 +1,89 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + CALL test_automod + CALL test_automod + RET + +test_automod: + PUSH EBP + MOV EBP, ESP + +loop: + MOV EAX, 0 + CMP EAX, 0 + JMP mod_addr +mod_addr: + JNZ end + + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + + ; automodif code + MOV BYTE PTR [mod_addr], 0xEB + JMP loop +end: + MOV BYTE PTR [mod_addr], 0x75 + MOV ESP, EBP + POP EBP + RET + +title: +.string "Hello!" +msg: +.string "World!" 
+''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32_mod.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_mod_self.py b/example/asm_box_x86_32_mod_self.py new file mode 100644 index 00000000..95898722 --- /dev/null +++ b/example/asm_box_x86_32_mod_self.py @@ -0,0 +1,74 @@ +#! /usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + PUSH EBP + MOV EBP, ESP + MOV BYTE PTR [myint], 0x90 +myint: + INT 0x3 + + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + MOV ESP, EBP + POP EBP + RET + +title: +.string "Hello!" 
+msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32_mod_self.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_repmod.py b/example/asm_box_x86_32_repmod.py new file mode 100644 index 00000000..f351fcfa --- /dev/null +++ b/example/asm_box_x86_32_repmod.py @@ -0,0 +1,103 @@ +#! /usr/bin/env python + +# test instruction caching + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + CALL test_automod + RET + +lbl_good: + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + +test_automod: + PUSH EBP + MOV EBP, ESP + + LEA EDI, DWORD 
PTR [lbl_mod] + LEA ESI, DWORD PTR [lbl_good] + + MOV ECX, 0x8 + REPE MOVSB +lbl_mod: + XOR EAX, EAX + MOV DWORD PTR [EAX], 0xDEADC0DE + + NOP + NOP + NOP + + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + + MOV ESP, EBP + POP EBP + RET + +title: +.string "Hello!" +msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32_repmod.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_64.py b/example/asm_box_x86_64.py new file mode 100644 index 00000000..3ae4bcb5 --- /dev/null +++ b/example/asm_box_x86_64.py @@ -0,0 +1,68 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE(wsize=64) +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt64(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=64)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 64, ''' +main: + MOV R9, 0x0 + MOV R8, title + MOV RDX, msg + MOV RCX, 0x0 + MOV RAX, QWORD PTR [ MessageBoxA ] + CALL RAX + RET + +title: +.string "Hello!" +msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 64, blocs[0], symbol_pool, + max_offset=0xFFFFFFFFFFFFFFFF) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_64.bin', 'wb').write(str(e)) diff --git a/example/asm_x86.py b/example/asm_x86.py new file mode 100644 index 00000000..3637f9ed --- /dev/null +++ b/example/asm_x86.py @@ -0,0 +1,47 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from elfesteem.strpatchwork import StrPatchwork + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + PUSH EBP + MOV EBP, ESP + SUB ESP, 0x100 + MOV EAX, 0x1337 + LEA ESI, DWORD PTR [mystr] + MOV ESP, EBP + POP EBP + RET +mystr: +.string "test string" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +s = StrPatchwork() +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, '32', blocs[0], symbol_pool) +for offset, raw in patches.items(): + s[offset] = raw + +print patches + +open('demo_x86_32.bin', 'wb').write(str(s)) diff --git a/example/box.exe b/example/box.exe new file mode 100755 index 00000000..1c11bcda --- /dev/null +++ b/example/box.exe Binary files differdiff --git a/example/box_upx.exe b/example/box_upx.exe new file mode 100755 index 00000000..d0776ec2 --- /dev/null +++ b/example/box_upx.exe Binary files differdiff --git a/example/disasm_01.py b/example/disasm_01.py new file mode 100644 index 00000000..bc6a2fd6 --- /dev/null +++ b/example/disasm_01.py @@ -0,0 +1,12 @@ +from miasm2.arch.x86.arch import mn_x86 +from miasm2.arch.x86.regs import * + +l = mn_x86.fromstring('MOV EAX, EBX', 32) +print "instruction:", l +print "arg:", l.args[0] +x = mn_x86.asm(l) +print x +l.args[0] = EDX +y = mn_x86.asm(l) +print y +print mn_x86.dis(y[0], 32) diff --git a/example/disasm_02.py b/example/disasm_02.py new file mode 100644 index 00000000..5bfd77de --- /dev/null +++ b/example/disasm_02.py @@ -0,0 
+1,13 @@ +from miasm2.arch.x86.disasm import dis_x86_32 +from miasm2.core.asmbloc import bloc2graph + + +s = '\xb8\xef\xbe7\x13\xb9\x04\x00\x00\x00\xc1\xc0\x08\xe2\xfb\xc3' +mdis = dis_x86_32(s) +blocs = mdis.dis_multibloc(0) + +for b in blocs: + print b + +g = bloc2graph(blocs) +open('graph.txt', 'w').write(g) diff --git a/example/disasm_03.py b/example/disasm_03.py new file mode 100644 index 00000000..08b209a1 --- /dev/null +++ b/example/disasm_03.py @@ -0,0 +1,23 @@ +import sys +from elfesteem import pe_init +from miasm2.arch.x86.disasm import dis_x86_32 +from miasm2.core.asmbloc import bloc2graph +from miasm2.core.bin_stream import bin_stream_pe + +if len(sys.argv) != 3: + print 'Example:' + print "%s box_upx.exe 0x410f90" % sys.argv[0] + sys.exit(0) + +fname = sys.argv[1] +ad = int(sys.argv[2], 16) +e = pe_init.PE(open(fname).read()) +bs = bin_stream_pe(e.virt) + +mdis = dis_x86_32(bs) +# inform the engine not to disasm nul instructions +mdis.dont_dis_nulstart_bloc = True +blocs = mdis.dis_multibloc(ad) + +g = bloc2graph(blocs) +open('graph.txt', 'w').write(g) diff --git a/example/expression/manip_expression1.py b/example/expression/manip_expression1.py new file mode 100644 index 00000000..a9ed00e3 --- /dev/null +++ b/example/expression/manip_expression1.py @@ -0,0 +1,31 @@ +from miasm2.expression.expression import * + +print """ +Simple expression manipulation demo +""" + +# define 2 ID +a = ExprId('eax', 32) +b = ExprId('ebx', 32) +print a, b +# eax ebx + +# add those ID +c = ExprOp('+', a, b) +print c +# (eax + ebx) + +# + automaticaly generates ExprOp('+', a, b) +c = a + b +print c +# (eax + ebx) + +# ax is a slice of eax +ax = a[:16] +print ax +# eax[0:16] + +# memory deref +d = ExprMem(c, 32) +print d +# @32[(eax + ebx)] diff --git a/example/expression/manip_expression2.py b/example/expression/manip_expression2.py new file mode 100644 index 00000000..4153f875 --- /dev/null +++ b/example/expression/manip_expression2.py @@ -0,0 +1,26 @@ +from 
miasm2.arch.x86.arch import mn_x86 +from miasm2.expression.expression import get_rw +from miasm2.arch.x86.ira import ir_a_x86_32 +print """ +Simple expression manipulation demo. +Get read/written registers for a given instruction +""" + +arch = mn_x86 +my_ir = ir_a_x86_32() + +l = arch.fromstring('LODSB', 32) +l.offset, l.l = 0, 15 +my_ir.add_instr(l) + +print '*' * 80 +for lbl, b in my_ir.blocs.items(): + print b + for irs in b.irs: + o_r, o_w = get_rw(irs) + print 'read: ', [str(x) for x in o_r] + print 'written:', [str(x) for x in o_w] + print +my_ir.gen_graph() +g = my_ir.graph() +open('graph_instr.txt', 'w').write(g) diff --git a/example/expression/manip_expression3.py b/example/expression/manip_expression3.py new file mode 100644 index 00000000..27c86096 --- /dev/null +++ b/example/expression/manip_expression3.py @@ -0,0 +1,20 @@ +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp + +print """ +Simple expression simplification demo +""" + + +a = ExprId('eax') +b = ExprId('ebx') + +exprs = [a + b - a, + ExprInt32(0x12) + ExprInt32(0x30) - a, + ExprCompose([(a[:8], 0, 8), + (a[8:16], 8, 16)])] + +for e in exprs: + print '*' * 40 + print 'original expression:', e + print "simplified:", expr_simp(e) diff --git a/example/expression/manip_expression4.py b/example/expression/manip_expression4.py new file mode 100644 index 00000000..f4a55a3c --- /dev/null +++ b/example/expression/manip_expression4.py @@ -0,0 +1,215 @@ +import os +import sys +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.arch.x86.arch import mn_x86 +from miasm2.core import asmbloc +from miasm2.core.bin_stream import bin_stream_str +from elfesteem import pe_init +from optparse import OptionParser +from pdb import pm +from miasm2.ir.ir import ir +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.disasm import dis_x86_32 + +from 
miasm2.analysis.data_analysis import intra_bloc_flow_raw, inter_bloc_flow + +from miasm2.core.graph import DiGraph +from miasm2.ir.symbexec import symbexec + +from pprint import pprint as pp + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +print """ +Simple expression use for generating dataflow graph +Exemple: +python manip_expression4.py sc_connect_back.bin 0x2e +""" + + +parser = OptionParser(usage="usage: %prog [options] sc_connect_back.bin") + +(options, args) = parser.parse_args(sys.argv[1:]) +if len(args) != 2: + parser.print_help() + sys.exit(0) + + +def node_x_2_id(n, x): + return hash(str(n) + str(x)) & 0xffffffffffffffff + + +def get_node_name(label, i, n): + # n_name = "%s_%d_%s"%(label.name, i, n) + n_name = (label.name, i, n) + return n_name + + +def get_modified_symbols(sb): + # get modified IDS + ids = sb.symbols.symbols_id.keys() + ids.sort() + out = {} + for i in ids: + if i in sb.arch.regs.regs_init and \ + i in sb.symbols.symbols_id and \ + sb.symbols.symbols_id[i] == sb.arch.regs.regs_init[i]: + continue + # print i, sb.symbols.symbols_id[i] + out[i] = sb.symbols.symbols_id[i] + + # get mem IDS + mems = sb.symbols.symbols_mem.values() + for m, v in mems: + print m, v + out[m] = v + pp([(str(x[0]), str(x[1])) for x in out.items()]) + return out + + +def intra_bloc_flow_symb(my_ir, flow_graph, irbloc): + symbols_init = {} + for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + sb = symbexec(mn_x86, symbols_init) + sb.emulbloc(irbloc) + print '*' * 40 + print irbloc + # sb.dump_mem() + # sb.dump_id() + in_nodes = {} + out_nodes = {} + + out = get_modified_symbols(sb) + current_nodes = {} + # gen mem arg to mem node links + for dst, src in out.items(): + for n in [dst, src]: + + all_mems = set() + all_mems.update(get_expr_mem(n)) + + for n in all_mems: + node_n_w = get_node_name(irbloc.label, 0, n) + if not n == src: + continue + o_r = 
n.arg.get_r(mem_read=False, cst_read=True) + for n_r in o_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irbloc.label, i, n_r) + if not n_r in in_nodes: + in_nodes[n_r] = node_n_r + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + # gen data flow links + for dst, src in out.items(): + nodes_r = src.get_r(mem_read=False, cst_read=True) + nodes_w = set([dst]) + for n_r in nodes_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irbloc.label, 0, n_r) + if not n_r in in_nodes: + in_nodes[n_r] = node_n_r + + flow_graph.add_node(node_n_r) + for n_w in nodes_w: + node_n_w = get_node_name(irbloc.label, 1, n_w) + out_nodes[n_w] = node_n_w + + flow_graph.add_node(node_n_w) + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + irbloc.in_nodes = in_nodes + irbloc.out_nodes = out_nodes + + +def node2str(self, n): + label, i, node = n + # print n + out = "%s,%s\\l\\\n%s" % n + return out + + +def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): + out_str = "" + + # my_ir = ir_x86_32(symbol_pool) + + for irbloc in my_ir.blocs.values(): + print irbloc + + my_ir.gen_graph() + my_ir.dead_simp() + + irbloc_0 = None + for irbloc in my_ir.blocs.values(): + if irbloc.label.offset == ad: + irbloc_0 = irbloc + break + assert(irbloc_0 is not None) + flow_graph = DiGraph() + flow_graph.node2str = lambda n: node2str(flow_graph, n) + done = set() + todo = set([irbloc_0.label]) + + bloc2w = {} + + for irbloc in my_ir.blocs.values(): + intra_bloc_flow_raw(my_ir, flow_graph, irbloc) + # intra_bloc_flow_symb(my_ir, flow_graph, irbloc) + + for irbloc in my_ir.blocs.values(): + print irbloc + print 'IN', [str(x) for x in irbloc.in_nodes] + print 'OUT', [str(x) for x in irbloc.out_nodes] + + print '*' * 20, 'interbloc', '*' * 20 + inter_bloc_flow(my_ir, flow_graph, irbloc_0.label) + + # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core') + # from 
graph_qt import graph_qt + # graph_qt(flow_graph) + open('data.txt', 'w').write(flow_graph.dot()) + + +data = open(args[0]).read() +ad = int(args[1], 16) + +print 'disasm...' +mdis = dis_x86_32(data) +mdis.follow_call = True +ab = mdis.dis_multibloc(ad) +print 'ok' + + +print 'generating dataflow graph for:' +my_ir = ir_a_x86_32(mdis.symbol_pool) + +blocs = ab +for bloc in blocs: + print bloc + my_ir.add_bloc(bloc) +for irbloc in my_ir.blocs.values(): + print irbloc + if irbloc.label.offset != 0: + continue + + +out_str = gen_bloc_data_flow_graph(my_ir, mdis.bs, ad) + +print '*' * 40 +print """ + View with: +dotty dataflow.txt + or + Generate ps with pdf: +dot -Tps dataflow_xx.txt -o graph.ps +""" diff --git a/example/expression/manip_expression5.py b/example/expression/manip_expression5.py new file mode 100644 index 00000000..ed147c04 --- /dev/null +++ b/example/expression/manip_expression5.py @@ -0,0 +1,73 @@ +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from pdb import pm +import os + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +print """ +Expression simplification demo. 
+(and regression test) +""" + + +a = ExprId('a') +b = ExprId('b') +c = ExprId('c') +d = ExprId('d') +e = ExprId('e') + +m = ExprMem(a) +s = a[:8] + +i1 = ExprInt(uint32(0x1)) +i2 = ExprInt(uint32(0x2)) +cc = ExprCond(a, b, c) + +o = ExprCompose([(a[:8], 8, 16), + (a[8:16], 0, 8)]) + +o2 = ExprCompose([(a[8:16], 0, 8), + (a[:8], 8, 16)]) + +l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] +l2 = l[::-1] + + +x = ExprMem(a + b + ExprInt32(0x42)) + + +def replace_expr(e): + # print 'visit', e + dct = {c + ExprInt32(0x42): d, + a + b: c, } + if e in dct: + return dct[e] + return e + + +print x +y = x.visit(replace_expr) +print y +print x.copy() +print y.copy() +print y == y.copy() +print repr(y), repr(y.copy()) + + +z = ExprCompose([(a[5:5 + 8], 0, 8), (b[:16], 8, 24), (x[:8], 24, 32)]) +print z +print z.copy() +print z[:31].copy().visit(replace_expr) + +print 'replace' +print x.replace_expr({c + ExprInt32(0x42): d, + a + b: c, }) +print z.replace_expr({c + ExprInt32(0x42): d, + a + b: c, }) + + +u = z.copy() +print u diff --git a/example/expression/manip_expression6.py b/example/expression/manip_expression6.py new file mode 100644 index 00000000..45a6c8c1 --- /dev/null +++ b/example/expression/manip_expression6.py @@ -0,0 +1,67 @@ +from miasm2.core.cpu import parse_ast, ast_id2expr +from miasm2.arch.x86.arch import mn_x86, base_expr +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from miasm2.arch.x86.ira import ir_a_x86_32 +from pdb import pm + + +def my_ast_int2expr(a): + return ExprInt32(a) + +my_var_parser = parse_ast(ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +# First, asm code +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + MOV EAX, 1 + MOV EBX, 2 + MOV ECX, 2 + MOV DX, 2 + +loop: + INC EBX + CMOVZ EAX, EBX + ADD EAX, ECX + JZ loop + RET +''') + +blocs = blocs[0] + +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +for b in 
blocs: + print b + + +print "symbols:" +print symbol_pool +resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, 32, blocs, symbol_pool) + +# Translate to IR +my_ir = ir_a_x86_32(symbol_pool) +for b in blocs: + print 'add bloc' + print b + my_ir.add_bloc(b) + +# Display IR +for lbl, b in my_ir.blocs.items(): + print b + +# Dead propagation +my_ir.gen_graph() +out = my_ir.graph() +open('graph.txt', 'w').write(out) +print '*' * 80 +my_ir.dead_simp() +out2 = my_ir.graph() +open('graph2.txt', 'w').write(out2) + +# Display new IR +print 'new ir blocs' +for lbl, b in my_ir.blocs.items(): + print b diff --git a/example/expression/manip_expression7.py b/example/expression/manip_expression7.py new file mode 100644 index 00000000..d1cbb73b --- /dev/null +++ b/example/expression/manip_expression7.py @@ -0,0 +1,21 @@ +from miasm2.core.graph import DiGraph +from miasm2.expression.expression import * + +print "Simple Expression grapher demo" + +a = ExprId("A") +b = ExprId("B") +c = ExprId("C") +d = ExprId("D") +m = ExprMem(a + b + c + a) + +e1 = ExprCompose([(a + b - (c * a) / m | b, 0, 32), (a + m, 32, 64)]) +e2 = ExprInt64(15) +e = ExprCond(d, e1, e2)[0:32] + +print "[+] Expression:" +print e + +g = e.graph() +print "[+] Graph:" +print g.dot() diff --git a/example/expression/sc_connect_back.bin b/example/expression/sc_connect_back.bin new file mode 100644 index 00000000..9e9c80a5 --- /dev/null +++ b/example/expression/sc_connect_back.bin Binary files differdiff --git a/example/expression/simple_test.bin b/example/expression/simple_test.bin new file mode 100644 index 00000000..60f4e768 --- /dev/null +++ b/example/expression/simple_test.bin Binary files differdiff --git a/example/expression/simple_test.c b/example/expression/simple_test.c new file mode 100644 index 00000000..8e344f18 --- /dev/null +++ b/example/expression/simple_test.c @@ -0,0 +1,26 @@ +int test(unsigned int argc, char** argv) +{ + unsigned int ret; + if (argc == 0) + ret = 0x1001; + else if (argc < 2) + ret = 
0x1002; + else if (argc <= 5) + ret = 0x1003; + else if (argc != 7 && argc*2 == 14) + ret = 0x1004; + else if (argc*2 == 14) + ret = 0x1005; + else if (argc & 0x30) + ret = 0x1006; + else if (argc + 3 == 0x45) + ret = 0x1007; + else + ret = 0x1008; + return ret; +} + +int main(int argc, char** argv) +{ + return test(argc, argv); +} diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py new file mode 100644 index 00000000..828629fc --- /dev/null +++ b/example/expression/solve_condition_stp.py @@ -0,0 +1,245 @@ +import os +import sys +from miasm2.arch.x86.arch import * +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.sem import * +from miasm2.core.bin_stream import bin_stream_str +from miasm2.core import asmbloc +from miasm2.expression.expression import get_rw +from miasm2.ir.symbexec import symbexec +from miasm2.expression.simplifications import expr_simp +from miasm2.expression import stp +from collections import defaultdict +from optparse import OptionParser +import subprocess +from miasm2.core import parse_asm +from elfesteem.strpatchwork import StrPatchwork + +from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +mn = mn_x86 + +parser = OptionParser(usage="usage: %prog [options] file") +parser.add_option('-a', "--address", dest="address", metavar="ADDRESS", + help="address to disasemble", default="0") + +(options, args) = parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) + + +def get_bloc(my_ir, mdis, ad): + if isinstance(ad, asmbloc.asm_label): + l = ad + else: + l = mdis.symbol_pool.getby_offset_create(ad) + if not l in my_ir.blocs: + ad = l.offset + b = mdis.dis_bloc(ad) + my_ir.add_bloc(b) + b = my_ir.get_bloc(l) + if b is None: + raise LookupError('no bloc found at that address: %s' % l) + return b + + +def emul_symb(my_ir, mdis, states_todo, 
states_done): + while states_todo: + ad, symbols, conds = states_todo.pop() + print '*' * 40, "addr", ad, '*' * 40 + if (ad, symbols, conds) in states_done: + print 'skip', ad + continue + states_done.add((ad, symbols, conds)) + sb = symbexec(mn, {}) + sb.symbols = symbols.copy() + if my_ir.pc in sb.symbols: + del(sb.symbols[my_ir.pc]) + b = get_bloc(my_ir, mdis, ad) + + print 'run bloc' + print b + # print blocs[ad] + ad = sb.emulbloc(b) + print 'final state' + sb.dump_id() + print 'dataflow' + # data_flow_graph_from_expr(sb) + + assert(ad is not None) + print "DST", ad + + if isinstance(ad, ExprCond): + # Create 2 states, each including complementary conditions + p1 = sb.symbols.copy() + p2 = sb.symbols.copy() + c1 = {ad.cond: ExprInt_from(ad.cond, 0)} + c2 = {ad.cond: ExprInt_from(ad.cond, 1)} + print ad.cond + p1[ad.cond] = ExprInt_from(ad.cond, 0) + p2[ad.cond] = ExprInt_from(ad.cond, 1) + ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {})) + ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {})) + if not (isinstance(ad1, ExprInt) or (isinstance(ad1, ExprId) and isinstance(ad1.name, asmbloc.asm_label)) and + isinstance(ad2, ExprInt) or (isinstance(ad2, ExprId) and isinstance(ad2.name, asmbloc.asm_label))): + print str(ad1), str(ad2) + raise ValueError("zarb condition") + conds1 = list(conds) + c1.items() + conds2 = list(conds) + c2.items() + if isinstance(ad1, ExprId): + ad1 = ad1.name + if isinstance(ad2, ExprId): + ad2 = ad2.name + if isinstance(ad1, ExprInt): + ad1 = ad1.arg + if isinstance(ad2, ExprInt): + ad2 = ad2.arg + states_todo.add((ad1, p1, tuple(conds1))) + states_todo.add((ad2, p2, tuple(conds2))) + elif isinstance(ad, ExprInt): + ad = int(ad.arg) + states_todo.add((ad, sb.symbols.copy(), tuple(conds))) + elif isinstance(ad, ExprId) and isinstance(ad.name, asmbloc.asm_label): + if isinstance(ad, ExprId): + ad = ad.name + states_todo.add((ad, sb.symbols.copy(), tuple(conds))) + elif ad == ret_addr: + print 'ret reached' + continue + else: + 
raise ValueError("zarb eip") + + +if __name__ == '__main__': + + data = open(args[0]).read() + bs = bin_stream_str(data) + + mdis = dis_engine(bs) + + ad = int(options.address, 16) + + symbols_init = {} + for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + + # config parser for 32 bit + reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + def my_ast_int2expr(a): + return ExprInt32(a) + + def my_ast_id2expr(t): + if t in reg_and_id: + r = reg_and_id[t] + else: + r = ExprId(t, size=32) + return r + my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) + base_expr.setParseAction(my_var_parser) + + argc = ExprId('argc', 32) + argv = ExprId('argv', 32) + ret_addr = ExprId('ret_addr') + reg_and_id[argc.name] = argc + reg_and_id[argv.name] = argv + reg_and_id[ret_addr.name] = ret_addr + + my_symbols = [argc, argv, ret_addr] + my_symbols = dict([(x.name, x) for x in my_symbols]) + my_symbols.update(mn_x86.regs.all_regs_ids_byname) + + sb = symbexec(mn, symbols_init) + + blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' + PUSH argv + PUSH argc + PUSH ret_addr + ''') + + my_ir = ir_x86_32(mdis.symbol_pool) + + b = blocs[0][0] + print b + # add fake address and len to parsed instructions + for i, l in enumerate(b.lines): + l.offset, l.l = i, 1 + my_ir.add_bloc(b) + irb = get_bloc(my_ir, mdis, 0) + sb.emulbloc(irb) + sb.dump_mem() + + # reset my_ir blocs + my_ir.blocs = {} + + states_todo = set() + states_done = set() + states_todo.add((uint32(ad), sb.symbols, ())) + + # emul blocs, propagate states + emul_symb(my_ir, mdis, states_todo, states_done) + + all_info = [] + + print '*' * 40, 'conditions to match', '*' * 40 + for ad, symbols, conds in sorted(states_done): + print '*' * 40, ad, '*' * 40 + reqs = [] + for k, v in conds: + print k, v + reqs.append((k, v)) + all_info.append((ad, reqs)) + + all_cases = set() + + sb = symbexec(mn, symbols_init) + for ad, reqs_cond in all_info: + all_ids = set() + for k, v in reqs_cond: + 
all_ids.update(get_expr_ids(k)) + + out = [] + + # declare variables + for v in all_ids: + out.append(str(v) + ":" + "BITVECTOR(%d);" % v.size) + + all_csts = [] + for k, v in reqs_cond: + cst = k.strcst() + val = v.arg + assert(val in [0, 1]) + inv = "" + if val == 1: + inv = "NOT " + val = "0" * v.size + all_csts.append("(%s%s=0bin%s)" % (inv, cst, val)) + if not all_csts: + continue + rez = " AND ".join(all_csts) + out.append("QUERY(NOT (%s));" % rez) + end = "\n".join(out) + open('out.txt', 'w').write(end) + try: + cases = subprocess.check_output(["/home/serpilliere/tools/stp/stp", + "-p", + "out.txt"]) + except OSError: + print "ERF, cannot find stp" + break + for c in cases.split('\n'): + if c.startswith('ASSERT'): + all_cases.add((ad, c)) + + print '*' * 40, 'ALL COND', '*' * 40 + all_cases = list(all_cases) + all_cases.sort(key=lambda x: (x[0], x[1])) + for ad, val in all_cases: + print 'address', ad, 'is reachable using argc', val diff --git a/example/extract_pe_ressources.py b/example/extract_pe_ressources.py new file mode 100644 index 00000000..d5c59ae5 --- /dev/null +++ b/example/extract_pe_ressources.py @@ -0,0 +1,42 @@ +import sys +import struct +from elfesteem import * +import os +import sys + +# example for extracting all pe ressources + + +def extract_res(res, name_o="", num=0, lvl=-1): + lvl += 1 + if not res: + return num + for x in res.resentries: + print "\t" * lvl, repr(x) + num += 1 + + if x.name_s: + name = name_o[:] + repr(x.name_s.value[::2]) + else: + name = name_o[:] + + if x.data: + print "\t" * lvl, 'data', len(x.data.s) + open('out/%.3d_%s.bin' % (num, name), 'w').write(str(x.data.s)) + else: + print "\t" * lvl, None + if x.offsettosubdir: + num = extract_res(x.subdir, name, num, lvl + 1) + return num + +try: + os.stat('out') +except: + os.mkdir('out') + +fname = sys.argv[1] +e = pe_init.PE(open(fname, 'rb').read()) +res = e.DirRes.resdesc + + +extract_res(res) diff --git a/example/md5_arm b/example/md5_arm new file mode 100755 index 
00000000..148e0611 --- /dev/null +++ b/example/md5_arm Binary files differdiff --git a/example/sandbox_pe_x86_32.py b/example/sandbox_pe_x86_32.py new file mode 100644 index 00000000..0f660668 --- /dev/null +++ b/example/sandbox_pe_x86_32.py @@ -0,0 +1,125 @@ +import sys +import os +from argparse import ArgumentParser +from miasm2.arch.x86.arch import mn_x86 +from miasm2.jitter.jitload import jitter_x86_32, vm_load_pe, preload_pe, libimp +from miasm2.jitter.jitload import bin_stream_vm +from miasm2.jitter.csts import * +from miasm2.jitter.os_dep import win_api_x86_32 +from miasm2.analysis import debugging, gdbserver +import inspect + +# Debug settings # +from pdb import pm + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +# + +# Handle arguments + +parser = ArgumentParser( + description="Sandbox a PE binary with x86 32bits engine") +parser.add_argument("filename", help="PE binary") +parser.add_argument("-r", "--log-regs", + help="Log registers value for each instruction", + action="store_true") +parser.add_argument("-m", "--log-mn", + help="Log desassembly conversion for each instruction", + action="store_true") +parser.add_argument("-n", "--log-newbloc", + help="Log basic blocks processed by the Jitter", + action="store_true") +parser.add_argument("-j", "--jitter", + help="Jitter engine. 
Possible values are : tcc (default), llvm", + default="tcc") +parser.add_argument("-d", "--debugging", + help="Attach a CLI debugguer to the sandboxed programm", + action="store_true") +parser.add_argument("-g", "--gdbserver", + help="Listen on [port] with a GDB server", + type=int, + default=False) +args = parser.parse_args() + +# User defined methods + + +def msvcrt_memset(myjit): + ret_ad, args = myjit.func_args_cdecl(3) + dst, c, size = args + + myjit.vm.vm_set_mem(dst, chr(c & 0xFF) * size) + myjit.func_ret_cdecl(ret_ad, 0) + + +def msvcrt_memcpy(myjit): + ret_ad, args = myjit.func_args_cdecl(3) + dst, src, size = args + + x = myjit.vm.vm_get_mem(src, size) + myjit.vm.vm_set_mem(dst, x) + myjit.func_ret_cdecl(ret_ad, 0) + +# Breakpoint callbacks + + +def code_sentinelle(jitter): + jitter.run = False + jitter.pc = 0 + print "End Emulation" + return True + +# x86 32 bits engine instanciation +myjit = jitter_x86_32(jit_type=args.jitter) +myjit.init_stack() +libs = libimp() + +# Set libs for win_32 api +win_api_x86_32.winobjs.runtime_dll = libs + +# Load PE and get entry point address +e = vm_load_pe(myjit.vm, args.filename) +preload_pe(myjit.vm, e, libs) + +addr = e.rva2virt(e.Opthdr.AddressOfEntryPoint) + +# Log level (if available with jitter engine) +myjit.jit.log_regs = args.log_regs +myjit.jit.log_mn = args.log_mn +myjit.jit.log_newbloc = args.log_newbloc + +# Set up stack +myjit.vm_push_uint32_t(0x1337beef) + +# Set callbacks +myjit.add_breakpoint(0x1337beef, code_sentinelle) + +myjit.add_lib_handler(libs, globals()) + +# Start Emulation +myjit.init_run(addr) + +# Handle debugging +if any([args.debugging, args.gdbserver]): + dbg = debugging.Debugguer(myjit) + if args.debugging is True: + cmd = debugging.DebugCmd(dbg) + cmd.cmdloop() + else: + gdb = gdbserver.GdbServer_x86_32(dbg, args.gdbserver) + print("Listenning on port %d" % args.gdbserver) + gdb.run() + +else: + print(myjit.continue_run()) + +# Performance tests +# +# import cProfile +# 
cProfile.run(r'run_bin(myjit, addr)') + +# Test if emulation ended properly +assert(myjit.run is False) diff --git a/example/symbol_exec.py b/example/symbol_exec.py new file mode 100644 index 00000000..6d7457aa --- /dev/null +++ b/example/symbol_exec.py @@ -0,0 +1,31 @@ +# Minimalist Symbol Exec example +from miasm2.core.bin_stream import bin_stream_str +from miasm2.arch.x86.arch import mn_x86 +from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.arch.x86.regs import all_regs_ids, all_regs_ids_init +from miasm2.ir.symbexec import symbexec +from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine +import miasm2.expression.expression as m2_expr + +l = mn_x86.fromstring("MOV EAX, EBX", 32) +asm = mn_x86.asm(l)[0] + +bin_stream = bin_stream_str(asm) + +mdis = dis_engine(bin_stream) +disasm = mdis.dis_multibloc(0) + +ir = ir_a_x86_32(mdis.symbol_pool) +for bbl in disasm: ir.add_bloc(bbl) + +symbols_init = {} +for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] +symb = symbexec(mn_x86, symbols_init) + +block = ir.get_bloc(0) + +cur_addr = symb.emulbloc(block) +assert(symb.symbols[m2_expr.ExprId("EAX")] == symbols_init[m2_expr.ExprId("EBX")]) +print 'modified registers:' +symb.dump_id() diff --git a/example/test_dis.py b/example/test_dis.py new file mode 100644 index 00000000..4400ec14 --- /dev/null +++ b/example/test_dis.py @@ -0,0 +1,266 @@ +import sys +import os +import time + +from miasm2.core.bin_stream import bin_stream_elf, bin_stream_pe, bin_stream_str +from elfesteem import * +from miasm2.core.asmbloc import * +from miasm2.expression.simplifications import expr_simp +from optparse import OptionParser +from miasm2.core.cpu import dum_arg +import cProfile +from miasm2.expression.expression import * +from miasm2.core.interval import interval +from miasm2.core.utils import hexdump + +log = logging.getLogger("dis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: 
%(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.INFO) + + +# log_asmbloc.setLevel(logging.DEBUG) +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +parser = OptionParser(usage="usage: %prog [options] file address") +parser.add_option('-m', "--architecture", dest="machine", metavar="MACHINE", + help="architecture: arm, x86_16, x86_32, x86_64, msp430") +parser.add_option('-f', "--followcall", dest="followcall", action="store_true", + default=False, + help="follow call") + +parser.add_option('-b', "--blocwatchdog", dest="bw", + default=None, + help="address to disasemble") + +parser.add_option('-n', "--funcsnumwatchdog", dest="funcswd", + default=None, + help="max func to disasm") + +parser.add_option( + '-r', "--recurfunctions", dest="recurfunctions", action="store_true", + default=False, + help="disasm found functions") + +parser.add_option('-v', "--verbose", dest="verbose", action="store_true", + default=False, + help="verbose") + +parser.add_option('-g', "--gen_ir", dest="gen_ir", action="store_true", + default=False, + help="gen intermediate representation") + +parser.add_option('-z', "--dis_nulstart_bloc", dest="dis_nulstart_bloc", + action="store_true", default=False, + help="dont_dis_nulstart_bloc") +parser.add_option('-l', "--dontdis_retcall", dest="dontdis_retcall", + action="store_true", default=False, + help="only disasm call dst") + +parser.add_option('-s', "--simplify", dest="simplify", action="store_true", + default=False, + help="for test purpose") + +parser.add_option('-o', "--shiftoffset", dest="shiftoffset", + default="0", + help="shift input str by offset") + +parser.add_option( + '-a', "--trydisasmall", dest="trydisasmall", action="store_true", + default=False, + help="try disasm all binary") + +parser.add_option('-i', "--image", dest="image", action="store_true", + default=False, + help="display image representation of disasm") + +(options, args) = 
parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) +fname = args[0] + +if options.verbose: + log_asmbloc.setLevel(logging.DEBUG) + +log.info("import machine...") +mode = None +dis_cb = None + + +if options.machine == "arm": + from miasm2.arch.arm.disasm import dis_arm as dis_engine + from miasm2.arch.arm.arch import mn_arm as mn + from miasm2.arch.arm.ira import ir_a_arm as ira +elif options.machine == "armt": + from miasm2.arch.arm.disasm import dis_armt as dis_engine + from miasm2.arch.arm.arch import mn_armt as mn + from miasm2.arch.arm.ira import ir_a_armt as ira +elif options.machine == "sh4": + from miasm2.arch.sh4.disasm import dis_sha4 as dis_engine + from miasm2.arch.sh4.arch import mn_sh4 as mn + from miasm2.arch.sh4.ira import ir_a_sh4 as ira +elif options.machine == "x86_16": + from miasm2.arch.x86.disasm import dis_x86_16 as dis_engine + from miasm2.arch.x86.arch import mn_x86 as mn + from miasm2.arch.x86.ira import ir_a_x86_16 as ira +elif options.machine == "x86_32": + from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + from miasm2.arch.x86.arch import mn_x86 as mn + from miasm2.arch.x86.ira import ir_a_x86_32 as ira +elif options.machine == "x86_64": + from miasm2.arch.x86.disasm import dis_x86_64 as dis_engine + from miasm2.arch.x86.arch import mn_x86 as mn + from miasm2.arch.x86.ira import ir_a_x86_64 as ira +elif options.machine == "msp430": + from miasm2.arch.msp430.disasm import dis_msp430 as dis_engine + from miasm2.arch.msp430.arch import mn_msp430 as mn + from miasm2.arch.msp430.ira import ir_a_msp430 as ira +else: + raise ValueError('unknown machine') +log.info('ok') + +if options.bw != None: + options.bw = int(options.bw) +if options.funcswd != None: + options.funcswd = int(options.funcswd) +machine = options.machine + +log.info('load binary') +b = open(fname).read() + +default_addr = 0 +bs = None +if b.startswith('MZ'): + e = pe_init.PE(b) + if e.isPE() and e.NTsig.signature_value == 0x4550: + bs = 
bin_stream_pe(e.virt) + default_addr = e.rva2virt(e.Opthdr.AddressOfEntryPoint) +elif b.startswith('\x7fELF'): + e = elf_init.ELF(b) + bs = bin_stream_elf(e.virt) + default_addr = e.Ehdr.entry + +if bs is None: + shift = int(options.shiftoffset, 16) + log.warning('fallback to string input (offset=%s)' % hex(shift)) + bs = bin_stream_str(b, shift=shift) + + +log.info('ok') +mdis = dis_engine(bs) +# configure disasm engine +mdis.dontdis_retcall = options.dontdis_retcall +mdis.blocs_wd = options.bw +mdis.dont_dis_nulstart_bloc = not options.dis_nulstart_bloc + +todo = [] +addrs = [int(a, 16) for a in args[1:]] + +if len(addrs) == 0 and default_addr is not None: + addrs.append(default_addr) +for ad in addrs: + todo = [(mdis, None, ad)] + +done = set() +all_funcs = set() +all_funcs_blocs = {} + + +done_interval = interval() +finish = False + +# Main disasm loop +while not finish and todo: + while not finish and todo: + mdis, caller, ad = todo.pop(0) + if ad in done: + continue + done.add(ad) + ab = mdis.dis_multibloc(ad) + + log.info('func ok %.16x (%d)' % (ad, len(all_funcs))) + + all_funcs.add(ad) + all_funcs_blocs[ad] = ab + for b in ab: + for l in b.lines: + done_interval += interval([(l.offset, l.offset + l.l)]) + + if options.funcswd is not None: + options.funcswd -= 1 + if options.recurfunctions: + for b in ab: + i = b.get_subcall_instr() + if not i: + continue + for d in i.getdstflow(mdis.symbol_pool): + if not (isinstance(d, ExprId) and isinstance(d.name, asm_label)): + continue + todo.append((mdis, i, d.name.offset)) + + if options.funcswd is not None and options.funcswd <= 0: + finish = True + + if options.trydisasmall: + for a, b in done_interval.intervals: + if b in done: + continue + log.debug('add func %s' % hex(b)) + todo.append((mdis, None, b)) + + +# Generate dotty graph +all_blocs = [] +for blocs in all_funcs_blocs.values(): + all_blocs += blocs + # for b in blocs: + # print b + +log.info('generate graph file') +g = bloc2graph(all_blocs, True) 
+open('graph_execflow.txt', 'w').write(g) + +log.info('generate intervals') + +all_lines = [] +total_l = 0 + +print done_interval +if options.image: + log.info('build img') + done_interval.show() + +for i, j in done_interval.intervals: + log.debug((hex(i), "->", hex(j))) + + +all_lines.sort(key=lambda x: x.offset) +open('lines.txt', 'w').write('\n'.join([str(l) for l in all_lines])) +log.info('total lines %s' % total_l) + + +# Bonus, generate IR graph +if options.gen_ir: + log.info("generating IR") + + my_ir = ira(mdis.symbol_pool) + my_ir.blocs = {} + for ad, all_bloc in all_funcs_blocs.items(): + log.info("generating IR... %x" % ad) + for b in all_bloc: + my_ir.add_bloc(b) + + log.info("Gen Graph... %x" % ad) + + my_ir.gen_graph() + + if options.simplify: + my_ir.dead_simp() + + out = my_ir.graph() + open('graph_irflow.txt', 'w').write(out) diff --git a/example/test_ida.py b/example/test_ida.py new file mode 100644 index 00000000..449c630c --- /dev/null +++ b/example/test_ida.py @@ -0,0 +1,409 @@ +import sys + +# Set your path first! 
+sys.path.append("/home/serpilliere/tools/pyparsing/pyparsing-2.0.1/build/lib.linux-x86_64-2.7") +sys.path.append("/home/serpilliere/projet/m2_devel/build/lib.linux-x86_64-2.7") + +from miasm2.core.bin_stream import bin_stream_str +from miasm2.core.asmbloc import * +from miasm2.expression.simplifications import expr_simp + +from miasm2.analysis.data_analysis import intra_bloc_flow_raw, inter_bloc_flow +from miasm2.analysis.data_analysis import intra_bloc_flow_symbexec + +from idaapi import * +import idautils + + +class bin_stream_ida(bin_stream_str): + # ida should provide Byte function + + def getbytes(self, start, l=1): + o = "" + for ad in xrange(start - self.shift, start - self.shift + l): + o += chr(Byte(ad)) + return o + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + o = self.getbytes(self.offset) + self.offset += l + return p + + def writebs(self, l=1): + raise ValueError('writebs unsupported') + + def __str__(self): + raise NotImplementedError('not fully functional') + out = self.bin[self.offset - self.shift:] + return out + + def setoffset(self, val): + self.offset = val + + def __len__(self): + return 0x7FFFFFFF + + def getlen(self): + return 0x7FFFFFFF - self.offset - self.shift + + +def expr2colorstr(my_ir, e): + # print "XXX", e + if isinstance(e, ExprId): + s = str(e) + if e in my_ir.arch.regs.all_regs_ids: + s = idaapi.COLSTR(s, idaapi.SCOLOR_REG) + elif isinstance(e, ExprInt): + s = str(e) + s = idaapi.COLSTR(s, idaapi.SCOLOR_NUMBER) + elif isinstance(e, ExprMem): + s = '@%d[%s]' % (e.size, expr2colorstr(my_ir, e.arg)) + elif isinstance(e, ExprOp): + out = [] + for a in e.args: + s = expr2colorstr(my_ir, a) + if isinstance(a, ExprOp): + s = "(%s)" % s + out.append(s) + if len(out) == 1: + s = "%s %s" % (e.op, str(out[0])) + else: + s = (" " + e.op + " ").join(out) + elif isinstance(e, ExprAff): + s = "%s = %s" % ( + expr2colorstr(my_ir, e.dst), expr2colorstr(my_ir, e.src)) + elif isinstance(e, ExprCond): + cond = 
expr2colorstr(my_ir, e.cond) + src1 = expr2colorstr(my_ir, e.src1) + src2 = expr2colorstr(my_ir, e.src2) + s = "(%s?%s:%s)" % (cond, src1, src2) + elif isinstance(e, ExprSlice): + s = "(%s)[%d:%d]" % (expr2colorstr(my_ir, e.arg), e.start, e.stop) + else: + s = str(e) + # print repr(s) + return s + + +def color_irbloc(irbloc): + o = [] + lbl = '%s' % irbloc.label + lbl = idaapi.COLSTR(lbl, idaapi.SCOLOR_INSN) + o.append(lbl) + for i, expr in enumerate(irbloc.irs): + for e in expr: + s = expr2colorstr(my_ir, e) + s = idaapi.COLSTR(s, idaapi.SCOLOR_INSN) + o.append(' %s' % s) + o.append("") + o.pop() + i = len(irbloc.irs) + s = str(' Dst: %s' % irbloc.dst) + s = idaapi.COLSTR(s, idaapi.SCOLOR_RPTCMT) + o.append(s) + + return "\n".join(o) + + +class GraphMiasmIR(GraphViewer): + + def __init__(self, my_ir, title, result): + GraphViewer.__init__(self, title) + print 'init' + self.my_ir = my_ir + self.result = result + self.names = {} + + def OnRefresh(self): + print 'refresh' + self.Clear() + addr_id = {} + for irbloc in self.my_ir.blocs.values(): + id_irbloc = self.AddNode(color_irbloc(irbloc)) + addr_id[irbloc] = id_irbloc + + for irbloc in self.my_ir.blocs.values(): + if not irbloc: + continue + dst = my_ir.dst_trackback(irbloc) + for d in dst: + if not self.my_ir.ExprIsLabel(d): + continue + + d = d.name + if not d in self.my_ir.blocs: + continue + b = self.my_ir.blocs[d] + node1 = addr_id[irbloc] + node2 = addr_id[b] + self.AddEdge(node1, node2) + return True + + def OnGetText(self, node_id): + b = self[node_id] + return str(b) + + def OnSelect(self, node_id): + return True + + def OnClick(self, node_id): + return True + + def OnCommand(self, cmd_id): + if self.cmd_test == cmd_id: + print 'TEST!' + return + print "command:", cmd_id + + def Show(self): + if not GraphViewer.Show(self): + return False + self.cmd_test = self.AddCommand("Test", "F2") + if self.cmd_test == 0: + print "Failed to add popup menu item!" 
+ return True + + +from miasm2.analysis.disasm_cb import guess_funcs, guess_multi_cb + + +processor_name = GetLongPrm(INF_PROCNAME) +dis_engine = None +if processor_name == "metapc": + + # HACK: check 32/64 using INF_START_SP + max_size = GetLongPrm(INF_START_SP) + if max_size == 0x80: # TODO XXX check + from miasm2.arch.x86.disasm import dis_x86_16 as dis_engine + from miasm2.arch.x86.x86.ira import ir_a_x86_16 as ira + elif max_size == 0xFFFFFFFF: + from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + from miasm2.arch.x86.ira import ir_a_x86_32 as ira + + elif max_size == 0xFFFFFFFFFFFFFFFF: + from miasm2.arch.x86.disasm import dis_x86_64 as dis_engine + from miasm2.arch.x86.ira import ir_a_x86_64 as ira + + else: + raise ValueError('cannot guess 32/64 bit! (%x)' % max_size) +elif processor_name == "ARM": + # TODO ARM/thumb + # hack for thumb: place armt = True in globals :/ + is_armt = globals().get('armt', False) + if is_armt: + from miasm2.arch.arm.disasm import dis_armt as dis_engine + from miasm2.arch.arm.ira import ir_a_armt as ira + else: + from miasm2.arch.arm.disasm import dis_arm as dis_engine + from miasm2.arch.arm.ira import ir_a_arm as ira + + from miasm2.analysis.disasm_cb import arm_guess_subcall, arm_guess_jump_table + guess_funcs.append(arm_guess_subcall) + guess_funcs.append(arm_guess_jump_table) + +elif processor_name == "msp430": + # TODO ARM/thumb + from miasm2.arch.msp430.disasm import dis_msp430 as dis_engine + from miasm2.arch.msp430.ira import ir_a_msp430 as ira + +else: + print repr(processor_name) + raise NotImplementedError('not fully functional') + +print "Arch", dis_engine + +fname = GetInputFile() +print fname + +bs = bin_stream_ida() +mdis = dis_engine(bs) +my_ir = ira(mdis.symbol_pool) + +# populate symbols with ida names +for ad, name in Names(): + # print hex(ad), repr(name) + if name is None: + continue + mdis.symbol_pool.add_label(name, ad) + +print "start disasm" +ad = ScreenEA() +print hex(ad) + +ab = 
mdis.dis_multibloc(ad) + +print "generating graph" +g = bloc2graph(ab, True) +open('asm_flow.txt', 'w').write(g) + + +print "generating IR... %x" % ad + +for b in ab: + print 'ADD' + print b + my_ir.add_bloc(b) + + +print "IR ok... %x" % ad + +for irb in my_ir.blocs.values(): + for irs in irb.irs: + for i, e in enumerate(irs): + e.dst, e.src = expr_simp(e.dst), expr_simp(e.src) + +my_ir.gen_graph() +out = my_ir.graph() +open('/tmp/graph.txt', 'w').write(out) + + +# my_ir.dead_simp() + +g = GraphMiasmIR(my_ir, "Miasm IR graph", None) + + +def mycb(*test): + print test + raise NotImplementedError('not fully functional') + +g.cmd_a = g.AddCommand("cmd a", "x") +g.cmd_b = g.AddCommand("cmd b", "y") + +g.Show() + + +def node2str(n): + label, i, node = n + print n + # out = "%s,%s\n%s"%n + out = "%s" % node + return out + + +def get_node_name(label, i, n): + # n_name = "%s_%d_%s"%(label.name, i, n) + n_name = (label.name, i, n) + return n_name + + +def get_modified_symbols(sb): + # get modified IDS + ids = sb.symbols.symbols_id.keys() + ids.sort() + out = {} + for i in ids: + if i in sb.arch.regs.regs_init and \ + i in sb.symbols.symbols_id and \ + sb.symbols.symbols_id[i] == sb.arch.regs.regs_init[i]: + continue + # print i, sb.symbols.symbols_id[i] + out[i] = sb.symbols.symbols_id[i] + + # get mem IDS + mems = sb.symbols.symbols_mem.values() + for m, v in mems: + # print m, v + out[m] = v + pp([(str(x[0]), str(x[1])) for x in out.items()]) + return out + + +def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): + out_str = "" + + my_ir.gen_graph() + # my_ir.dead_simp() + + irbloc_0 = None + for irbloc in my_ir.blocs.values(): + if irbloc.label.offset == ad: + irbloc_0 = irbloc + break + assert(irbloc_0 is not None) + flow_graph = DiGraph() + done = set() + todo = set([irbloc_0.label]) + + bloc2w = {} + + for irbloc in my_ir.blocs.values(): + # intra_bloc_flow_raw(my_ir, flow_graph, irbloc) + intra_bloc_flow_symbexec(my_ir, 
flow_graph, irbloc) + # intra_bloc_flow_symb(my_ir, flow_graph, irbloc) + + for irbloc in my_ir.blocs.values(): + print irbloc + print 'IN', [str(x) for x in irbloc.in_nodes] + print 'OUT', [str(x) for x in irbloc.out_nodes] + + print '*' * 20, 'interbloc', '*' * 20 + inter_bloc_flow(my_ir, flow_graph, irbloc_0.label, False) + + print 'Dataflow roots:' + for node in flow_graph.roots(): + lbl, i, n = node + if n in my_ir.arch.regs.all_regs_ids: + print node + + open('data.txt', 'w').write(flow_graph.dot()) + return flow_graph + + +class GraphMiasmIRFlow(GraphViewer): + + def __init__(self, flow_graph, title, result): + GraphViewer.__init__(self, title) + print 'init' + self.flow_graph = flow_graph + self.result = result + self.names = {} + + def OnRefresh(self): + print 'refresh' + self.Clear() + addr_id = {} + for n in self.flow_graph.nodes(): + id_n = self.AddNode(node2str(self.flow_graph, n)) + addr_id[n] = id_n + + for a, b in self.flow_graph.edges(): + node1, node2 = addr_id[a], addr_id[b] + self.AddEdge(node1, node2) + return True + + def OnGetText(self, node_id): + b = self[node_id] + return str(b).lower() + + def OnSelect(self, node_id): + return True + + def OnClick(self, node_id): + return True + + def OnCommand(self, cmd_id): + if self.cmd_test == cmd_id: + print 'TEST!' + return + print "command:", cmd_id + + def Show(self): + if not GraphViewer.Show(self): + return False + self.cmd_test = self.AddCommand("Test", "F2") + if self.cmd_test == 0: + print "Failed to add popup menu item!" 
+ return True + + +#print "gen bloc data flow" +#flow_graph = gen_bloc_data_flow_graph(my_ir, bs, ad) +#def node2str(self, n): +# return "%s, %s\\l%s" % n +#flow_graph.node2str = lambda n: node2str(flow_graph, n) +#open('data_flow.txt', 'w').write(flow_graph.dot()) + +# h = GraphMiasmIRFlow(flow_graph, "Miasm IRFlow graph", None) +# h.Show() diff --git a/example/test_jit_arm.py b/example/test_jit_arm.py new file mode 100644 index 00000000..7ccfd447 --- /dev/null +++ b/example/test_jit_arm.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +from argparse import ArgumentParser +from miasm2.analysis import debugging, gdbserver + +from miasm2.arch.arm.arch import mn_arm +from miasm2.jitter.jitload import * + + +parser = ArgumentParser( + description="""Sandbox an elf binary with arm engine +(ex: test_jit_arm.py example/md5_arm A684)""") +parser.add_argument("-r", "--log-regs", + help="Log registers value for each instruction", + action="store_true") +parser.add_argument("-m", "--log-mn", + help="Log desassembly conversion for each instruction", + action="store_true") +parser.add_argument("-n", "--log-newbloc", + help="Log basic blocks processed by the Jitter", + action="store_true") +parser.add_argument("-j", "--jitter", + help="Jitter engine. 
Possible values are : tcc (default), llvm", + default="tcc") +parser.add_argument("-d", "--debugging", + help="Attach a CLI debugguer to the sandboxed programm", + action="store_true") +parser.add_argument("binary", + help="binary to run") +parser.add_argument("addr", + help="start exec on addr") + + + +def jit_arm_binary(args): + filepath, entryp = args.binary, int(args.addr, 16) + myjit = jitter_arm() + myjit.init_stack() + + # Log level (if available with jitter engine) + myjit.jit.log_regs = args.log_regs + myjit.jit.log_mn = args.log_mn + myjit.jit.log_newbloc = args.log_newbloc + + elf = vm_load_elf(myjit.vm, filepath) + libs = libimp() + preload_elf(myjit.vm, elf, libs) + myjit.add_lib_handler(libs) + myjit.add_breakpoint(0x1337BEEF, lambda _: exit(0)) + regs = myjit.cpu.vm_get_gpreg() + regs['LR'] = 0x1337BEEF + myjit.cpu.vm_set_gpreg(regs) + myjit.init_run(entryp) + + + + # Handle debugging + if args.debugging is True: + dbg = debugging.Debugguer(myjit) + cmd = debugging.DebugCmd(dbg) + cmd.cmdloop() + + else: + print(myjit.continue_run()) + +if __name__ == '__main__': + from sys import stderr + args = parser.parse_args() + jit_arm_binary(args) diff --git a/example/test_jit_x86_32.py b/example/test_jit_x86_32.py new file mode 100644 index 00000000..b26161bc --- /dev/null +++ b/example/test_jit_x86_32.py @@ -0,0 +1,45 @@ +import sys +import os +from optparse import OptionParser +from miasm2.arch.x86.arch import mn_x86 +from miasm2.jitter.jitload import jitter_x86_32 +from miasm2.jitter.jitload import bin_stream_vm +from miasm2.jitter.csts import * + +from pdb import pm + + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +parser = OptionParser(usage="usage: %prog rawfiley arch address [options]") +(options, args) = parser.parse_args(sys.argv[1:]) + +if len(args) < 1: + parser.print_help() + sys.exit(0) + + +def code_sentinelle(jitter): + jitter.run = False + jitter.pc = 0 + return True + + +myjit 
= jitter_x86_32() +myjit.init_stack() + +fname = args[0] +data = open(fname).read() +run_addr = 0x40000000 +myjit.vm.vm_add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) + +myjit.jit.log_regs = True +myjit.jit.log_mn = True +myjit.vm_push_uint32_t(0x1337beef) + +myjit.add_breakpoint(0x1337beef, code_sentinelle) + +myjit.init_run(run_addr) +myjit.continue_run() diff --git a/example/test_symbexec.py b/example/test_symbexec.py new file mode 100644 index 00000000..1eabe824 --- /dev/null +++ b/example/test_symbexec.py @@ -0,0 +1,141 @@ +import sys +import os +from elfesteem import * +from elfesteem.strpatchwork import StrPatchwork +import inspect +import logging +from pdb import pm +import struct +from optparse import OptionParser +from miasm2.expression.expression import * +from miasm2.core import asmbloc + +from miasm2.arch.x86.arch import mn_x86 +from miasm2.jitter.jitload import load_pe_in_vm, load_elf_in_vm, bin_stream_vm, get_import_address_elf +from miasm2.jitter.jitter import updt_bloc_emul +from miasm2.jitter.vm_mngr import * +from miasm2.jitter.arch import Jit_x86 +from miasm2.jitter.arch import Jit_arm +from miasm2.ir.ir2C import init_arch_C + + +from miasm2.core.bin_stream import bin_stream +# from jitter import * +from miasm2.jitter.os_dep import win_api_x86_32 + +from miasm2.ir.symbexec import symbexec + +from miasm2.ir.ir2C import bloc2IR + +from miasm2.arch.x86.regs import * + + +def whoami(): + return inspect.stack()[1][3] + + +log = logging.getLogger("dis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.INFO) + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +parser = OptionParser(usage="usage: %prog [options] file") +parser.add_option('-a', "--address", dest="address", metavar="ADDRESS", + help="force eop address", default=None) +parser.add_option('-m', 
"--architecture", dest="machine", metavar="MACHINE", + help="architecture to use for disasm: arm, x86_32, x86_64, ppc, java") +parser.add_option('-s', "--segm", dest="usesegm", action="store_true", + help="use segments fs:", default=False) +parser.add_option('-d', "--hdr", dest="loadhdr", action="store_true", + help="load pe hdr", default=False) +parser.add_option( + '-l', "--loadbasedll", dest="loadbasedll", action="store_true", + help="load base dll", default=False) +parser.add_option('-x', "--dumpall", dest="dumpall", action="store_true", + help="load base dll", default=False) +parser.add_option('-e', "--loadmainpe", dest="loadmainpe", action="store_true", + help="load main pe", default=False) + +parser.add_option('-b', "--dumpblocs", dest="dumpblocs", action="store_true", + help="log disasm blogs", default=False) + +parser.add_option('-r', "--parse_resources", dest="parse_resources", + action="store_true", help="parse pe resources", default=False) + +(options, args) = parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) + + +log.info("import machine...") +mode = None +if options.machine == "arm": + from miasm2.arch.arm.arch import mn_arm as mn +elif options.machine == "sh4": + from miasm2.arch.sh4_arch import mn_sh4 as mn +elif options.machine == "x86_32": + from miasm2.arch.x86.arch import mn_x86 as mn +elif options.machine == "x86_64": + from miasm2.arch.x86.arch import mn_x86 as mn +else: + raise ValueError('unknown machine') +log.info('ok') +machines = {'arm': (mn, 'arm'), + 'sh4': (mn, None), + 'x86_32': (mn, 32), + 'x86_64': (mn, 64), + } + +mn, attrib = machines[options.machine] + +arch2jit = {'x86': Jit_x86, + 'arm': Jit_arm} + +jitarch = arch2jit[mn.name] + +e, in_str, runtime_dll, segm_to_do, symbol_pool, stack_ad = load_pe_in_vm( + mn, args[0], options) +# e, in_str, runtime_dll, segm_to_do, symbol_pool, stack_ad = +# load_elf_in_vm(mn, args[0], options) +init_arch_C(mn) + +win_api_x86_32.winobjs.runtime_dll = runtime_dll 
def se_bloc(ad, arch, attrib, sb):
    """Disassemble one block at ``ad`` and run it through ``sb``.

    The block is disassembled from the module-level ``in_str`` stream with
    ``asmbloc.dis_bloc``, printed, converted with ``bloc2IR`` and handed to
    ``sb.emulbloc``; ``sb.dump_mem()`` is called afterwards.

    ad     -- address of the block to disassemble
    arch   -- architecture object passed to the disassembler and to bloc2IR
    attrib -- architecture attribute forwarded to dis_bloc and bloc2IR
    sb     -- execution engine providing emulbloc() and dump_mem()
    """
    bloc = asmbloc.asm_bloc(asmbloc.asm_label(ad))
    done = set()
    # The original call carried a disabled "lines_wd = 8" extra argument.
    asmbloc.dis_bloc(arch, in_str, bloc, ad, done, symbol_pool,
                     attrib=attrib)
    # Single-argument form: prints the same text as the Python 2 statement.
    print(bloc)
    ir_bloc = bloc2IR(arch, attrib, in_str, bloc, [], symbol_pool)
    sb.emulbloc(arch, ir_bloc)
    sb.dump_mem()
def mygetproc(myjit):
    """Resolve an import for the emulated program and return to its caller.

    Two stdcall arguments are read from the jitter: a library base and a
    function identifier. Identifiers below 0x10000 are used as-is
    (presumably import ordinals -- verify against the caller); larger values
    are treated as the address of an ANSI string and read from memory.
    The address obtained from the module-level ``libs`` object is handed
    back through ``func_ret_stdcall``.
    """
    ret_ad, (libbase, fname) = myjit.func_args_stdcall(2)

    dst_ad = myjit.cpu.EBX
    logging.info('EBX ' + hex(dst_ad))

    # Small values are kept unchanged; anything else is a string pointer.
    if not fname < 0x10000:
        fname = myjit.get_str_ansi(fname)
    logging.info(fname)

    resolved_ad = libs.lib_get_add_func(libbase, fname, dst_ad)
    myjit.func_ret_stdcall(ret_ad, resolved_ad)
def update_binary(myjit):
    """Breakpoint callback: copy emulated memory back into the PE ``e``.

    The entry point of the module-level PE object ``e`` is set to the
    jitter's current ``pc``; then, for every section in ``e.SHList``,
    ``rawsize`` bytes are read from the emulator at the section's virtual
    address and stored into ``e.virt`` at that same address.
    """
    e.Opthdr.AddressOfEntryPoint = e.virt2rva(myjit.pc)
    logging.info('updating binary')
    for section in e.SHList:
        section_ad = e.rva2virt(section.addr)
        e.virt[section_ad] = myjit.vm.vm_get_mem(section_ad, section.rawsize)