diff options
128 files changed, 43356 insertions, 0 deletions
diff --git a/example/asm_arm.py b/example/asm_arm.py new file mode 100644 index 00000000..a848f22b --- /dev/null +++ b/example/asm_arm.py @@ -0,0 +1,79 @@ +#! /usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.arm.arch import mn_arm, base_expr, variable +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from elfesteem.strpatchwork import StrPatchwork + +my_mn = mn_arm + +reg_and_id = dict(mn_arm.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(my_mn, "arm", ''' +main: + STMFD SP!, {R4, R5, LR} + MOV R0, mystr & 0xffff + ORR R0, R0, mystr & 0xffff0000 + MOV R1, mystrend & 0xffff + ORR R1, R1, mystrend & 0xffff0000 +xxx: + LDR R2, [PC, key-(xxx+8)] +loop: + LDRB R3, [R0] + EOR R3, R3, R2 + STRB R3, [R0], 1 + CMP R0, R1 + BNE loop + EOR R0, R0, R0 + BNE end + EOR R1, R1, R1 + EOR R2, R2, R2 + EORGE R1, R1, R1 + EORGE R2, R2, R2 + ADDLTS R2, R2, R2 + SUBEQ R2, R2, R2 +end: + LDMFD SP!, {R4, R5, PC} +key: +.long 0x11223344 +mystr: +.string "test string" +mystrend: +.long 0 +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) + +for b in blocs[0]: + print b +# graph sc#### +g = asmbloc.bloc2graph(blocs[0]) +open("graph.txt", "w").write(g) + +s = StrPatchwork() + +print "symbols" +print symbol_pool +# dont erase from start to shell code padading +resolved_b, patches = asmbloc.asm_resolve_final( + my_mn, 'arm', blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + s[offset] = raw + +open('demo_arm.bin', 'wb').write(str(s)) diff --git a/example/asm_arm_sc.py b/example/asm_arm_sc.py new file mode 100644 index 00000000..824145fa --- /dev/null +++ b/example/asm_arm_sc.py @@ -0,0 
+1,57 @@ +#! /usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.arm.arch import mn_arm, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem.strpatchwork import StrPatchwork + +from pdb import pm +from miasm2.core import asmbloc +import struct + +reg_and_id = dict(mn_arm.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +st = StrPatchwork() + +blocs, symbol_pool = parse_asm.parse_txt(mn_arm, 'arm', ''' +main: + MOV R1, R0 + MOV R2, 0x100 +loop: + ADD R2, R1, R2 + ADD R1, R1, 1 + CMP R1, 0x10 + BEQ loop + + ADD R0, R1, R2 + BX LR +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0) + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_arm, "arm", blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + st[offset] = raw + +open('arm_sc.bin', 'wb').write(str(st)) diff --git a/example/asm_box_x86_32.py b/example/asm_box_x86_32.py new file mode 100644 index 00000000..2fa63c78 --- /dev/null +++ b/example/asm_box_x86_32.py @@ -0,0 +1,66 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + RET + +title: +.string "Hello!" +msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_enc.py b/example/asm_box_x86_32_enc.py new file mode 100644 index 00000000..7f1ef7ec --- /dev/null +++ b/example/asm_box_x86_32_enc.py @@ -0,0 +1,105 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + CALL cipher_code + CALL msgbox_encrypted_start + CALL cipher_code + RET + +cipher_code: + PUSH EBP + MOV EBP, ESP + + LEA ESI, DWORD PTR [msgbox_encrypted_start] + LEA EDI, DWORD PTR [msgbox_encrypted_stop] + +loop: + XOR BYTE PTR [ESI], 0x42 + INC ESI + CMP ESI, EDI + JBE loop + + MOV ESP, EBP + POP EBP + RET + +msgbox_encrypted_start: + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + RET +.dontsplit +msgbox_encrypted_stop: +.long 0 + +title: +.string "Hello!" +msg: +.string "World!" 
+''') + + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create( + "MessageBoxA"), e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +print "symbols" +print symbol_pool + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +ad_start = symbol_pool.getby_name_create("msgbox_encrypted_start").offset +ad_stop = symbol_pool.getby_name_create("msgbox_encrypted_stop").offset + +# cipher code +new_patches = dict(patches) +for ad, val in patches.items(): + if ad_start <= ad < ad_stop: + new_patches[ad] = "".join([chr(ord(x) ^ 0x42) for x in val]) + +for offset, raw in new_patches.items(): + e.virt[offset] = raw + +open('box_x86_32_enc.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_mod.py b/example/asm_box_x86_32_mod.py new file mode 100644 index 00000000..f9f53f58 --- /dev/null +++ b/example/asm_box_x86_32_mod.py @@ -0,0 +1,89 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + CALL test_automod + CALL test_automod + RET + +test_automod: + PUSH EBP + MOV EBP, ESP + +loop: + MOV EAX, 0 + CMP EAX, 0 + JMP mod_addr +mod_addr: + JNZ end + + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + + ; automodif code + MOV BYTE PTR [mod_addr], 0xEB + JMP loop +end: + MOV BYTE PTR [mod_addr], 0x75 + MOV ESP, EBP + POP EBP + RET + +title: +.string "Hello!" +msg: +.string "World!" 
+''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32_mod.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_mod_self.py b/example/asm_box_x86_32_mod_self.py new file mode 100644 index 00000000..95898722 --- /dev/null +++ b/example/asm_box_x86_32_mod_self.py @@ -0,0 +1,74 @@ +#! /usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + PUSH EBP + MOV EBP, ESP + MOV BYTE PTR [myint], 0x90 +myint: + INT 0x3 + + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + MOV ESP, EBP + POP EBP + RET + +title: +.string "Hello!" 
+msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32_mod_self.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_32_repmod.py b/example/asm_box_x86_32_repmod.py new file mode 100644 index 00000000..f351fcfa --- /dev/null +++ b/example/asm_box_x86_32_repmod.py @@ -0,0 +1,103 @@ +#! /usr/bin/env python + +# test instruction caching + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE() +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + CALL test_automod + RET + +lbl_good: + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + NOP + +test_automod: + PUSH EBP + MOV EBP, ESP + + LEA EDI, DWORD 
PTR [lbl_mod] + LEA ESI, DWORD PTR [lbl_good] + + MOV ECX, 0x8 + REPE MOVSB +lbl_mod: + XOR EAX, EAX + MOV DWORD PTR [EAX], 0xDEADC0DE + + NOP + NOP + NOP + + PUSH 0 + PUSH title + PUSH msg + PUSH 0 + CALL DWORD PTR [ MessageBoxA ] + + MOV ESP, EBP + POP EBP + RET + +title: +.string "Hello!" +msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 32, blocs[0], symbol_pool) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_32_repmod.bin', 'wb').write(str(e)) diff --git a/example/asm_box_x86_64.py b/example/asm_box_x86_64.py new file mode 100644 index 00000000..3ae4bcb5 --- /dev/null +++ b/example/asm_box_x86_64.py @@ -0,0 +1,68 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core.bin_stream import bin_stream +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from elfesteem import * +from pdb import pm +from miasm2.core import asmbloc +import struct + +e = pe_init.PE(wsize=64) +s_text = e.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) +s_iat = e.SHList.add_section(name="iat", rawsize=0x100) +new_dll = [({"name": "USER32.dll", + "firstthunk": s_iat.addr}, ["MessageBoxA"])] +e.DirImport.add_dlldesc(new_dll) +s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport)) +e.DirImport.set_rva(s_myimp.addr) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt64(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=64)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 64, ''' +main: + MOV R9, 0x0 + MOV R8, title + MOV RDX, msg + MOV RCX, 0x0 + MOV RAX, QWORD PTR [ MessageBoxA ] + CALL RAX + RET + +title: +.string "Hello!" +msg: +.string "World!" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), e.rva2virt(s_text.addr)) +symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), + e.DirImport.get_funcvirt('MessageBoxA')) +e.Opthdr.AddressOfEntryPoint = s_text.addr + +for b in blocs[0]: + print b + +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, 64, blocs[0], symbol_pool, + max_offset=0xFFFFFFFFFFFFFFFF) +print patches + +for offset, raw in patches.items(): + e.virt[offset] = raw + +open('box_x86_64.bin', 'wb').write(str(e)) diff --git a/example/asm_x86.py b/example/asm_x86.py new file mode 100644 index 00000000..3637f9ed --- /dev/null +++ b/example/asm_x86.py @@ -0,0 +1,47 @@ +#! 
/usr/bin/env python + +from miasm2.core.cpu import parse_ast +from miasm2.arch.x86.arch import mn_x86, base_expr, variable +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from elfesteem.strpatchwork import StrPatchwork + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + +def my_ast_int2expr(a): + return ExprInt32(a) + + +def my_ast_id2expr(t): + return reg_and_id.get(t, ExprId(t, size=32)) + +my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + PUSH EBP + MOV EBP, ESP + SUB ESP, 0x100 + MOV EAX, 0x1337 + LEA ESI, DWORD PTR [mystr] + MOV ESP, EBP + POP EBP + RET +mystr: +.string "test string" +''') + +# fix shellcode addr +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +s = StrPatchwork() +resolved_b, patches = asmbloc.asm_resolve_final( + mn_x86, '32', blocs[0], symbol_pool) +for offset, raw in patches.items(): + s[offset] = raw + +print patches + +open('demo_x86_32.bin', 'wb').write(str(s)) diff --git a/example/box.exe b/example/box.exe new file mode 100755 index 00000000..1c11bcda --- /dev/null +++ b/example/box.exe Binary files differdiff --git a/example/box_upx.exe b/example/box_upx.exe new file mode 100755 index 00000000..d0776ec2 --- /dev/null +++ b/example/box_upx.exe Binary files differdiff --git a/example/disasm_01.py b/example/disasm_01.py new file mode 100644 index 00000000..bc6a2fd6 --- /dev/null +++ b/example/disasm_01.py @@ -0,0 +1,12 @@ +from miasm2.arch.x86.arch import mn_x86 +from miasm2.arch.x86.regs import * + +l = mn_x86.fromstring('MOV EAX, EBX', 32) +print "instruction:", l +print "arg:", l.args[0] +x = mn_x86.asm(l) +print x +l.args[0] = EDX +y = mn_x86.asm(l) +print y +print mn_x86.dis(y[0], 32) diff --git a/example/disasm_02.py b/example/disasm_02.py new file mode 100644 index 00000000..5bfd77de --- /dev/null +++ b/example/disasm_02.py @@ -0,0 
+1,13 @@ +from miasm2.arch.x86.disasm import dis_x86_32 +from miasm2.core.asmbloc import bloc2graph + + +s = '\xb8\xef\xbe7\x13\xb9\x04\x00\x00\x00\xc1\xc0\x08\xe2\xfb\xc3' +mdis = dis_x86_32(s) +blocs = mdis.dis_multibloc(0) + +for b in blocs: + print b + +g = bloc2graph(blocs) +open('graph.txt', 'w').write(g) diff --git a/example/disasm_03.py b/example/disasm_03.py new file mode 100644 index 00000000..08b209a1 --- /dev/null +++ b/example/disasm_03.py @@ -0,0 +1,23 @@ +import sys +from elfesteem import pe_init +from miasm2.arch.x86.disasm import dis_x86_32 +from miasm2.core.asmbloc import bloc2graph +from miasm2.core.bin_stream import bin_stream_pe + +if len(sys.argv) != 3: + print 'Example:' + print "%s box_upx.exe 0x410f90" % sys.argv[0] + sys.exit(0) + +fname = sys.argv[1] +ad = int(sys.argv[2], 16) +e = pe_init.PE(open(fname).read()) +bs = bin_stream_pe(e.virt) + +mdis = dis_x86_32(bs) +# inform the engine not to disasm nul instructions +mdis.dont_dis_nulstart_bloc = True +blocs = mdis.dis_multibloc(ad) + +g = bloc2graph(blocs) +open('graph.txt', 'w').write(g) diff --git a/example/expression/manip_expression1.py b/example/expression/manip_expression1.py new file mode 100644 index 00000000..a9ed00e3 --- /dev/null +++ b/example/expression/manip_expression1.py @@ -0,0 +1,31 @@ +from miasm2.expression.expression import * + +print """ +Simple expression manipulation demo +""" + +# define 2 ID +a = ExprId('eax', 32) +b = ExprId('ebx', 32) +print a, b +# eax ebx + +# add those ID +c = ExprOp('+', a, b) +print c +# (eax + ebx) + +# + automaticaly generates ExprOp('+', a, b) +c = a + b +print c +# (eax + ebx) + +# ax is a slice of eax +ax = a[:16] +print ax +# eax[0:16] + +# memory deref +d = ExprMem(c, 32) +print d +# @32[(eax + ebx)] diff --git a/example/expression/manip_expression2.py b/example/expression/manip_expression2.py new file mode 100644 index 00000000..4153f875 --- /dev/null +++ b/example/expression/manip_expression2.py @@ -0,0 +1,26 @@ +from 
miasm2.arch.x86.arch import mn_x86 +from miasm2.expression.expression import get_rw +from miasm2.arch.x86.ira import ir_a_x86_32 +print """ +Simple expression manipulation demo. +Get read/written registers for a given instruction +""" + +arch = mn_x86 +my_ir = ir_a_x86_32() + +l = arch.fromstring('LODSB', 32) +l.offset, l.l = 0, 15 +my_ir.add_instr(l) + +print '*' * 80 +for lbl, b in my_ir.blocs.items(): + print b + for irs in b.irs: + o_r, o_w = get_rw(irs) + print 'read: ', [str(x) for x in o_r] + print 'written:', [str(x) for x in o_w] + print +my_ir.gen_graph() +g = my_ir.graph() +open('graph_instr.txt', 'w').write(g) diff --git a/example/expression/manip_expression3.py b/example/expression/manip_expression3.py new file mode 100644 index 00000000..27c86096 --- /dev/null +++ b/example/expression/manip_expression3.py @@ -0,0 +1,20 @@ +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp + +print """ +Simple expression simplification demo +""" + + +a = ExprId('eax') +b = ExprId('ebx') + +exprs = [a + b - a, + ExprInt32(0x12) + ExprInt32(0x30) - a, + ExprCompose([(a[:8], 0, 8), + (a[8:16], 8, 16)])] + +for e in exprs: + print '*' * 40 + print 'original expression:', e + print "simplified:", expr_simp(e) diff --git a/example/expression/manip_expression4.py b/example/expression/manip_expression4.py new file mode 100644 index 00000000..f4a55a3c --- /dev/null +++ b/example/expression/manip_expression4.py @@ -0,0 +1,215 @@ +import os +import sys +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.arch.x86.arch import mn_x86 +from miasm2.core import asmbloc +from miasm2.core.bin_stream import bin_stream_str +from elfesteem import pe_init +from optparse import OptionParser +from pdb import pm +from miasm2.ir.ir import ir +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.disasm import dis_x86_32 + +from 
miasm2.analysis.data_analysis import intra_bloc_flow_raw, inter_bloc_flow + +from miasm2.core.graph import DiGraph +from miasm2.ir.symbexec import symbexec + +from pprint import pprint as pp + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +print """ +Simple expression use for generating dataflow graph +Exemple: +python manip_expression4.py sc_connect_back.bin 0x2e +""" + + +parser = OptionParser(usage="usage: %prog [options] sc_connect_back.bin") + +(options, args) = parser.parse_args(sys.argv[1:]) +if len(args) != 2: + parser.print_help() + sys.exit(0) + + +def node_x_2_id(n, x): + return hash(str(n) + str(x)) & 0xffffffffffffffff + + +def get_node_name(label, i, n): + # n_name = "%s_%d_%s"%(label.name, i, n) + n_name = (label.name, i, n) + return n_name + + +def get_modified_symbols(sb): + # get modified IDS + ids = sb.symbols.symbols_id.keys() + ids.sort() + out = {} + for i in ids: + if i in sb.arch.regs.regs_init and \ + i in sb.symbols.symbols_id and \ + sb.symbols.symbols_id[i] == sb.arch.regs.regs_init[i]: + continue + # print i, sb.symbols.symbols_id[i] + out[i] = sb.symbols.symbols_id[i] + + # get mem IDS + mems = sb.symbols.symbols_mem.values() + for m, v in mems: + print m, v + out[m] = v + pp([(str(x[0]), str(x[1])) for x in out.items()]) + return out + + +def intra_bloc_flow_symb(my_ir, flow_graph, irbloc): + symbols_init = {} + for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + sb = symbexec(mn_x86, symbols_init) + sb.emulbloc(irbloc) + print '*' * 40 + print irbloc + # sb.dump_mem() + # sb.dump_id() + in_nodes = {} + out_nodes = {} + + out = get_modified_symbols(sb) + current_nodes = {} + # gen mem arg to mem node links + for dst, src in out.items(): + for n in [dst, src]: + + all_mems = set() + all_mems.update(get_expr_mem(n)) + + for n in all_mems: + node_n_w = get_node_name(irbloc.label, 0, n) + if not n == src: + continue + o_r = 
n.arg.get_r(mem_read=False, cst_read=True) + for n_r in o_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irbloc.label, i, n_r) + if not n_r in in_nodes: + in_nodes[n_r] = node_n_r + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + # gen data flow links + for dst, src in out.items(): + nodes_r = src.get_r(mem_read=False, cst_read=True) + nodes_w = set([dst]) + for n_r in nodes_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irbloc.label, 0, n_r) + if not n_r in in_nodes: + in_nodes[n_r] = node_n_r + + flow_graph.add_node(node_n_r) + for n_w in nodes_w: + node_n_w = get_node_name(irbloc.label, 1, n_w) + out_nodes[n_w] = node_n_w + + flow_graph.add_node(node_n_w) + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + irbloc.in_nodes = in_nodes + irbloc.out_nodes = out_nodes + + +def node2str(self, n): + label, i, node = n + # print n + out = "%s,%s\\l\\\n%s" % n + return out + + +def gen_bloc_data_flow_graph(my_ir, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): + out_str = "" + + # my_ir = ir_x86_32(symbol_pool) + + for irbloc in my_ir.blocs.values(): + print irbloc + + my_ir.gen_graph() + my_ir.dead_simp() + + irbloc_0 = None + for irbloc in my_ir.blocs.values(): + if irbloc.label.offset == ad: + irbloc_0 = irbloc + break + assert(irbloc_0 is not None) + flow_graph = DiGraph() + flow_graph.node2str = lambda n: node2str(flow_graph, n) + done = set() + todo = set([irbloc_0.label]) + + bloc2w = {} + + for irbloc in my_ir.blocs.values(): + intra_bloc_flow_raw(my_ir, flow_graph, irbloc) + # intra_bloc_flow_symb(my_ir, flow_graph, irbloc) + + for irbloc in my_ir.blocs.values(): + print irbloc + print 'IN', [str(x) for x in irbloc.in_nodes] + print 'OUT', [str(x) for x in irbloc.out_nodes] + + print '*' * 20, 'interbloc', '*' * 20 + inter_bloc_flow(my_ir, flow_graph, irbloc_0.label) + + # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core') + # from 
graph_qt import graph_qt + # graph_qt(flow_graph) + open('data.txt', 'w').write(flow_graph.dot()) + + +data = open(args[0]).read() +ad = int(args[1], 16) + +print 'disasm...' +mdis = dis_x86_32(data) +mdis.follow_call = True +ab = mdis.dis_multibloc(ad) +print 'ok' + + +print 'generating dataflow graph for:' +my_ir = ir_a_x86_32(mdis.symbol_pool) + +blocs = ab +for bloc in blocs: + print bloc + my_ir.add_bloc(bloc) +for irbloc in my_ir.blocs.values(): + print irbloc + if irbloc.label.offset != 0: + continue + + +out_str = gen_bloc_data_flow_graph(my_ir, mdis.bs, ad) + +print '*' * 40 +print """ + View with: +dotty dataflow.txt + or + Generate ps with pdf: +dot -Tps dataflow_xx.txt -o graph.ps +""" diff --git a/example/expression/manip_expression5.py b/example/expression/manip_expression5.py new file mode 100644 index 00000000..ed147c04 --- /dev/null +++ b/example/expression/manip_expression5.py @@ -0,0 +1,73 @@ +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from pdb import pm +import os + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +print """ +Expression simplification demo. 
+(and regression test) +""" + + +a = ExprId('a') +b = ExprId('b') +c = ExprId('c') +d = ExprId('d') +e = ExprId('e') + +m = ExprMem(a) +s = a[:8] + +i1 = ExprInt(uint32(0x1)) +i2 = ExprInt(uint32(0x2)) +cc = ExprCond(a, b, c) + +o = ExprCompose([(a[:8], 8, 16), + (a[8:16], 0, 8)]) + +o2 = ExprCompose([(a[8:16], 0, 8), + (a[:8], 8, 16)]) + +l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] +l2 = l[::-1] + + +x = ExprMem(a + b + ExprInt32(0x42)) + + +def replace_expr(e): + # print 'visit', e + dct = {c + ExprInt32(0x42): d, + a + b: c, } + if e in dct: + return dct[e] + return e + + +print x +y = x.visit(replace_expr) +print y +print x.copy() +print y.copy() +print y == y.copy() +print repr(y), repr(y.copy()) + + +z = ExprCompose([(a[5:5 + 8], 0, 8), (b[:16], 8, 24), (x[:8], 24, 32)]) +print z +print z.copy() +print z[:31].copy().visit(replace_expr) + +print 'replace' +print x.replace_expr({c + ExprInt32(0x42): d, + a + b: c, }) +print z.replace_expr({c + ExprInt32(0x42): d, + a + b: c, }) + + +u = z.copy() +print u diff --git a/example/expression/manip_expression6.py b/example/expression/manip_expression6.py new file mode 100644 index 00000000..45a6c8c1 --- /dev/null +++ b/example/expression/manip_expression6.py @@ -0,0 +1,67 @@ +from miasm2.core.cpu import parse_ast, ast_id2expr +from miasm2.arch.x86.arch import mn_x86, base_expr +from miasm2.core import parse_asm +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from miasm2.arch.x86.ira import ir_a_x86_32 +from pdb import pm + + +def my_ast_int2expr(a): + return ExprInt32(a) + +my_var_parser = parse_ast(ast_id2expr, my_ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +# First, asm code +blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +main: + MOV EAX, 1 + MOV EBX, 2 + MOV ECX, 2 + MOV DX, 2 + +loop: + INC EBX + CMOVZ EAX, EBX + ADD EAX, ECX + JZ loop + RET +''') + +blocs = blocs[0] + +symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +for b in 
blocs: + print b + + +print "symbols:" +print symbol_pool +resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, 32, blocs, symbol_pool) + +# Translate to IR +my_ir = ir_a_x86_32(symbol_pool) +for b in blocs: + print 'add bloc' + print b + my_ir.add_bloc(b) + +# Display IR +for lbl, b in my_ir.blocs.items(): + print b + +# Dead propagation +my_ir.gen_graph() +out = my_ir.graph() +open('graph.txt', 'w').write(out) +print '*' * 80 +my_ir.dead_simp() +out2 = my_ir.graph() +open('graph2.txt', 'w').write(out2) + +# Display new IR +print 'new ir blocs' +for lbl, b in my_ir.blocs.items(): + print b diff --git a/example/expression/manip_expression7.py b/example/expression/manip_expression7.py new file mode 100644 index 00000000..d1cbb73b --- /dev/null +++ b/example/expression/manip_expression7.py @@ -0,0 +1,21 @@ +from miasm2.core.graph import DiGraph +from miasm2.expression.expression import * + +print "Simple Expression grapher demo" + +a = ExprId("A") +b = ExprId("B") +c = ExprId("C") +d = ExprId("D") +m = ExprMem(a + b + c + a) + +e1 = ExprCompose([(a + b - (c * a) / m | b, 0, 32), (a + m, 32, 64)]) +e2 = ExprInt64(15) +e = ExprCond(d, e1, e2)[0:32] + +print "[+] Expression:" +print e + +g = e.graph() +print "[+] Graph:" +print g.dot() diff --git a/example/expression/sc_connect_back.bin b/example/expression/sc_connect_back.bin new file mode 100644 index 00000000..9e9c80a5 --- /dev/null +++ b/example/expression/sc_connect_back.bin Binary files differdiff --git a/example/expression/simple_test.bin b/example/expression/simple_test.bin new file mode 100644 index 00000000..60f4e768 --- /dev/null +++ b/example/expression/simple_test.bin Binary files differdiff --git a/example/expression/simple_test.c b/example/expression/simple_test.c new file mode 100644 index 00000000..8e344f18 --- /dev/null +++ b/example/expression/simple_test.c @@ -0,0 +1,26 @@ +int test(unsigned int argc, char** argv) +{ + unsigned int ret; + if (argc == 0) + ret = 0x1001; + else if (argc < 2) + ret = 
0x1002; + else if (argc <= 5) + ret = 0x1003; + else if (argc != 7 && argc*2 == 14) + ret = 0x1004; + else if (argc*2 == 14) + ret = 0x1005; + else if (argc & 0x30) + ret = 0x1006; + else if (argc + 3 == 0x45) + ret = 0x1007; + else + ret = 0x1008; + return ret; +} + +int main(int argc, char** argv) +{ + return test(argc, argv); +} diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py new file mode 100644 index 00000000..828629fc --- /dev/null +++ b/example/expression/solve_condition_stp.py @@ -0,0 +1,245 @@ +import os +import sys +from miasm2.arch.x86.arch import * +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.sem import * +from miasm2.core.bin_stream import bin_stream_str +from miasm2.core import asmbloc +from miasm2.expression.expression import get_rw +from miasm2.ir.symbexec import symbexec +from miasm2.expression.simplifications import expr_simp +from miasm2.expression import stp +from collections import defaultdict +from optparse import OptionParser +import subprocess +from miasm2.core import parse_asm +from elfesteem.strpatchwork import StrPatchwork + +from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +mn = mn_x86 + +parser = OptionParser(usage="usage: %prog [options] file") +parser.add_option('-a', "--address", dest="address", metavar="ADDRESS", + help="address to disasemble", default="0") + +(options, args) = parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) + + +def get_bloc(my_ir, mdis, ad): + if isinstance(ad, asmbloc.asm_label): + l = ad + else: + l = mdis.symbol_pool.getby_offset_create(ad) + if not l in my_ir.blocs: + ad = l.offset + b = mdis.dis_bloc(ad) + my_ir.add_bloc(b) + b = my_ir.get_bloc(l) + if b is None: + raise LookupError('no bloc found at that address: %s' % l) + return b + + +def emul_symb(my_ir, mdis, states_todo, 
states_done): + while states_todo: + ad, symbols, conds = states_todo.pop() + print '*' * 40, "addr", ad, '*' * 40 + if (ad, symbols, conds) in states_done: + print 'skip', ad + continue + states_done.add((ad, symbols, conds)) + sb = symbexec(mn, {}) + sb.symbols = symbols.copy() + if my_ir.pc in sb.symbols: + del(sb.symbols[my_ir.pc]) + b = get_bloc(my_ir, mdis, ad) + + print 'run bloc' + print b + # print blocs[ad] + ad = sb.emulbloc(b) + print 'final state' + sb.dump_id() + print 'dataflow' + # data_flow_graph_from_expr(sb) + + assert(ad is not None) + print "DST", ad + + if isinstance(ad, ExprCond): + # Create 2 states, each including complementary conditions + p1 = sb.symbols.copy() + p2 = sb.symbols.copy() + c1 = {ad.cond: ExprInt_from(ad.cond, 0)} + c2 = {ad.cond: ExprInt_from(ad.cond, 1)} + print ad.cond + p1[ad.cond] = ExprInt_from(ad.cond, 0) + p2[ad.cond] = ExprInt_from(ad.cond, 1) + ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {})) + ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {})) + if not (isinstance(ad1, ExprInt) or (isinstance(ad1, ExprId) and isinstance(ad1.name, asmbloc.asm_label)) and + isinstance(ad2, ExprInt) or (isinstance(ad2, ExprId) and isinstance(ad2.name, asmbloc.asm_label))): + print str(ad1), str(ad2) + raise ValueError("zarb condition") + conds1 = list(conds) + c1.items() + conds2 = list(conds) + c2.items() + if isinstance(ad1, ExprId): + ad1 = ad1.name + if isinstance(ad2, ExprId): + ad2 = ad2.name + if isinstance(ad1, ExprInt): + ad1 = ad1.arg + if isinstance(ad2, ExprInt): + ad2 = ad2.arg + states_todo.add((ad1, p1, tuple(conds1))) + states_todo.add((ad2, p2, tuple(conds2))) + elif isinstance(ad, ExprInt): + ad = int(ad.arg) + states_todo.add((ad, sb.symbols.copy(), tuple(conds))) + elif isinstance(ad, ExprId) and isinstance(ad.name, asmbloc.asm_label): + if isinstance(ad, ExprId): + ad = ad.name + states_todo.add((ad, sb.symbols.copy(), tuple(conds))) + elif ad == ret_addr: + print 'ret reached' + continue + else: + 
raise ValueError("zarb eip") + + +if __name__ == '__main__': + + data = open(args[0]).read() + bs = bin_stream_str(data) + + mdis = dis_engine(bs) + + ad = int(options.address, 16) + + symbols_init = {} + for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + + # config parser for 32 bit + reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) + + def my_ast_int2expr(a): + return ExprInt32(a) + + def my_ast_id2expr(t): + if t in reg_and_id: + r = reg_and_id[t] + else: + r = ExprId(t, size=32) + return r + my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) + base_expr.setParseAction(my_var_parser) + + argc = ExprId('argc', 32) + argv = ExprId('argv', 32) + ret_addr = ExprId('ret_addr') + reg_and_id[argc.name] = argc + reg_and_id[argv.name] = argv + reg_and_id[ret_addr.name] = ret_addr + + my_symbols = [argc, argv, ret_addr] + my_symbols = dict([(x.name, x) for x in my_symbols]) + my_symbols.update(mn_x86.regs.all_regs_ids_byname) + + sb = symbexec(mn, symbols_init) + + blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' + PUSH argv + PUSH argc + PUSH ret_addr + ''') + + my_ir = ir_x86_32(mdis.symbol_pool) + + b = blocs[0][0] + print b + # add fake address and len to parsed instructions + for i, l in enumerate(b.lines): + l.offset, l.l = i, 1 + my_ir.add_bloc(b) + irb = get_bloc(my_ir, mdis, 0) + sb.emulbloc(irb) + sb.dump_mem() + + # reset my_ir blocs + my_ir.blocs = {} + + states_todo = set() + states_done = set() + states_todo.add((uint32(ad), sb.symbols, ())) + + # emul blocs, propagate states + emul_symb(my_ir, mdis, states_todo, states_done) + + all_info = [] + + print '*' * 40, 'conditions to match', '*' * 40 + for ad, symbols, conds in sorted(states_done): + print '*' * 40, ad, '*' * 40 + reqs = [] + for k, v in conds: + print k, v + reqs.append((k, v)) + all_info.append((ad, reqs)) + + all_cases = set() + + sb = symbexec(mn, symbols_init) + for ad, reqs_cond in all_info: + all_ids = set() + for k, v in reqs_cond: + 
all_ids.update(get_expr_ids(k)) + + out = [] + + # declare variables + for v in all_ids: + out.append(str(v) + ":" + "BITVECTOR(%d);" % v.size) + + all_csts = [] + for k, v in reqs_cond: + cst = k.strcst() + val = v.arg + assert(val in [0, 1]) + inv = "" + if val == 1: + inv = "NOT " + val = "0" * v.size + all_csts.append("(%s%s=0bin%s)" % (inv, cst, val)) + if not all_csts: + continue + rez = " AND ".join(all_csts) + out.append("QUERY(NOT (%s));" % rez) + end = "\n".join(out) + open('out.txt', 'w').write(end) + try: + cases = subprocess.check_output(["/home/serpilliere/tools/stp/stp", + "-p", + "out.txt"]) + except OSError: + print "ERF, cannot find stp" + break + for c in cases.split('\n'): + if c.startswith('ASSERT'): + all_cases.add((ad, c)) + + print '*' * 40, 'ALL COND', '*' * 40 + all_cases = list(all_cases) + all_cases.sort(key=lambda x: (x[0], x[1])) + for ad, val in all_cases: + print 'address', ad, 'is reachable using argc', val diff --git a/example/extract_pe_ressources.py b/example/extract_pe_ressources.py new file mode 100644 index 00000000..d5c59ae5 --- /dev/null +++ b/example/extract_pe_ressources.py @@ -0,0 +1,42 @@ +import sys +import struct +from elfesteem import * +import os +import sys + +# example for extracting all pe ressources + + +def extract_res(res, name_o="", num=0, lvl=-1): + lvl += 1 + if not res: + return num + for x in res.resentries: + print "\t" * lvl, repr(x) + num += 1 + + if x.name_s: + name = name_o[:] + repr(x.name_s.value[::2]) + else: + name = name_o[:] + + if x.data: + print "\t" * lvl, 'data', len(x.data.s) + open('out/%.3d_%s.bin' % (num, name), 'w').write(str(x.data.s)) + else: + print "\t" * lvl, None + if x.offsettosubdir: + num = extract_res(x.subdir, name, num, lvl + 1) + return num + +try: + os.stat('out') +except: + os.mkdir('out') + +fname = sys.argv[1] +e = pe_init.PE(open(fname, 'rb').read()) +res = e.DirRes.resdesc + + +extract_res(res) diff --git a/example/md5_arm b/example/md5_arm new file mode 100755 index 
00000000..148e0611 --- /dev/null +++ b/example/md5_arm Binary files differdiff --git a/example/sandbox_pe_x86_32.py b/example/sandbox_pe_x86_32.py new file mode 100644 index 00000000..0f660668 --- /dev/null +++ b/example/sandbox_pe_x86_32.py @@ -0,0 +1,125 @@ +import sys +import os +from argparse import ArgumentParser +from miasm2.arch.x86.arch import mn_x86 +from miasm2.jitter.jitload import jitter_x86_32, vm_load_pe, preload_pe, libimp +from miasm2.jitter.jitload import bin_stream_vm +from miasm2.jitter.csts import * +from miasm2.jitter.os_dep import win_api_x86_32 +from miasm2.analysis import debugging, gdbserver +import inspect + +# Debug settings # +from pdb import pm + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +# + +# Handle arguments + +parser = ArgumentParser( + description="Sandbox a PE binary with x86 32bits engine") +parser.add_argument("filename", help="PE binary") +parser.add_argument("-r", "--log-regs", + help="Log registers value for each instruction", + action="store_true") +parser.add_argument("-m", "--log-mn", + help="Log desassembly conversion for each instruction", + action="store_true") +parser.add_argument("-n", "--log-newbloc", + help="Log basic blocks processed by the Jitter", + action="store_true") +parser.add_argument("-j", "--jitter", + help="Jitter engine. 
def msvcrt_memset(myjit):
    """Emulate msvcrt memset(dst, c, size) for the sandboxed program.

    Reads three cdecl arguments, fills `size` bytes at `dst` with the low
    byte of `c`, and returns `dst` to the emulated caller, as C memset does.
    """
    ret_ad, args = myjit.func_args_cdecl(3)
    dst, c, size = args

    myjit.vm.vm_set_mem(dst, chr(c & 0xFF) * size)
    # C memset returns its destination pointer, not 0.
    myjit.func_ret_cdecl(ret_ad, dst)


def msvcrt_memcpy(myjit):
    """Emulate msvcrt memcpy(dst, src, size) for the sandboxed program.

    Reads three cdecl arguments, copies `size` bytes from `src` to `dst` in
    the emulated memory, and returns `dst` to the emulated caller, as C
    memcpy does.
    """
    ret_ad, args = myjit.func_args_cdecl(3)
    dst, src, size = args

    x = myjit.vm.vm_get_mem(src, size)
    myjit.vm.vm_set_mem(dst, x)
    # C memcpy returns its destination pointer, not 0.
    myjit.func_ret_cdecl(ret_ad, dst)
cProfile.run(r'run_bin(myjit, addr)') + +# Test if emulation ended properly +assert(myjit.run is False) diff --git a/example/symbol_exec.py b/example/symbol_exec.py new file mode 100644 index 00000000..6d7457aa --- /dev/null +++ b/example/symbol_exec.py @@ -0,0 +1,31 @@ +# Minimalist Symbol Exec example +from miasm2.core.bin_stream import bin_stream_str +from miasm2.arch.x86.arch import mn_x86 +from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.arch.x86.regs import all_regs_ids, all_regs_ids_init +from miasm2.ir.symbexec import symbexec +from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine +import miasm2.expression.expression as m2_expr + +l = mn_x86.fromstring("MOV EAX, EBX", 32) +asm = mn_x86.asm(l)[0] + +bin_stream = bin_stream_str(asm) + +mdis = dis_engine(bin_stream) +disasm = mdis.dis_multibloc(0) + +ir = ir_a_x86_32(mdis.symbol_pool) +for bbl in disasm: ir.add_bloc(bbl) + +symbols_init = {} +for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] +symb = symbexec(mn_x86, symbols_init) + +block = ir.get_bloc(0) + +cur_addr = symb.emulbloc(block) +assert(symb.symbols[m2_expr.ExprId("EAX")] == symbols_init[m2_expr.ExprId("EBX")]) +print 'modified registers:' +symb.dump_id() diff --git a/example/test_dis.py b/example/test_dis.py new file mode 100644 index 00000000..4400ec14 --- /dev/null +++ b/example/test_dis.py @@ -0,0 +1,266 @@ +import sys +import os +import time + +from miasm2.core.bin_stream import bin_stream_elf, bin_stream_pe, bin_stream_str +from elfesteem import * +from miasm2.core.asmbloc import * +from miasm2.expression.simplifications import expr_simp +from optparse import OptionParser +from miasm2.core.cpu import dum_arg +import cProfile +from miasm2.expression.expression import * +from miasm2.core.interval import interval +from miasm2.core.utils import hexdump + +log = logging.getLogger("dis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: 
%(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.INFO) + + +# log_asmbloc.setLevel(logging.DEBUG) +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +parser = OptionParser(usage="usage: %prog [options] file address") +parser.add_option('-m', "--architecture", dest="machine", metavar="MACHINE", + help="architecture: arm, x86_16, x86_32, x86_64, msp430") +parser.add_option('-f', "--followcall", dest="followcall", action="store_true", + default=False, + help="follow call") + +parser.add_option('-b', "--blocwatchdog", dest="bw", + default=None, + help="address to disasemble") + +parser.add_option('-n', "--funcsnumwatchdog", dest="funcswd", + default=None, + help="max func to disasm") + +parser.add_option( + '-r', "--recurfunctions", dest="recurfunctions", action="store_true", + default=False, + help="disasm found functions") + +parser.add_option('-v', "--verbose", dest="verbose", action="store_true", + default=False, + help="verbose") + +parser.add_option('-g', "--gen_ir", dest="gen_ir", action="store_true", + default=False, + help="gen intermediate representation") + +parser.add_option('-z', "--dis_nulstart_bloc", dest="dis_nulstart_bloc", + action="store_true", default=False, + help="dont_dis_nulstart_bloc") +parser.add_option('-l', "--dontdis_retcall", dest="dontdis_retcall", + action="store_true", default=False, + help="only disasm call dst") + +parser.add_option('-s', "--simplify", dest="simplify", action="store_true", + default=False, + help="for test purpose") + +parser.add_option('-o', "--shiftoffset", dest="shiftoffset", + default="0", + help="shift input str by offset") + +parser.add_option( + '-a', "--trydisasmall", dest="trydisasmall", action="store_true", + default=False, + help="try disasm all binary") + +parser.add_option('-i', "--image", dest="image", action="store_true", + default=False, + help="display image representation of disasm") + +(options, args) = 
parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) +fname = args[0] + +if options.verbose: + log_asmbloc.setLevel(logging.DEBUG) + +log.info("import machine...") +mode = None +dis_cb = None + + +if options.machine == "arm": + from miasm2.arch.arm.disasm import dis_arm as dis_engine + from miasm2.arch.arm.arch import mn_arm as mn + from miasm2.arch.arm.ira import ir_a_arm as ira +elif options.machine == "armt": + from miasm2.arch.arm.disasm import dis_armt as dis_engine + from miasm2.arch.arm.arch import mn_armt as mn + from miasm2.arch.arm.ira import ir_a_armt as ira +elif options.machine == "sh4": + from miasm2.arch.sh4.disasm import dis_sha4 as dis_engine + from miasm2.arch.sh4.arch import mn_sh4 as mn + from miasm2.arch.sh4.ira import ir_a_sh4 as ira +elif options.machine == "x86_16": + from miasm2.arch.x86.disasm import dis_x86_16 as dis_engine + from miasm2.arch.x86.arch import mn_x86 as mn + from miasm2.arch.x86.ira import ir_a_x86_16 as ira +elif options.machine == "x86_32": + from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + from miasm2.arch.x86.arch import mn_x86 as mn + from miasm2.arch.x86.ira import ir_a_x86_32 as ira +elif options.machine == "x86_64": + from miasm2.arch.x86.disasm import dis_x86_64 as dis_engine + from miasm2.arch.x86.arch import mn_x86 as mn + from miasm2.arch.x86.ira import ir_a_x86_64 as ira +elif options.machine == "msp430": + from miasm2.arch.msp430.disasm import dis_msp430 as dis_engine + from miasm2.arch.msp430.arch import mn_msp430 as mn + from miasm2.arch.msp430.ira import ir_a_msp430 as ira +else: + raise ValueError('unknown machine') +log.info('ok') + +if options.bw != None: + options.bw = int(options.bw) +if options.funcswd != None: + options.funcswd = int(options.funcswd) +machine = options.machine + +log.info('load binary') +b = open(fname).read() + +default_addr = 0 +bs = None +if b.startswith('MZ'): + e = pe_init.PE(b) + if e.isPE() and e.NTsig.signature_value == 0x4550: + bs = 
# Seed the work list with every start address given on the command line
# (hexadecimal), falling back to the entry point found while loading.
todo = []
addrs = [int(a, 16) for a in args[1:]]

if len(addrs) == 0 and default_addr is not None:
    addrs.append(default_addr)
for ad in addrs:
    # Append: rebinding `todo` here kept only the last address given.
    todo.append((mdis, None, ad))
+open('graph_execflow.txt', 'w').write(g) + +log.info('generate intervals') + +all_lines = [] +total_l = 0 + +print done_interval +if options.image: + log.info('build img') + done_interval.show() + +for i, j in done_interval.intervals: + log.debug((hex(i), "->", hex(j))) + + +all_lines.sort(key=lambda x: x.offset) +open('lines.txt', 'w').write('\n'.join([str(l) for l in all_lines])) +log.info('total lines %s' % total_l) + + +# Bonus, generate IR graph +if options.gen_ir: + log.info("generating IR") + + my_ir = ira(mdis.symbol_pool) + my_ir.blocs = {} + for ad, all_bloc in all_funcs_blocs.items(): + log.info("generating IR... %x" % ad) + for b in all_bloc: + my_ir.add_bloc(b) + + log.info("Gen Graph... %x" % ad) + + my_ir.gen_graph() + + if options.simplify: + my_ir.dead_simp() + + out = my_ir.graph() + open('graph_irflow.txt', 'w').write(out) diff --git a/example/test_ida.py b/example/test_ida.py new file mode 100644 index 00000000..449c630c --- /dev/null +++ b/example/test_ida.py @@ -0,0 +1,409 @@ +import sys + +# Set your path first! 
class bin_stream_ida(bin_stream_str):
    """Byte stream that reads its content from IDA through Byte().

    Offsets handed to getbytes() are shifted by `self.shift` before the
    lookup.  The stream is read-only, and its length is reported as a fixed
    0x7FFFFFFF bound.
    """
    # ida should provide Byte function

    def getbytes(self, start, l=1):
        """Return `l` bytes starting at stream offset `start`."""
        first = start - self.shift
        return "".join(chr(Byte(ad)) for ad in xrange(first, first + l))

    def readbs(self, l=1):
        """Read `l` bytes at the current offset and advance past them.

        Raises IOError when the read would go past `self.l`.
        """
        if self.offset + l > self.l:
            raise IOError
        # Fetch all `l` bytes (the offset moves by `l`) and return them; the
        # previous code fetched one byte and returned an undefined name.
        o = self.getbytes(self.offset, l)
        self.offset += l
        return o

    def writebs(self, l=1):
        """Writing is not supported on this stream."""
        raise ValueError('writebs unsupported')

    def __str__(self):
        raise NotImplementedError('not fully functional')

    def setoffset(self, val):
        """Move the read cursor to `val`."""
        self.offset = val

    def __len__(self):
        return 0x7FFFFFFF

    def getlen(self):
        """Return the bytes left before the fixed 0x7FFFFFFF bound."""
        return 0x7FFFFFFF - self.offset - self.shift
def color_irbloc(irbloc):
    """Render an IR bloc as IDA color-tagged text.

    The output is the bloc label, then the expressions of each entry of
    `irbloc.irs` with a blank line between entries, then a final line
    showing `irbloc.dst`.  Expressions are rendered with expr2colorstr(),
    which is given the module-level `my_ir`.
    """
    o = []
    lbl = '%s' % irbloc.label
    lbl = idaapi.COLSTR(lbl, idaapi.SCOLOR_INSN)
    o.append(lbl)
    for expr in irbloc.irs:
        for e in expr:
            s = expr2colorstr(my_ir, e)
            s = idaapi.COLSTR(s, idaapi.SCOLOR_INSN)
            o.append(' %s' % s)
        o.append("")
    # Drop the trailing separator.  Only do so when one was added: with an
    # empty `irs` the unconditional pop used to remove the label line.
    if irbloc.irs:
        o.pop()
    s = str(' Dst: %s' % irbloc.dst)
    s = idaapi.COLSTR(s, idaapi.SCOLOR_RPTCMT)
    o.append(s)

    return "\n".join(o)
+ return True + + +from miasm2.analysis.disasm_cb import guess_funcs, guess_multi_cb + + +processor_name = GetLongPrm(INF_PROCNAME) +dis_engine = None +if processor_name == "metapc": + + # HACK: check 32/64 using INF_START_SP + max_size = GetLongPrm(INF_START_SP) + if max_size == 0x80: # TODO XXX check + from miasm2.arch.x86.disasm import dis_x86_16 as dis_engine + from miasm2.arch.x86.x86.ira import ir_a_x86_16 as ira + elif max_size == 0xFFFFFFFF: + from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine + from miasm2.arch.x86.ira import ir_a_x86_32 as ira + + elif max_size == 0xFFFFFFFFFFFFFFFF: + from miasm2.arch.x86.disasm import dis_x86_64 as dis_engine + from miasm2.arch.x86.ira import ir_a_x86_64 as ira + + else: + raise ValueError('cannot guess 32/64 bit! (%x)' % max_size) +elif processor_name == "ARM": + # TODO ARM/thumb + # hack for thumb: place armt = True in globals :/ + is_armt = globals().get('armt', False) + if is_armt: + from miasm2.arch.arm.disasm import dis_armt as dis_engine + from miasm2.arch.arm.ira import ir_a_armt as ira + else: + from miasm2.arch.arm.disasm import dis_arm as dis_engine + from miasm2.arch.arm.ira import ir_a_arm as ira + + from miasm2.analysis.disasm_cb import arm_guess_subcall, arm_guess_jump_table + guess_funcs.append(arm_guess_subcall) + guess_funcs.append(arm_guess_jump_table) + +elif processor_name == "msp430": + # TODO ARM/thumb + from miasm2.arch.msp430.disasm import dis_msp430 as dis_engine + from miasm2.arch.msp430.ira import ir_a_msp430 as ira + +else: + print repr(processor_name) + raise NotImplementedError('not fully functional') + +print "Arch", dis_engine + +fname = GetInputFile() +print fname + +bs = bin_stream_ida() +mdis = dis_engine(bs) +my_ir = ira(mdis.symbol_pool) + +# populate symbols with ida names +for ad, name in Names(): + # print hex(ad), repr(name) + if name is None: + continue + mdis.symbol_pool.add_label(name, ad) + +print "start disasm" +ad = ScreenEA() +print hex(ad) + +ab = 
def get_modified_symbols(sb):
    """Collect the symbols of `sb` that differ from their initial value.

    Every entry of sb.symbols.symbols_id is kept unless it is also present
    in sb.arch.regs.regs_init with an equal value.  Each (mem, value) pair
    found in sb.symbols.symbols_mem.values() is then added.  The result is
    pretty-printed through pp() as (str, str) pairs and returned as a dict.
    """
    current = sb.symbols.symbols_id
    initial = sb.arch.regs.regs_init

    modified = {}
    for reg in sorted(current.keys()):
        untouched = (reg in initial and
                     reg in current and
                     current[reg] == initial[reg])
        if not untouched:
            modified[reg] = current[reg]

    # memory entries are always reported
    for mem, value in sb.symbols.symbols_mem.values():
        modified[mem] = value

    pp([(str(item[0]), str(item[1])) for item in modified.items()])
    return modified
flow_graph, irbloc) + # intra_bloc_flow_symb(my_ir, flow_graph, irbloc) + + for irbloc in my_ir.blocs.values(): + print irbloc + print 'IN', [str(x) for x in irbloc.in_nodes] + print 'OUT', [str(x) for x in irbloc.out_nodes] + + print '*' * 20, 'interbloc', '*' * 20 + inter_bloc_flow(my_ir, flow_graph, irbloc_0.label, False) + + print 'Dataflow roots:' + for node in flow_graph.roots(): + lbl, i, n = node + if n in my_ir.arch.regs.all_regs_ids: + print node + + open('data.txt', 'w').write(flow_graph.dot()) + return flow_graph + + +class GraphMiasmIRFlow(GraphViewer): + + def __init__(self, flow_graph, title, result): + GraphViewer.__init__(self, title) + print 'init' + self.flow_graph = flow_graph + self.result = result + self.names = {} + + def OnRefresh(self): + print 'refresh' + self.Clear() + addr_id = {} + for n in self.flow_graph.nodes(): + id_n = self.AddNode(node2str(self.flow_graph, n)) + addr_id[n] = id_n + + for a, b in self.flow_graph.edges(): + node1, node2 = addr_id[a], addr_id[b] + self.AddEdge(node1, node2) + return True + + def OnGetText(self, node_id): + b = self[node_id] + return str(b).lower() + + def OnSelect(self, node_id): + return True + + def OnClick(self, node_id): + return True + + def OnCommand(self, cmd_id): + if self.cmd_test == cmd_id: + print 'TEST!' + return + print "command:", cmd_id + + def Show(self): + if not GraphViewer.Show(self): + return False + self.cmd_test = self.AddCommand("Test", "F2") + if self.cmd_test == 0: + print "Failed to add popup menu item!" 
+ return True + + +#print "gen bloc data flow" +#flow_graph = gen_bloc_data_flow_graph(my_ir, bs, ad) +#def node2str(self, n): +# return "%s, %s\\l%s" % n +#flow_graph.node2str = lambda n: node2str(flow_graph, n) +#open('data_flow.txt', 'w').write(flow_graph.dot()) + +# h = GraphMiasmIRFlow(flow_graph, "Miasm IRFlow graph", None) +# h.Show() diff --git a/example/test_jit_arm.py b/example/test_jit_arm.py new file mode 100644 index 00000000..7ccfd447 --- /dev/null +++ b/example/test_jit_arm.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +from argparse import ArgumentParser +from miasm2.analysis import debugging, gdbserver + +from miasm2.arch.arm.arch import mn_arm +from miasm2.jitter.jitload import * + + +parser = ArgumentParser( + description="""Sandbox an elf binary with arm engine +(ex: test_jit_arm.py example/md5_arm A684)""") +parser.add_argument("-r", "--log-regs", + help="Log registers value for each instruction", + action="store_true") +parser.add_argument("-m", "--log-mn", + help="Log desassembly conversion for each instruction", + action="store_true") +parser.add_argument("-n", "--log-newbloc", + help="Log basic blocks processed by the Jitter", + action="store_true") +parser.add_argument("-j", "--jitter", + help="Jitter engine. 
def jit_arm_binary(args):
    """Load an ELF binary into an ARM jitter and run it from args.addr.

    args: parsed command line; reads `binary`, `addr` (hexadecimal string),
        `jitter`, `log_regs`, `log_mn`, `log_newbloc` and `debugging`.

    LR is set to 0x1337BEEF and a breakpoint at that address exits the
    process, so returning from the entry function ends the run.
    """
    filepath, entryp = args.binary, int(args.addr, 16)
    # Honour the -j/--jitter option, which was parsed but never used.
    # NOTE(review): assumes jitter_arm accepts jit_type as jitter_x86_32
    # does in sandbox_pe_x86_32.py - confirm against jitload.
    myjit = jitter_arm(jit_type=args.jitter)
    myjit.init_stack()

    # Log level (if available with jitter engine)
    myjit.jit.log_regs = args.log_regs
    myjit.jit.log_mn = args.log_mn
    myjit.jit.log_newbloc = args.log_newbloc

    elf = vm_load_elf(myjit.vm, filepath)
    libs = libimp()
    preload_elf(myjit.vm, elf, libs)
    myjit.add_lib_handler(libs)
    myjit.add_breakpoint(0x1337BEEF, lambda _: exit(0))
    regs = myjit.cpu.vm_get_gpreg()
    regs['LR'] = 0x1337BEEF
    myjit.cpu.vm_set_gpreg(regs)
    myjit.init_run(entryp)

    # Handle debugging
    if args.debugging is True:
        dbg = debugging.Debugguer(myjit)
        cmd = debugging.DebugCmd(dbg)
        cmd.cmdloop()

    else:
        print(myjit.continue_run())
def whoami():
    """Return the name of the function that called whoami()."""
    # stack()[0] is whoami itself; [1] is its caller, whose record holds
    # the function name at index 3.
    caller_record = inspect.stack()[1]
    return caller_record[3]
"--architecture", dest="machine", metavar="MACHINE", + help="architecture to use for disasm: arm, x86_32, x86_64, ppc, java") +parser.add_option('-s', "--segm", dest="usesegm", action="store_true", + help="use segments fs:", default=False) +parser.add_option('-d', "--hdr", dest="loadhdr", action="store_true", + help="load pe hdr", default=False) +parser.add_option( + '-l', "--loadbasedll", dest="loadbasedll", action="store_true", + help="load base dll", default=False) +parser.add_option('-x', "--dumpall", dest="dumpall", action="store_true", + help="load base dll", default=False) +parser.add_option('-e', "--loadmainpe", dest="loadmainpe", action="store_true", + help="load main pe", default=False) + +parser.add_option('-b', "--dumpblocs", dest="dumpblocs", action="store_true", + help="log disasm blogs", default=False) + +parser.add_option('-r', "--parse_resources", dest="parse_resources", + action="store_true", help="parse pe resources", default=False) + +(options, args) = parser.parse_args(sys.argv[1:]) +if not args: + parser.print_help() + sys.exit(0) + + +log.info("import machine...") +mode = None +if options.machine == "arm": + from miasm2.arch.arm.arch import mn_arm as mn +elif options.machine == "sh4": + from miasm2.arch.sh4_arch import mn_sh4 as mn +elif options.machine == "x86_32": + from miasm2.arch.x86.arch import mn_x86 as mn +elif options.machine == "x86_64": + from miasm2.arch.x86.arch import mn_x86 as mn +else: + raise ValueError('unknown machine') +log.info('ok') +machines = {'arm': (mn, 'arm'), + 'sh4': (mn, None), + 'x86_32': (mn, 32), + 'x86_64': (mn, 64), + } + +mn, attrib = machines[options.machine] + +arch2jit = {'x86': Jit_x86, + 'arm': Jit_arm} + +jitarch = arch2jit[mn.name] + +e, in_str, runtime_dll, segm_to_do, symbol_pool, stack_ad = load_pe_in_vm( + mn, args[0], options) +# e, in_str, runtime_dll, segm_to_do, symbol_pool, stack_ad = +# load_elf_in_vm(mn, args[0], options) +init_arch_C(mn) + +win_api_x86_32.winobjs.runtime_dll = runtime_dll 
+""" +regs = jitarch.vm_get_gpreg() +regs['RSP'] = stack_ad +jitarch.vm_set_gpreg(regs) +""" + +symbol_pool = asmbloc.asm_symbol_pool() +known_blocs = {} +code_blocs_mem_range = [] + + +ad = 0x951DAF +ad = 0x9518C6 +ad = 0x9519FE +symbols_init = {} +for i, r in enumerate(all_regs_ids): + symbols_init[r] = all_regs_ids_init[i] + + +def se_bloc(ad, arch, attrib, sb): + l = asmbloc.asm_label(ad) + b = asmbloc.asm_bloc(l) + job_done = set() + asmbloc.dis_bloc(arch, in_str, b, ad, job_done, symbol_pool, + attrib=attrib) # , lines_wd = 8) + print b + bloc_ir = bloc2IR(arch, attrib, in_str, b, [], symbol_pool) + sb.emulbloc(arch, bloc_ir) + sb.dump_mem() + +sb = symbexec(mn, symbols_init) +se_bloc(ad, mn, attrib, sb) diff --git a/example/unpack_upx.py b/example/unpack_upx.py new file mode 100644 index 00000000..14eac9ef --- /dev/null +++ b/example/unpack_upx.py @@ -0,0 +1,187 @@ +import sys +import os +import inspect +import logging +import struct +from argparse import ArgumentParser + +from elfesteem import pe +from elfesteem import * +from elfesteem.strpatchwork import StrPatchwork + +from miasm2.core import asmbloc +from miasm2.arch.x86.arch import mn_x86 +from miasm2.arch.x86.disasm import dis_x86_32 +from miasm2.jitter.jitload import jitter_x86_32, vm_load_pe, preload_pe, libimp +from miasm2.jitter.jitload import bin_stream_vm +from miasm2.jitter.csts import * +from miasm2.jitter.os_dep import win_api_x86_32 + +# Debug settings # +from pdb import pm + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +# + +# Handle arguments +parser = ArgumentParser(description="Sandbox a PE binary packed with UPX") +parser.add_argument("filename", help="PE binary") +parser.add_argument("-r", "--log-regs", + help="Log registers value for each instruction", + action="store_true") +parser.add_argument("-m", "--log-mn", + help="Log desassembly conversion for each instruction", + action="store_true") +parser.add_argument("-n", 
"--log-newbloc", + help="Log basic blocks processed by the Jitter", + action="store_true") +parser.add_argument("-j", "--jitter", + help="Jitter engine. Possible values are : tcc (default), llvm", + default="tcc") +parser.add_argument("-g", "--graph", + help="Export the CFG graph in graph.txt", + action="store_true") +parser.add_argument("-v", "--verbose", + help="Verbose mode", + action="store_true") +args = parser.parse_args() + +# Verbose mode +if args.verbose is True: + logging.basicConfig(level=logging.INFO) +else: + logging.basicConfig(level=logging.WARNING) + +# Init arch +myjit = jitter_x86_32(jit_type=args.jitter) +myjit.init_stack() + +# Log level (if available with jitter engine) +myjit.jit.log_regs = args.log_regs +myjit.jit.log_mn = args.log_mn +myjit.jit.log_newbloc = args.log_newbloc + +# Load pe and get entry point address +e = vm_load_pe(myjit.vm, args.filename) +libs = libimp() +preload_pe(myjit.vm, e, libs) + +if args.verbose is True: + myjit.vm.vm_dump_memory_page_pool() +ep = e.rva2virt(e.Opthdr.AddressOfEntryPoint) + +# Ensure there is one and only one leave (for OEP discovering) +mdis = dis_x86_32(myjit.bs) +mdis.dont_dis_nulstart_bloc = True +ab = mdis.dis_multibloc(ep) + +bb = asmbloc.basicblocs(ab) +leaves = bb.get_bad_dst() +assert(len(leaves) == 1) +l = leaves.pop() +logging.info(l) +end_label = l.label.offset + +logging.info('final label') +logging.info(end_label) + +# Export CFG graph (dot format) +if args.graph is True: + g = asmbloc.bloc2graph(ab) + open("graph.txt", "w").write(g) + +# User defined methods + + +def mygetproc(myjit): + global libs + ret_ad, args = myjit.func_args_stdcall(2) + libbase, fname = args + + dst_ad = myjit.cpu.EBX + logging.info('EBX ' + hex(dst_ad)) + + if fname < 0x10000: + fname = fname + else: + fname = myjit.get_str_ansi(fname) + logging.info(fname) + + ad = libs.lib_get_add_func(libbase, fname, dst_ad) + myjit.func_ret_stdcall(ret_ad, ad) + + +def kernel32_GetProcAddress(myjit): + return 
def update_binary(myjit):
    """Breakpoint callback: copy the emulated memory back into the PE image.

    Sets the PE entry point to the RVA of the jitter's current ``pc``, then
    overwrites every section of the module-level PE object ``e`` with the
    bytes currently held by the VM for that section.
    """
    e.Opthdr.AddressOfEntryPoint = e.virt2rva(myjit.pc)
    logging.info('updating binary')
    for section in e.SHList:
        # Read ``rawsize`` bytes of the section from the VM and store them at
        # the same virtual address in the PE object.
        e.virt[e.rva2virt(section.addr)] = myjit.vm.vm_get_mem(
            e.rva2virt(section.addr), section.rawsize)
def get_node_name(label, i, n):
    """Build the flow-graph node key for expression *n*.

    The key is the plain tuple ``(label, i, n)``: the bloc label, an index
    inside that bloc, and the expression itself.
    """
    return (label, i, n)
def intra_bloc_flow_symbexec(my_ir, flow_graph, irb):
    """
    Create data flow for an irbloc using symbolic execution

    The bloc is symbolically executed from a state in which every register
    of ``my_ir.arch.regs.all_regs_ids`` is bound to a fresh ``ExprId``.
    For each symbol whose final value differs from that initial binding, an
    edge is added in ``flow_graph`` from every element read by the final
    value to the written symbol.

    On return, ``irb.in_nodes`` maps each read element to its node and
    ``irb.out_nodes`` maps each written symbol to its node.
    """
    in_nodes = {}
    out_nodes = {}
    current_nodes = {}

    # Initial symbolic state: one fresh identifier per register, same name
    # and size as the register, flagged with ``is_term``.
    symbols_init = {}
    for r in my_ir.arch.regs.all_regs_ids:
        x = ExprId(r.name, r.size)
        x.is_term = True
        symbols_init[r] = x

    # Work on a copy so symbols_init still holds the untouched initial values
    # for the comparison below.
    sb = symbexec(my_ir.arch, dict(symbols_init))
    sb.emulbloc(irb)

    for n_w in sb.symbols:
        v = sb.symbols[n_w]
        # Skip symbols still bound to their initial value.
        if n_w in symbols_init and symbols_init[n_w] == v:
            continue
        read_values = v.get_r(cst_read=True)
        # Written nodes are indexed at len(irb.lines); read nodes below are
        # indexed at 0.
        node_n_w = get_node_name(irb.label, len(irb.lines), n_w)

        for n_r in read_values:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                # First time this element is read in the bloc: create its
                # input node.
                node_n_r = get_node_name(irb.label, 0, n_r)
                current_nodes[n_r] = node_n_r
                in_nodes[n_r] = node_n_r

            out_nodes[n_w] = node_n_w
            flow_graph.add_uniq_edge(node_n_r, node_n_w)

    irb.in_nodes = in_nodes
    irb.out_nodes = out_nodes
def create_implicit_flow(my_ir, flow_graph):
    """Add pass-through data flow for values that cross a bloc untouched.

    For every bloc and each of its successors, any ``ExprId`` the successor
    reads (``in_nodes``) that the bloc does not write (``out_nodes``) is
    recorded as flowing through the bloc: an out node is created at index
    ``len(irb.lines)``, an in node at index 0 if missing, and an edge
    in -> out is added to ``flow_graph``.  The bloc's predecessors are then
    re-queued so the new input propagates further up.

    Successors absent from ``my_ir.blocs`` are reported on stdout and
    skipped.
    """
    # first fix IN/OUT
    # If a son reads a node which is not in OUT, add it
    todo = set(my_ir.blocs.keys())
    while todo:
        lbl = todo.pop()
        irb = my_ir.blocs[lbl]
        for lbl_son in my_ir.g.successors(irb.label):
            if lbl_son not in my_ir.blocs:
                # Report the label that is actually missing (the successor),
                # not the bloc currently being processed.
                print("cannot find bloc!! %s" % (lbl_son,))
                continue
            irb_son = my_ir.blocs[lbl_son]
            for n_r in irb_son.in_nodes:
                if n_r in irb.out_nodes:
                    continue
                if not isinstance(n_r, ExprId):
                    continue

                node_n_w = irb.label, len(irb.lines), n_r
                irb.out_nodes[n_r] = node_n_w
                if n_r not in irb.in_nodes:
                    irb.in_nodes[n_r] = irb.label, 0, n_r
                node_n_r = irb.in_nodes[n_r]
                # This bloc gained an input: its predecessors must be
                # examined again.
                for lbl_p in my_ir.g.predecessors(irb.label):
                    todo.add(lbl_p)

                flow_graph.add_uniq_edge(node_n_r, node_n_w)
%s" % ad) + """ + dead = b.dead[0] + for d in dead: + if d in variables: + del(variables[d]) + """ + variables = variables.items() + + s = parent, ad, tuple(sorted(variables)) + """ + state_var = s[1] + if s in self.states_var_done: + print 'skip state' + return + if not ad in self.stateby_ad: + self.stateby_ad[ad] = set() + self.stateby_ad[ad].add(state_var) + + """ + self.todo.add(s) + + """ + if not ad in self.cpt: + self.cpt[ad] = 0 + """ + """ + def get_next_min(self): + state_by_ad = {} + for state in self.todo: + ad = state[1] + if not ad in state_by_ad: + state_by_ad[ad] = [] + state_by_ad[ad].append(state) + print "XX", [len(x) for x in state_by_ad.values()] + state_by_ad = state_by_ad.items() + state_by_ad.sort(key=lambda x:len(x[1])) + state_by_ad.reverse() + return state_by_ad.pop()[1][0] + """ + + def get_next_state(self): + state = self.todo.pop() + return state + + def do_step(self): + if len(self.todo) == 0: + return None + if self.total_done > 600: + print "symbexec watchdog!" 
class DebugBreakpoint:
    """Common parent of every debugger breakpoint kind."""


class DebugBreakpointSoft(DebugBreakpoint):
    """Software breakpoint placed at a single address."""

    def __init__(self, addr):
        # Address the breakpoint is attached to.
        self.addr = addr

    def __str__(self):
        # Address is rendered as 8 zero-padded hex digits.
        return "Soft BP @0x%08x" % self.addr
(self.addr, + self.size, + bp_type) + + @classmethod + def get_access_type(cls, read=False, write=False): + value = 0 + for k, v in cls.type2str.items(): + if v == "R" and read is True: + value += k + if v == "W" and write is True: + value += k + return value + + +class Debugguer(object): + + "Debugguer linked with a Jitter instance" + + def __init__(self, myjit): + "myjit : jitter instance" + self.myjit = myjit + self.bp_list = [] # DebugBreakpointSoft list + self.hw_bp_list = [] # DebugBreakpointHard list + self.mem_watched = [] # Memory areas watched + + def init_run(self, addr): + self.myjit.init_run(addr) + + def add_breakpoint(self, addr): + "Add bp @addr" + bp = DebugBreakpointSoft(addr) + func = lambda x: bp + bp.func = func + self.bp_list.append(bp) + self.myjit.add_breakpoint(addr, func) + + def init_memory_breakpoint(self): + "Set exception handler on EXCEPT_BREAKPOINT_INTERN" + self.myjit.exception_handler + + def add_memory_breakpoint(self, addr, size, read=False, write=False): + "add mem bp @[addr, addr + size], on read/write/both" + access_type = DebugBreakpointMemory.get_access_type(read=read, + write=write) + dbm = DebugBreakpointMemory(addr, size, access_type) + self.hw_bp_list.append(dbm) + self.myjit.vm.vm_add_memory_breakpoint(addr, size, access_type) + + def remove_breakpoint(self, dbs): + "remove the DebugBreakpointSoft instance" + self.bp_list.remove(dbs) + self.myjit.remove_breakpoints_by_callback(dbs.func) + + def remove_breakpoint_by_addr(self, addr): + "remove breakpoints @ addr" + for bp in self.get_breakpoint_by_addr(addr): + self.remove_breakpoint(bp) + + def remove_memory_breakpoint(self, dbm): + "remove the DebugBreakpointMemory instance" + self.hw_bp_list.remove(dbm) + self.myjit.vm.vm_remove_memory_breakpoint(dbm.addr, dbm.access_type) + + def remove_memory_breakpoint_by_addr_access(self, addr, read=False, + write=False): + "remove breakpoints @ addr" + access_type = DebugBreakpointMemory.get_access_type(read=read, + write=write) 
+ for bp in self.hw_bp_list: + if bp.addr == addr and bp.access_type == access_type: + self.remove_memory_breakpoint(bp) + + def get_breakpoint_by_addr(self, addr): + ret = [] + for dbgsoft in self.bp_list: + if dbgsoft.addr == addr: + ret.append(dbgsoft) + return ret + + def get_breakpoints(self): + return self.bp_list + + def active_trace(self, mn=None, regs=None, newbloc=None): + if mn is not None: + self.myjit.jit.log_mn = mn + if regs is not None: + self.myjit.jit.log_regs = regs + if newbloc is not None: + self.myjit.jit.log_newbloc = newbloc + + def handle_exception(self, res): + if res is None: + return + + if isinstance(res, DebugBreakpointSoft): + print "Breakpoint reached @0x%08x" % res.addr + elif isinstance(res, ExceptionHandle): + if res == ExceptionHandle.memoryBreakpoint(): + print "Memory breakpoint reached!" + + # Remove flag + except_flag = self.myjit.vm.vm_get_exception() + self.myjit.vm.vm_set_exception(except_flag ^ res.except_flag) + + else: + raise NotImplementedError("Unknown Except") + else: + raise NotImplementedError("type res") + + def step(self): + "Step in jit" + + self.myjit.jit.set_options(jit_maxline=1) + self.myjit.jit.updt_automod_code(self.myjit.vm, self.myjit.pc, 8) + + res = self.myjit.continue_run(step=True) + self.handle_exception(res) + + self.myjit.jit.set_options(jit_maxline=50) + self.on_step() + + return res + + def run(self): + res = self.myjit.continue_run() + self.handle_exception(res) + return res + + def get_mem(self, addr, size=0xF): + "hexdump @addr, size" + + hexdump(self.myjit.vm.vm_get_mem(addr, size)) + + def get_mem_raw(self, addr, size=0xF): + "hexdump @addr, size" + return self.myjit.vm.vm_get_mem(addr, size) + + def watch_mem(self, addr, size=0xF): + self.mem_watched.append((addr, size)) + + def on_step(self): + for addr, size in self.mem_watched: + print "@0x%08x:" % addr + self.get_mem(addr, size) + + def get_reg_value(self, reg_name): + return getattr(self.myjit.cpu, reg_name) + + def 
set_reg_value(self, reg_name, value): + + # Handle PC case + if reg_name == self.myjit.my_ir.pc.name: + self.init_run(value) + + setattr(self.myjit.cpu, reg_name, value) + + def get_gpreg_all(self): + "Return general purposes registers" + return self.myjit.cpu.vm_get_gpreg() + + +class DebugCmd(cmd.Cmd, object): + + "CommandLineInterpreter for Debugguer instance" + + color_g = '\033[92m' + color_e = '\033[0m' + color_b = '\033[94m' + color_r = '\033[91m' + + intro = color_g + "=== Miasm2 Debugging shell ===\nIf you need help, " + intro += "type 'help' or '?'" + color_e + prompt = color_b + "$> " + color_e + + def __init__(self, dbg): + "dbg : Debugguer" + self.dbg = dbg + super(DebugCmd, self).__init__() + + # Debug methods + + def print_breakpoints(self): + bp_list = self.dbg.bp_list + if len(bp_list) == 0: + print "No breakpoints." + else: + for i, b in enumerate(bp_list): + print "%d\t0x%08x" % (i, b.addr) + + def print_watchmems(self): + watch_list = self.dbg.mem_watched + if len(watch_list) == 0: + print "No memory watchpoints." 
+ else: + print "Num\tAddress \tSize" + for i, w in enumerate(watch_list): + addr, size = w + print "%d\t0x%08x\t0x%08x" % (i, addr, size) + + def print_registers(self): + regs = self.dbg.get_gpreg_all() + + # Display settings + title1 = "Registers" + title2 = "Values" + max_name_len = max(map(len, regs.keys() + [title1])) + + # Print value table + s = "%s%s | %s" % ( + title1, " " * (max_name_len - len(title1)), title2) + print s + print "-" * len(s) + for name, value in sorted(regs.items(), key=lambda x: x[0]): + print "%s%s | %s" % (name, + " " * (max_name_len - len(name)), + hex(value).replace("L", "")) + + def add_breakpoints(self, bp_addr): + for addr in bp_addr: + if "0x" in addr: + addr = int(addr, 16) + else: + addr = int(addr) + + good = True + for i, dbg_obj in enumerate(self.dbg.bp_list): + if dbg_obj.addr == addr: + good = False + break + if good is False: + print "Breakpoint 0x%08x already set (%d)" % (addr, i) + else: + l = len(self.dbg.bp_list) + self.dbg.add_breakpoint(addr) + print "Breakpoint 0x%08x successfully added ! 
(%d)" % (addr, l) + + display_mode = {"mn": None, + "regs": None, + "newbloc": None} + + def update_display_mode(self): + self.display_mode = {"mn": self.dbg.myjit.jit.log_mn, + "regs": self.dbg.myjit.jit.log_regs, + "newbloc": self.dbg.myjit.jit.log_newbloc} + + # Command line methods + def print_warning(self, s): + print self.color_r + s + self.color_e + + def onecmd(self, line): + cmd_translate = {"h": "help", + "q": "exit", + "e": "exit", + "!": "exec", + "r": "run", + "i": "info", + "b": "breakpoint", + "s": "step", + "d": "dump"} + + if len(line) >= 2 and \ + line[1] == " " and \ + line[:1] in cmd_translate: + line = cmd_translate[line[:1]] + line[1:] + + if len(line) == 1 and line in cmd_translate: + line = cmd_translate[line] + + r = super(DebugCmd, self).onecmd(line) + return r + + def can_exit(self): + return True + + def do_display(self, arg): + if arg == "": + self.help_display() + return + + args = arg.split(" ") + if args[-1].lower() not in ["on", "off"]: + self.print_warning("/!\ %s not in 'on' / 'off'" % args[-1]) + return + mode = args[-1].lower() == "on" + d = {} + for a in args[:-1]: + d[a] = mode + self.dbg.active_trace(**d) + self.update_display_mode() + + def help_display(self): + print "Enable/Disable tracing." + print "Usage: display <mode1> <mode2> ... on|off" + print "Available modes are:" + for k in self.display_mode: + print "\t%s" % k + print "Use 'info display' to get current values" + + def do_watchmem(self, arg): + if arg == "": + self.help_watchmem() + return + + args = arg.split(" ") + if len(args) >= 2: + if "0x" in args[1]: + size = int(args[1], 16) + else: + size = int(args[1]) + else: + size = 0xF + if "0x" in args[0]: + addr = int(args[0], 16) + else: + addr = int(args[0]) + + self.dbg.watch_mem(addr, size) + + def help_watchmem(self): + print "Add a memory watcher." 
+ print "Usage: watchmem <addr> [size]" + print "Use 'info watchmem' to get current memory watchers" + + def do_info(self, arg): + av_info = ["registers", + "display", + "breakpoints", + "watchmem"] + + if arg == "": + print "'info' must be followed by the name of an info command." + print "List of info subcommands:" + for k in av_info: + print "\t%s" % k + + if arg.startswith("b"): + # Breakpoint + self.print_breakpoints() + + if arg.startswith("d"): + # Display + self.update_display_mode() + for k, v in self.display_mode.items(): + print "%s\t\t%s" % (k, v) + + if arg.startswith("w"): + # Watchmem + self.print_watchmems() + + if arg.startswith("r"): + # Registers + self.print_registers() + + def help_info(self): + print "Generic command for showing things about the program being" + print "debugged. Use 'info' without arguments to get the list of" + print "available subcommands." + + def do_breakpoint(self, arg): + if arg == "": + self.help_breakpoint() + else: + addrs = arg.split(" ") + self.add_breakpoints(addrs) + + def help_breakpoint(self): + print "Add breakpoints to argument addresses." + print "Example:" + print "\tbreakpoint 0x11223344" + print "\tbreakpoint 1122 0xabcd" + + def do_step(self, arg): + if arg == "": + nb = 1 + else: + nb = int(arg) + for _ in xrange(nb): + self.dbg.step() + + def help_step(self): + print "Step program until it reaches a different source line." + print "Argument N means do this N times (or till program stops" + print "for another reason)." + + def do_dump(self, arg): + if arg == "": + self.help_dump() + else: + args = arg.split(" ") + if len(args) >= 2: + if "0x" in args[1]: + size = int(args[1], 16) + else: + size = int(args[1]) + else: + size = 0xF + if "0x" in args[0]: + addr = int(args[0], 16) + else: + addr = int(args[0]) + + self.dbg.get_mem(addr, size) + + def help_dump(self): + print "Dump <addr> [size]. Dump size bytes at addr." 
def get_ira(mnemo, attrib):
    """Return the IR-analysis class matching ``(mnemo.name, attrib)``.

    Supported pairs are ``("arm", "arm")``, ``("x86", 32)`` and
    ``("x86", 64)``; the matching module is imported only when selected.

    Raises ValueError for any other pair.
    """
    arch = mnemo.name, attrib
    if arch == ("arm", "arm"):
        from miasm2.arch.arm.ira import ir_a_arm_base as ira
    elif arch == ("x86", 32):
        from miasm2.arch.x86.ira import ir_a_x86_32 as ira
    elif arch == ("x86", 64):
        from miasm2.arch.x86.ira import ir_a_x86_64 as ira
    else:
        # The lookup key is (name, attrib): report both, otherwise a known
        # name with an unsupported attrib yields a misleading message.
        raise ValueError('unknown architecture: %s (attrib=%r)'
                         % (mnemo.name, attrib))
    return ira
# Disassembly callbacks run by guess_multi_cb(), in registration order.
guess_funcs = []


def guess_multi_cb(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool):
    """Call every function registered in ``guess_funcs`` on the same bloc.

    Each callback receives the six arguments unchanged, in the same order.
    """
    cb_args = (mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool)
    for callback in guess_funcs:
        callback(*cb_args)
diff --git a/miasm2/analysis/gdbserver.py b/miasm2/analysis/gdbserver.py new file mode 100644 index 00000000..8d0135e9 --- /dev/null +++ b/miasm2/analysis/gdbserver.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import socket +import struct +import time +import logging +from StringIO import StringIO +import miasm2.analysis.debugging as debugging +from miasm2.jitter.jitload import ExceptionHandle + + +class GdbServer(object): + + "Debugguer binding for GDBServer protocol" + + general_registers_order = [] + general_registers_size = {} # RegName : Size in octet + status = "S05" + + def __init__(self, dbg, port=4455): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('localhost', port)) + server.listen(1) + self.server = server + self.dbg = dbg + + # Communication methods + + def compute_checksum(self, data): + return chr(sum(map(ord, data)) % 256).encode("hex") + + def get_messages(self): + all_data = "" + data = self.sock.recv(4096) + all_data += data + while (len(data) == 4096 or data == ""): + if data == "": + # Avoid consuming CPU + time.sleep(0.001) + continue + data = self.sock.recv(4096) + all_data += data + + logging.debug("<- %r" % all_data) + self.recv_queue += self.parse_messages(all_data) + + def parse_messages(self, data): + buf = StringIO(data) + + msgs = [] + + while (buf.tell() < buf.len): + token = buf.read(1) + if token == "+": + continue + if token == "-": + raise NotImplementedError("Resend packet") + if token == "$": + packet_data = "" + c = buf.read(1) + while c != "#": + packet_data += c + c = buf.read(1) + checksum = buf.read(2) + if checksum != self.compute_checksum(packet_data): + raise ValueError("Incorrect checksum") + + msgs.append(packet_data) + + return msgs + + def send_string(self, s): + self.send_queue.append("O" + s.encode("hex")) + + def process_messages(self): + + while self.recv_queue: + msg = self.recv_queue.pop(0) + 
buf = StringIO(msg) + msg_type = buf.read(1) + + self.send_queue.append("+") + + if msg_type == "q": + if msg.startswith("qSupported"): + self.send_queue.append("PacketSize=3fff") + elif msg.startswith("qC"): + # Current thread + self.send_queue.append("") + elif msg.startswith("qAttached"): + # Not supported + self.send_queue.append("") + elif msg.startswith("qTStatus"): + # Not supported + self.send_queue.append("") + elif msg.startswith("qfThreadInfo"): + # Not supported + self.send_queue.append("") + else: + raise NotImplementedError() + + elif msg_type == "H": + # Set current thread + self.send_queue.append("OK") + + elif msg_type == "?": + # Report why the target halted + self.send_queue.append(self.status) # TRAP signal + + elif msg_type == "g": + # Report all general register values + self.send_queue.append(self.report_general_register_values()) + + elif msg_type == "p": + # Read a specific register + reg_num = int(buf.read(), 16) + self.send_queue.append(self.read_register(reg_num)) + + elif msg_type == "P": + # Set a specific register + reg_num, value = buf.read().split("=") + reg_num = int(reg_num, 16) + value = int(value.decode("hex")[::-1].encode("hex"), 16) + self.set_register(reg_num, value) + self.send_queue.append("OK") + + elif msg_type == "m": + # Read memory + addr, size = map(lambda x: int(x, 16), buf.read().split(",")) + self.send_queue.append(self.read_memory(addr, size)) + + elif msg_type == "k": + # Kill + self.sock.close() + exit(1) + + elif msg_type == "!": + # Extending debugging will be used + self.send_queue.append("OK") + + elif msg_type == "v": + if msg == "vCont?": + # Is vCont supported ? 
+ self.send_queue.append("") + + elif msg_type == "s": + # Step + self.dbg.step() + self.send_queue.append("S05") # TRAP signal + + elif msg_type == "Z": + # Add breakpoint or watchpoint + bp_type = buf.read(1) + if bp_type == "0": + # Exec breakpoint + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + if size != 1: + raise NotImplementedError("Bigger size") + self.dbg.add_breakpoint(addr) + self.send_queue.append("OK") + + elif bp_type == "1": + # Hardware BP + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + self.dbg.add_memory_breakpoint(addr, size, + read=True, + write=True) + self.send_queue.append("OK") + + elif bp_type in ["2", "3", "4"]: + # Memory breakpoint + assert(buf.read(1) == ",") + read = bp_type in ["3", "4"] + write = bp_type in ["2", "4"] + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + self.dbg.add_memory_breakpoint(addr, size, + read=read, + write=write) + self.send_queue.append("OK") + + else: + raise ValueError("Impossible value") + + elif msg_type == "z": + # Remove breakpoint or watchpoint + bp_type = buf.read(1) + if bp_type == "0": + # Exec breakpoint + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + if size != 1: + raise NotImplementedError("Bigger size") + dbgsoft = self.dbg.get_breakpoint_by_addr(addr) + assert(len(dbgsoft) == 1) + self.dbg.remove_breakpoint(dbgsoft[0]) + self.send_queue.append("OK") + + elif bp_type == "1": + # Hardware BP + assert(buf.read(1) == ",") + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + self.dbg.remove_memory_breakpoint_by_addr_access( + addr, read=True, write=True) + self.send_queue.append("OK") + + elif bp_type in ["2", "3", "4"]: + # Memory breakpoint + assert(buf.read(1) == ",") + read = bp_type in ["3", "4"] + write = bp_type in ["2", "4"] + addr, size = map( + lambda x: int(x, 16), buf.read().split(",")) + + 
self.dbg.remove_memory_breakpoint_by_addr_access( + addr, read=read, write=write) + self.send_queue.append("OK") + + else: + raise ValueError("Impossible value") + + elif msg_type == "c": + # Continue + self.status = "" + self.send_messages() + ret = self.dbg.run() + if isinstance(ret, debugging.DebugBreakpointSoft): + self.status = "S05" + self.send_queue.append("S05") # TRAP signal + elif isinstance(ret, ExceptionHandle): + if ret == ExceptionHandle.memoryBreakpoint(): + self.status = "S05" + self.send_queue.append("S05") + else: + raise NotImplementedError("Unknown Except") + else: + raise NotImplementedError() + + else: + raise NotImplementedError( + "Not implemented: message type '%s'" % msg_type) + + def send_messages(self): + for msg in self.send_queue: + if msg == "+": + data = "+" + else: + data = "$%s#%s" % (msg, self.compute_checksum(msg)) + logging.debug("-> %r" % data) + self.sock.send(data) + self.send_queue = [] + + def main_loop(self): + self.recv_queue = [] + self.send_queue = [] + + self.send_string("Test\n") + + while (self.sock): + self.get_messages() + self.process_messages() + self.send_messages() + + def run(self): + self.sock, self.address = self.server.accept() + self.main_loop() + + # Debugguer processing methods + def report_general_register_values(self): + s = "" + for i in xrange(len(self.general_registers_order)): + s += self.read_register(i) + return s + + def read_register(self, reg_num): + reg_name = self.general_registers_order[reg_num] + reg_value = self.read_register_by_name(reg_name) + size = self.general_registers_size[reg_name] + + pack_token = "" + if size == 1: + pack_token = "<B" + elif size == 2: + pack_token = "<H" + elif size == 4: + pack_token = "<I" + elif size == 8: + pack_token = "<Q" + else: + raise NotImplementedError("Unknown size") + + return struct.pack(pack_token, reg_value).encode("hex") + + def set_register(self, reg_num, value): + reg_name = self.general_registers_order[reg_num] + 
self.dbg.set_reg_value(reg_name, value) + + def read_register_by_name(self, reg_name): + return self.dbg.get_reg_value(reg_name) + + def read_memory(self, addr, size): + except_flag_vm = self.dbg.myjit.vm.vm_get_exception() + try: + return self.dbg.get_mem_raw(addr, size).encode("hex") + except RuntimeError: + self.dbg.myjit.vm.vm_set_exception(except_flag_vm) + return "00" * size + + +class GdbServer_x86_32(GdbServer): + + "Extend GdbServer for x86 32bits purposes" + + general_registers_order = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", + "EDI", "EIP", "EFLAGS", "CS", "SS", "DS", "ES", + "FS", "GS"] + + general_registers_size = {"EAX": 4, + "ECX": 4, + "EDX": 4, + "EBX": 4, + "ESP": 4, + "EBP": 4, + "ESI": 4, + "EDI": 4, + "EIP": 4, + "EFLAGS": 2, + "CS": 2, + "SS": 2, + "DS": 2, + "ES": 2, + "FS": 2, + "GS": 2} + + register_ignore = [ + "tf", "i_f", "nt", "rf", "vm", "ac", "vif", "vip", "i_d"] + + def read_register_by_name(self, reg_name): + sup_func = super(GdbServer_x86_32, self).read_register_by_name + if reg_name == "EFLAGS": + val = 0 + eflags_args = [ + "cf", 1, "pf", 0, "af", 0, "zf", "nf", "tf", "i_f", "df", "of"] + eflags_args += ["nt", 0, "rf", "vm", "ac", "vif", "vip", "i_d"] + eflags_args += [0] * 10 + + for i, arg in enumerate(eflags_args): + if isinstance(arg, str): + if arg not in self.register_ignore: + to_add = sup_func(arg) + else: + to_add = 0 + else: + to_add = arg + + val |= (to_add << i) + return val + else: + return sup_func(reg_name) + + +class GdbServer_msp430(GdbServer): + + "Extend GdbServer for msp430 purposes" + + general_registers_order = ["PC", "SP", "SR", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", + "R15"] + + general_registers_size = {"PC": 2, + "SP": 2, + "SR": 2, + "R3": 2, + "R2": 2, + "R5": 2, + "R6": 2, + "R7": 2, + "R8": 2, + "R9": 2, + "R10": 2, + "R11": 2, + "R12": 2, + "R13": 2, + "R12": 2, + "R15": 2} + + def read_register_by_name(self, reg_name): + sup_func = 
super(GdbServer_msp430, self).read_register_by_name + if reg_name == "SR": + o = sup_func('res') + o <<= 1 + o |= sup_func('of') + o <<= 1 + o |= sup_func('scg1') + o <<= 1 + o |= sup_func('scg0') + o <<= 1 + o |= sup_func('osc') + o <<= 1 + o |= sup_func('cpuoff') + o <<= 1 + o |= sup_func('gie') + o <<= 1 + o |= sup_func('nf') + o <<= 1 + o |= sup_func('zf') + o <<= 1 + o |= sup_func('cf') + + return o + else: + return sup_func(reg_name) + diff --git a/miasm2/arch/__init__.py b/miasm2/arch/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/arch/__init__.py diff --git a/miasm2/arch/arm/__init__.py b/miasm2/arch/arm/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm2/arch/arm/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py new file mode 100644 index 00000000..3c219710 --- /dev/null +++ b/miasm2/arch/arm/arch.py @@ -0,0 +1,2168 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import logging +from pyparsing import * +from miasm2.expression.expression import * +from miasm2.core.cpu import * +from collections import defaultdict +from miasm2.core.bin_stream import bin_stream +import regs as regs_module +from regs import * + +# A1 encoding + +log = logging.getLogger("armdis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + +# arm regs ############## +reg_dum = ExprId('DumReg') + +gen_reg('PC', globals()) + +# GP +regs_str = ['R%d' % r for r in xrange(0x10)] +regs_str[13] = 'SP' +regs_str[14] = 'LR' +regs_str[15] = 'PC' +regs_expr = [ExprId(x, 32) for x in regs_str] + +gpregs = reg_info(regs_str, regs_expr) + +gpregs_pc = reg_info(regs_str[-1:], regs_expr[-1:]) +gpregs_sp = reg_info(regs_str[13:14], regs_expr[13:14]) + +gpregs_nosppc = reg_info(regs_str[:13] + [str(reg_dum), 
regs_str[14]], + regs_expr[:13] + [reg_dum, regs_expr[14]]) + + +# psr +sr_flags = "cxsf" +cpsr_regs_str = [] +spsr_regs_str = [] +for i in xrange(0x10): + o = "" + for j in xrange(4): + if i & (1 << j): + o += sr_flags[j] + cpsr_regs_str.append("CPSR_%s" % o) + spsr_regs_str.append("SPSR_%s" % o) + +# psr_regs_str = ['CPSR', 'SPSR'] +# psr_regs_expr = [ExprId(x, 32) for x in psr_regs_str] + +# psr_regs = reg_info(psr_regs_str, psr_regs_expr) + +cpsr_regs_expr = [ExprId(x, 32) for x in cpsr_regs_str] +spsr_regs_expr = [ExprId(x, 32) for x in spsr_regs_str] + +cpsr_regs = reg_info(cpsr_regs_str, cpsr_regs_expr) +spsr_regs = reg_info(spsr_regs_str, spsr_regs_expr) + +# CP +cpregs_str = ['c%d' % r for r in xrange(0x10)] +cpregs_expr = [ExprId(x) for x in cpregs_str] + +cp_regs = reg_info(cpregs_str, cpregs_expr) + +# P +pregs_str = ['p%d' % r for r in xrange(0x10)] +pregs_expr = [ExprId(x) for x in pregs_str] + +p_regs = reg_info(pregs_str, pregs_expr) + + +# parser helper ########### + +def tok_reg_duo(s, l, t): + t = t[0] + i1 = gpregs.expr.index(t[0]) + i2 = gpregs.expr.index(t[1]) + o = [] + for i in xrange(i1, i2 + 1): + o.append(gpregs.expr[i]) + return o + +LPARENTHESIS = Literal("(") +RPARENTHESIS = Literal(")") + +LACC = Suppress(Literal("{")) +RACC = Suppress(Literal("}")) +MINUS = Suppress(Literal("-")) +CIRCUNFLEX = Literal("^") + + +def check_bounds(left_bound, right_bound, value): + if left_bound <= value and value <= right_bound: + return ExprInt32(value) + else: + raise ValueError('shift operator immediate value out of bound') + +int_1_31 = str_int.copy().setParseAction(lambda v: check_bounds(1, 31, v[0])) +int_1_32 = str_int.copy().setParseAction(lambda v: check_bounds(1, 32, v[0])) + + +def reglistparse(s, l, t): + t = t[0] + if t[-1] == "^": + return ExprOp('sbit', ExprOp('reglist', *t[:-1])) + return ExprOp('reglist', *t) + + +allshifts = ['<<', '>>', 'a>>', '>>>', 'rrx'] +allshifts_armt = ['<<', '>>', 'a>>', '>>>', 'rrx'] + +shift2expr_dct = 
{'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', + 'ROR': ">>>", 'RRX': "rrx"} + +expr2shift_dct = dict([(x[1], x[0]) for x in shift2expr_dct.items()]) + + +def op_shift2expr(s, l, t): + return shift2expr_dct[t[0]] + +reg_duo = Group(gpregs.parser + MINUS + + gpregs.parser).setParseAction(tok_reg_duo) +reg_or_duo = reg_duo | gpregs.parser +gpreg_list = Group(LACC + delimitedList( + reg_or_duo, delim=',') + RACC + Optional(CIRCUNFLEX)) +gpreg_list.setParseAction(reglistparse) + +LBRACK = Suppress("[") +RBRACK = Suppress("]") +COMMA = Suppress(",") +all_binaryop_1_31_shifts_t = literal_list( + ['LSL', 'ROR']).setParseAction(op_shift2expr) +all_binaryop_1_32_shifts_t = literal_list( + ['LSR', 'ASR']).setParseAction(op_shift2expr) +all_unaryop_shifts_t = literal_list(['RRX']).setParseAction(op_shift2expr) + +allshifts_t_armt = literal_list( + ['LSL', 'LSR', 'ASR', 'ROR', 'RRX']).setParseAction(op_shift2expr) + +gpreg_p = gpregs.parser + +psr_p = cpsr_regs.parser | spsr_regs.parser + + +def shift2expr(t): + if len(t) == 1: + return t[0] + elif len(t) == 2: + return ExprOp(t[1], t[0]) + elif len(t) == 3: + return ExprOp(t[1], t[0], t[2]) + +variable, operand, base_expr = gen_base_expr() + +int_or_expr = base_expr + + +def ast_id2expr(t): + if not t in mn_arm.regs.all_regs_ids_byname: + r = ExprId(t) + else: + r = mn_arm.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt32(a) + + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +shift_off = (gpregs.parser + Optional( + (all_unaryop_shifts_t) | + (all_binaryop_1_31_shifts_t + (gpregs.parser | int_1_31)) | + (all_binaryop_1_32_shifts_t + (gpregs.parser | int_1_32)) +)).setParseAction(shift2expr) +shift_off |= base_expr + + +def deref2expr_nooff(s, l, t): + t = t[0] + # XXX default + return ExprOp("preinc", t[0], ExprInt32(0)) + + +def deref2expr_pre(s, l, t): + t = t[0] + if len(t) == 1: + return ExprOp("preinc", t[0], ExprInt32(0)) + elif len(t) == 2: 
+ return ExprOp("preinc", t[0], t[1]) + else: + raise NotImplementedError('len(t) > 2') + + +def deref2expr_post(s, l, t): + t = t[0] + return ExprOp("postinc", t[0], t[1]) + + +def deref_wb(s, l, t): + t = t[0] + if t[-1] == '!': + return ExprOp('wback', *t[:-1]) + return t[0] + +# shift_off.setParseAction(deref_off) +deref_nooff = Group( + LBRACK + gpregs.parser + RBRACK).setParseAction(deref2expr_nooff) +deref_pre = Group(LBRACK + gpregs.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) +deref_post = Group(LBRACK + gpregs.parser + RBRACK + + COMMA + shift_off).setParseAction(deref2expr_post) +deref = Group((deref_post | deref_pre | deref_nooff) + + Optional('!')).setParseAction(deref_wb) + + +def parsegpreg_wb(s, l, t): + t = t[0] + if t[-1] == '!': + return ExprOp('wback', *t[:-1]) + return t[0] + +gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(parsegpreg_wb) + + +# + + +cond_list = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', + 'HI', 'LS', 'GE', 'LT', 'GT', 'LE', ''] # , 'NV'] +cond_dct = dict([(x[1], x[0]) for x in enumerate(cond_list)]) +# default_prio = 0x1337 + +bm_cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) # cond_dct) + + +def permut_args(order, args): + l = [] + for i, x in enumerate(order): + l.append((x.__class__, i)) + l = dict(l) + out = [None for x in xrange(len(args))] + for a in args: + out[l[a.__class__]] = a + return out + + +class additional_info: + + def __init__(self): + self.except_on_instr = False + self.lnk = None + self.cond = None + + +class instruction_arm(instruction): + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_arm, self).__init__(*args, **kargs) + + def dstflow(self): + if self.name.startswith('BIC'): + return False + return self.name.startswith('B') + + def dstflow2label(self, symbol_pool): + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == 'BLX': + ad = e.arg + 8 + self.offset + else: + ad = e.arg + 8 + 
self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0] = s + + def breakflow(self): + if self.name.startswith('B') and not self.name.startswith('BIC'): + return True + if self.name.startswith("LDM") and PC in self.args[1].args: + return True + + if self.args and PC in self.args[0].get_r(): + return True + return False + + def is_subcall(self): + if self.name == 'BLX': + return True + return self.additional_info.lnk + + def getdstflow(self, symbol_pool): + if self.name in ['CBZ', 'CBNZ']: + return [self.args[1]] + return [self.args[0]] + + def splitflow(self): + if self.additional_info.lnk: + return True + if self.name == 'BX': + return False + return self.breakflow() and self.additional_info.cond != 14 + + def get_symbol_size(self, symbol, symbol_pool): + return 32 + + def fixDstOffset(self): + e = self.args[0] + print 'FIX', e, self.offset, self.l + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + log.warning('zarb dst %r' % e) + return + off = e.arg - (self.offset + 4 + self.l) + print hex(off) + if int(off % 4): + raise ValueError('strange offset! 
%r' % off) + self.args[0] = ExprInt32(off) + print 'final', self.args[0] + + def get_args_expr(self): + args = [a for a in self.args] + return args + + +class instruction_armt(instruction_arm): + + def __init__(self, *args, **kargs): + super(instruction_armt, self).__init__(*args, **kargs) + + def dstflow(self): + if self.name.startswith('BIC'): + return False + if self.name in ["CBZ", "CBNZ"]: + return True + return self.name.startswith('B') + + def dstflow2label(self, symbol_pool): + if self.name in ["CBZ", "CBNZ"]: + e = self.args[1] + else: + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == 'BLX': + ad = e.arg + 4 + (self.offset & 0xfffffffc) + else: + ad = e.arg + 4 + self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + if self.name in ["CBZ", "CBNZ"]: + self.args[1] = s + else: + self.args[0] = s + + def breakflow(self): + if self.name in ['B', 'BL', 'BLX', + 'BEQ', 'BNE', 'BCS', 'BCC', 'BMI', 'BPL', 'BVS', + 'BVC', 'BHI', 'BLS', 'BGE', 'BLT', 'BGT', 'BLE', + 'CBZ', 'CBNZ']: + return True + if self.name.startswith("LDM") and PC in self.args[1].args: + return True + if self.args and PC in self.args[0].get_r(): + return True + return False + + def splitflow(self): + if self.name in ['BL', 'BLX', + 'BEQ', 'BNE', 'BCS', 'BCC', 'BMI', 'BPL', 'BVS', + 'BVC', 'BHI', 'BLS', 'BGE', 'BLT', 'BGT', 'BLE', + 'CBZ', 'CBNZ']: + return True + return False + + def is_subcall(self): + if self.name in ['BL', 'BLX']: + return True + return False + +mode_arm = 'arm' +mode_armthumb = 'armt' + + +class mn_arm(cls_mn): + delayslot = 0 + name = "arm" + regs = regs_module + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = {mode_arm: PC, mode_armthumb: PC} + sp = {mode_arm: SP, mode_armthumb: SP} + instruction = instruction_arm + max_instruction_len = 4 + + @classmethod + def fromstring(cls, s, mode='arm'): + return super(mn_arm, 
cls).fromstring(s, mode) + + @classmethod + def dis(cls, bs_o, mode_o='arm', offset=0): + return super(mn_arm, cls).dis(bs_o, mode_o, offset) + + @classmethod + def getpc(cls, attrib): + return PC + + @classmethod + def getsp(cls, attrib): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + info.cond = self.cond.value + return info + + @classmethod + def getbits(cls, bs, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start / 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = "" + for _ in xrange(l): + n_offset = (offset & ~3) + 3 - offset % 4 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 32, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + l = sum([x.l for x in fields]) + if l == 32: + return fields + return [bm_cond] + fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = mode_arm + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_arm, self).value(mode) + return [x[::-1] for x in v] + + def fixDstOffset(self): + e = self.args[0].expr + + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + # raise ValueError('dst must be int or label') + log.warning('zarb dst %r' % e) + return + # return ExprInt32(e.arg - (self.offset + self.l)) + off = e.arg - (self.offset + 4 + self.l) + if int(off % 4): + raise ValueError('strange offset! 
%r' % off) + self.args[0].expr = ExprInt32(off / 4) + + def get_symbol_size(self, symbol, symbol_pool, mode): + return 32 + + +class mn_armt(cls_mn): + name = "armt" + regs = regs_module + delayslot = 0 + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = PC + sp = SP + instruction = instruction_armt + max_instruction_len = 8 + + @classmethod + def fromstring(cls, s, mode='armt'): + return super(mn_armt, cls).fromstring(s, mode) + + @classmethod + def dis(cls, bs_o, mode_o='armt', offset=0): + return super(mn_armt, cls).dis(bs_o, mode_o, offset) + + @classmethod + def getpc(cls, attrib): + return PC + + @classmethod + def getsp(cls, attrib): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + info.cond = 14 # COND_ALWAYS + return info + + @classmethod + def getbits(cls, bs, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start / 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = "" + for _ in xrange(l): + n_offset = (offset & ~1) + 1 - offset % 2 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l in [16, 32], "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + return list(fields) + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = mode_armthumb + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_armt, self).value(mode) + out 
= [] + for x in v: + if len(x) == 2: + out.append(x[::-1]) + elif len(x) == 4: + out.append(x[:2][::-1] + x[2:4][::-1]) + return out + + def get_args_expr(self): + args = [a.expr for a in self.args] + return args + + def get_symbol_size(self, symbol, symbol_pool, mode): + return 32 + + +class arm_reg(reg_noarg, m_arg): + pass + + +class arm_gpreg_noarg(reg_noarg): + reg_info = gpregs + parser = reg_info.parser + + +class arm_gpreg(arm_reg): + reg_info = gpregs + parser = reg_info.parser + + +class arm_reg_wb(arm_reg): + reg_info = gpregs + parser = gpregs_wb + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + return '%s' % e + return "%s!" % e.args[0] + + def decode(self, v): + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.wback.value: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + e = self.expr + self.parent.wback.value = 0 + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + if isinstance(e, ExprId): + self.value = self.reg_info.expr.index(e) + else: + self.parent.wback.value = 1 + self.value = self.reg_info.expr.index(e.args[0]) + return True + + +class arm_psr(m_arg): + parser = psr_p + + def decode(self, v): + v = v & self.lmask + if self.parent.psr.value == 0: + e = cpsr_regs.expr[v] + else: + e = spsr_regs.expr[v] + self.expr = e + return True + + def encode(self): + e = self.expr + if e in spsr_regs.expr: + self.parent.psr.value = 1 + v = spsr_regs.expr.index(e) + elif e in cpsr_regs.expr: + self.parent.psr.value = 0 + v = cpsr_regs.expr.index(e) + else: + return False + self.value = v + return True + + +class arm_cpreg(arm_reg): + reg_info = cp_regs + parser = reg_info.parser + + +class arm_preg(arm_reg): + reg_info = p_regs + parser = reg_info.parser + + +class arm_imm(imm_noarg, m_arg): + parser = base_expr + + +class arm_offs(arm_imm): + parser = base_expr + + def int2expr(self, v): + if v & ~self.intmask != 0: + return None + return 
ExprInt_fromsize(self.intsize, v) + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + def decode(self, v): + v = v & self.lmask + if (1 << (self.l - 1)) & v: + v |= ~0 ^ self.lmask + v = self.decodeval(v) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if (1 << (self.l - 1)) & v: + v = -((0xffffffff ^ v) + 1) + v = self.encodeval(v) + self.value = (v & 0xffffffff) & self.lmask + return True + + +class arm_imm8_12(m_arg): + parser = deref + + def decode(self, v): + v = v & self.lmask + if self.parent.updown.value: + e = ExprInt32(v << 2) + else: + e = ExprInt32(-v << 2) + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, e) + else: + e = ExprOp('postinc', self.parent.rn.expr, e) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + self.parent.updown.value = 1 + e = self.expr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.value = 0 + return True + e = e.args[1] + if not isinstance(e, ExprInt): + log.debug('should be int %r' % e) + return False + v = int(e.arg) + if v < 0 or v & (1 << 31): + self.parent.updown.value = 0 + v = -v & 0xFFFFFFFF + if v & 0x3: + log.debug('arg shoulb be 4 aligned') + return False + v >>= 2 + self.value = v + return True + + @staticmethod + def arg2str(e): + wb = False + if isinstance(e, ExprOp) and e.op == 'wback': + wb = True + e = e.args[0] + if isinstance(e, ExprId): + r = e + s = None + else: + if len(e.args) == 1 and isinstance(e.args[0], ExprId): + r, s = e.args[0], None + elif isinstance(e.args[0], ExprId): + r, s = 
e.args[0], e.args[1] + else: + r, s = e.args[0].args + if isinstance(s, ExprOp) and s.op in expr2shift_dct: + s = ' '.join([str(x) + for x in s.args[0], expr2shift_dct[s.op], s.args[1]]) + if isinstance(e, ExprOp) and e.op == 'preinc': + if s and not (isinstance(s, ExprInt) and s.arg == 0): + o = '[%s, %s]' % (r, s) + else: + o = '[%s]' % (r) + else: + o = '[%s]' % r + if s and not (isinstance(s, ExprInt) and s.arg == 0): + o += ', %s' % s + if wb: + o += "!" + return o + + +class arm_imm_4_12(m_arg): + parser = base_expr + + def decode(self, v): + v = v & self.lmask + imm = (self.parent.imm4.value << 12) | v + self.expr = ExprInt32(imm) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if v > 0xffff: + return False + self.parent.imm4.value = v >> 12 + self.value = v & 0xfff + return True + + +class arm_op2(m_arg): + parser = shift_off + + def str_to_imm_rot_form(self, s, neg=False): + if neg: + s = -s & 0xffffffff + for i in xrange(0, 32, 2): + v = myrol32(s, i) + if 0 <= v < 0x100: + return ((i / 2) << 8) | v + return None + + def decode(self, v): + val = v & self.lmask + if self.parent.immop.value: + rot = val >> 8 + imm = val & 0xff + imm = myror32(imm, rot * 2) + self.expr = ExprInt32(imm) + return True + rm = val & 0xf + shift = val >> 4 + shift_kind = shift & 1 + shift_type = (shift >> 1) & 3 + shift >>= 3 + # print self.parent.immop.value, hex(shift), hex(shift_kind), + # hex(shift_type) + if shift_kind: + # shift kind is reg + if shift & 1: + # log.debug('error in shift1') + return False + rs = shift >> 1 + if rs == 0xf: + # log.debug('error in shift2') + return False + shift_op = regs_expr[rs] + else: + # shift kind is imm + amount = shift + shift_op = ExprInt32(amount) + a = regs_expr[rm] + if shift_op == ExprInt32(0): + if shift_type == 3: + self.expr = ExprOp(allshifts[4], a) + else: + self.expr = a + else: + self.expr = ExprOp(allshifts[shift_type], a, shift_op) + return True + + def 
encode(self): + e = self.expr + # pure imm + if isinstance(e, ExprInt): + val = self.str_to_imm_rot_form(int(e.arg)) + if val is None: + return False + self.parent.immop.value = 1 + self.value = val + return True + + self.parent.immop.value = 0 + # pure reg + if isinstance(e, ExprId): + rm = gpregs.expr.index(e) + shift_kind = 0 + shift_type = 0 + amount = 0 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + # rot reg + if not isinstance(e, ExprOp): + log.debug('bad reg rot1 %r', e) + return False + rm = gpregs.expr.index(e.args[0]) + shift_type = allshifts.index(e.op) + if e.op == 'rrx': + shift_kind = 0 + amount = 0 + shift_type = 3 + elif isinstance(e.args[1], ExprInt): + shift_kind = 0 + amount = int(e.args[1].arg) + # LSR/ASR of 32 => 0 + if amount == 32 and e.op in ['>>', 'a>>']: + amount = 0 + else: + shift_kind = 1 + amount = gpregs.expr.index(e.args[1]) << 1 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprInt) or isinstance(e, ExprId): + return str(e) + if isinstance(e, ExprOp) and e.op in expr2shift_dct: + if len(e.args) == 1: + return '%s %s' % (e.args[0], expr2shift_dct[e.op]) + elif len(e.args) == 2: + return '%s %s %s' % (e.args[0], expr2shift_dct[e.op], e.args[1]) + return str(e) + +# op2imm + rn + + +class arm_op2imm(arm_imm8_12): + parser = deref + + def str_to_imm_rot_form(self, s, neg=False): + if neg: + s = -s & 0xffffffff + if 0 <= s < (1 << 12): + return s + return None + + def decode(self, v): + val = v & self.lmask + if self.parent.immop.value == 0: + imm = val + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, ExprInt32(imm)) + else: + e = ExprOp('postinc', self.parent.rn.expr, ExprInt32(imm)) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + return True + rm = val & 0xf + shift = val >> 4 + 
shift_kind = shift & 1 + shift_type = (shift >> 1) & 3 + shift >>= 3 + # print self.parent.immop.value, hex(shift), hex(shift_kind), + # hex(shift_type) + if shift_kind: + # log.debug('error in disasm xx') + return False + else: + # shift kind is imm + amount = shift + shift_op = ExprInt32(amount) + a = regs_expr[rm] + if shift_op == ExprInt32(0): + pass + else: + a = ExprOp(allshifts[shift_type], a, shift_op) + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, a) + else: + e = ExprOp('postinc', self.parent.rn.expr, a) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + self.parent.immop.value = 1 + self.parent.updown.value = 1 + + e = self.expr + if e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + + # if len(v) <1: + # raise ValueError('cannot parse', s) + self.parent.rn.fromstring(e.args[0]) + if len(e.args) == 1: + self.parent.immop.value = 0 + self.value = 0 + return True + # pure imm + if isinstance(e.args[1], ExprInt): + self.parent.immop.value = 0 + val = self.str_to_imm_rot_form(int(e.args[1].arg)) + if val is None: + val = self.str_to_imm_rot_form(int(e.args[1].arg), True) + if val is None: + log.debug('cannot encode inm') + return False + self.parent.updown.value = 0 + self.value = val + return True + # pure reg + if isinstance(e.args[1], ExprId): + rm = gpregs.expr.index(e.args[1]) + shift_kind = 0 + shift_type = 0 + amount = 0 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + # rot reg + if not isinstance(e.args[1], ExprOp): + log.debug('bad reg rot2 %r' % e) + return False + e = e.args[1] + rm = gpregs.expr.index(e.args[0]) + shift_type = allshifts.index(e.op) + if isinstance(e.args[1], ExprInt): + shift_kind = 0 + 
amount = int(e.args[1].arg) + else: + shift_kind = 1 + amount = gpregs.expr.index(e.args[1]) << 1 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + + +def reglist2str(rlist): + out = [] + i = 0 + while i < len(rlist): + j = i + 1 + while j < len(rlist) and rlist[j] < 13 and rlist[j] == rlist[j - 1] + 1: + j += 1 + j -= 1 + if j < i + 2: + out.append(regs_str[rlist[i]]) + i += 1 + else: + out.append(regs_str[rlist[i]] + '-' + regs_str[rlist[j]]) + i = j + 1 + return "{" + ", ".join(out) + '}' + + +class arm_rlist(m_arg): + parser = gpreg_list + + def encode(self): + self.parent.sbit.value = 0 + e = self.expr + if isinstance(e, ExprOp) and e.op == "sbit": + e = e.args[0] + self.parent.sbit.value = 1 + rlist = [gpregs.expr.index(x) for x in e.args] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + e = ExprOp('reglist', *out) + if self.parent.sbit.value == 1: + e = ExprOp('sbit', e) + self.expr = e + return True + + @staticmethod + def arg2str(e): + o = [] + sb = False + if isinstance(e, ExprOp) and e.op == "sbit": + sb = True + e = e.args[0] + o = [gpregs.expr.index(x) for x in e.args] + out = reglist2str(o) + if sb: + out += "^" + return out + + +class updown_b_nosp_mn(bs_mod_name): + mn_mod = ['D', 'I'] + + def modname(self, name, f_i): + return name + self.args['mn_mod'][f_i] + + +class ppi_b_nosp_mn(bs_mod_name): + prio = 5 + mn_mod = ['A', 'B'] + + +class updown_b_sp_mn(bs_mod_name): + mn_mod = ['A', 'D'] + + def modname(self, name, f_i): + if name.startswith("STM"): + f_i = [1, 0][f_i] + return name + self.args['mn_mod'][f_i] + + +class ppi_b_sp_mn(bs_mod_name): + mn_mod = ['F', 'E'] + + def modname(self, name, f_i): + if name.startswith("STM"): + f_i = [1, 0][f_i] + return name + self.args['mn_mod'][f_i] + + +class arm_reg_wb_nosp(arm_reg_wb): + + def decode(self, 
v): + v = v & self.lmask + if v == 13: + return False + e = self.reg_info.expr[v] + if self.parent.wback.value: + e = ExprOp('wback', e) + self.expr = e + return True + + +class arm_offs_blx(arm_imm): + + def decode(self, v): + v = v & self.lmask + v = (v << 2) + (self.parent.lowb.value << 1) + v = sign_ext(v, 26, 32) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & 0x80000000: + v &= (1 << 26) - 1 + self.parent.lowb.value = (v >> 1) & 1 + self.value = v >> 2 + return True + + +class bs_lnk(bs_mod_name): + + def modname(self, name, i): + return name[:1] + self.args['mn_mod'][i] + name[1:] + + +accum = bs(l=1) +scc = bs_mod_name(l=1, fname='scc', mn_mod=['', 'S']) +dumscc = bs("1") +rd = bs(l=4, cls=(arm_gpreg,)) +rdl = bs(l=4, cls=(arm_gpreg,)) + +rn = bs(l=4, cls=(arm_gpreg,), fname="rn") +rs = bs(l=4, cls=(arm_gpreg,)) +rm = bs(l=4, cls=(arm_gpreg,)) +op2 = bs(l=12, cls=(arm_op2,)) +lnk = bs_lnk(l=1, fname='lnk', mn_mod=['', 'L']) +offs = bs(l=24, cls=(arm_offs,), fname="offs") + +rn_noarg = bs(l=4, cls=(arm_gpreg_noarg,), fname="rn") + +immop = bs(l=1, fname='immop') +dumr = bs(l=4, default_val="0000", fname="dumr") +# psr = bs(l=1, cls=(arm_psr,), fname="psr") + +psr = bs(l=1, fname="psr") +psr_field = bs(l=4, cls=(arm_psr,)) + +ppi = bs(l=1, fname='ppi') +updown = bs(l=1, fname='updown') +trb = bs_mod_name(l=1, fname='trb', mn_mod=['', 'B']) +wback = bs_mod_name(l=1, fname="wback", mn_mod=['', 'T']) +wback_no_t = bs(l=1, fname="wback") + +op2imm = bs(l=12, cls=(arm_op2imm,)) + +updown_b_nosp = updown_b_nosp_mn(l=1, mn_mod=['D', 'I'], fname='updown') +ppi_b_nosp = ppi_b_nosp_mn(l=1, mn_mod=['A', 'B'], fname='ppi') +updown_b_sp = updown_b_sp_mn(l=1, mn_mod=['A', 'D'], fname='updown') +ppi_b_sp = ppi_b_sp_mn(l=1, mn_mod=['F', 'E'], fname='ppi') + +sbit = bs(l=1, fname="sbit") +rn_sp = bs("1101", cls=(arm_reg_wb,), fname='rnsp') +rn_wb = bs(l=4, 
cls=(arm_reg_wb_nosp,), fname='rn') +rlist = bs(l=16, cls=(arm_rlist,), fname='rlist') + +swi_i = bs(l=24, cls=(arm_imm,), fname="swi_i") + +opc = bs(l=4, cls=(arm_imm, m_arg), fname='opc') +crn = bs(l=4, cls=(arm_cpreg,), fname='crn') +crd = bs(l=4, cls=(arm_cpreg,), fname='crd') +crm = bs(l=4, cls=(arm_cpreg,), fname='crm') +cpnum = bs(l=4, cls=(arm_preg,), fname='cpnum') +cp = bs(l=3, cls=(arm_imm, m_arg), fname='cp') + +imm8_12 = bs(l=8, cls=(arm_imm8_12, m_arg), fname='imm') +tl = bs_mod_name(l=1, fname="tl", mn_mod=['', 'L']) + +cpopc = bs(l=3, cls=(arm_imm, m_arg), fname='cpopc') +imm20 = bs(l=20, cls=(arm_imm, m_arg)) +imm4 = bs(l=4, cls=(arm_imm, m_arg)) +imm12 = bs(l=12, cls=(arm_imm, m_arg)) +imm16 = bs(l=16, cls=(arm_imm, m_arg)) + +imm4_noarg = bs(l=4, fname="imm4") + +imm_4_12 = bs(l=12, cls=(arm_imm_4_12,)) + +lowb = bs(l=1, fname='lowb') +offs_blx = bs(l=24, cls=(arm_offs_blx,), fname="offs") + +fix_cond = bs("1111", fname="cond") + + +class arm_immed(m_arg): + parser = deref + + def decode(self, v): + if self.parent.immop.value == 1: + imm = ExprInt32((self.parent.immedH.value << 4) | v) + else: + imm = gpregs.expr[v] + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, imm) + else: + e = ExprOp('postinc', self.parent.rn.expr, imm) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + + return True + + def encode(self): + self.parent.immop.value = 1 + self.parent.updown.value = 1 + e = self.expr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.value = 0 + self.parent.immedH.value = 0 + return True + e = e.args[1] + if isinstance(e, ExprInt): + v = int(e.arg) + 
if v < 0 or v & (1 << 31): + self.parent.updown.value = 0 + v = (-v) & 0xFFFFFFFF + if v > 0xff: + log.debug('cannot encode imm XXX') + return False + self.value = v & 0xF + self.parent.immedH.value = v >> 4 + return True + + self.parent.immop.value = 0 + if isinstance(e, ExprOp) and len(e.args) == 1 and e.op == "-": + self.parent.updown.value = 0 + e = e.args[0] + if e in gpregs.expr: + self.value = gpregs.expr.index(e) + self.parent.immedH.value = 0x0 + return True + else: + raise ValueError('e should be int: %r' % e) + + @staticmethod + def arg2str(e): + return arm_imm8_12.arg2str(e) + +immedH = bs(l=4, fname='immedH') +immedL = bs(l=4, cls=(arm_immed, m_arg), fname='immedL') +hb = bs(l=1) + + +def armop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_arm,), dct) + + +def armtop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_armt,), dct) + + +op_list = ['AND', 'EOR', 'SUB', 'RSB', 'ADD', 'ADC', 'SBC', 'RSC', + 'TST', 'TEQ', 'CMP', 'CMN', 'ORR', 'MOV', 'BIC', 'MVN'] +data_mov_name = {'MOV': 13, 'MVN': 15} +data_test_name = {'TST': 8, 'TEQ': 9, 'CMP': 10, 'CMN': 11} + +data_name = {} +for i, n in enumerate(op_list): + if n in data_mov_name.keys() + data_test_name.keys(): + continue + data_name[n] = i +bs_data_name = bs_name(l=4, name=data_name) + +bs_data_mov_name = bs_name(l=4, name=data_mov_name) + +bs_data_test_name = bs_name(l=4, name=data_test_name) + + +transfer_name = {'STR': 0, 'LDR': 1} +bs_transfer_name = bs_name(l=1, name=transfer_name) + +transferh_name = {'STRH': 0, 'LDRH': 1} +bs_transferh_name = bs_name(l=1, name=transferh_name) + + +transfer_ldr_name = {'LDRD': 0, 'LDRSB': 1} +bs_transfer_ldr_name = bs_name(l=1, name=transfer_ldr_name) + +btransfer_name = {'STM': 0, 'LDM': 1} +bs_btransfer_name = bs_name(l=1, name=btransfer_name) + +ctransfer_name 
= {'STC': 0, 'LDC': 1} +bs_ctransfer_name = bs_name(l=1, name=ctransfer_name) + +mr_name = {'MCR': 0, 'MRC': 1} +bs_mr_name = bs_name(l=1, name=mr_name) + +armop("mul", [bs('000000'), bs('0'), scc, rd, + bs('0000'), rs, bs('1001'), rm], [rd, rm, rs]) +armop("umull", [bs('000010'), + bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smull", [bs('000011'), bs('0'), scc, rd, + rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smlal", [bs('000011'), bs('1'), scc, rd, + rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("mla", [bs('000000'), bs('1'), scc, rd, + rn, rs, bs('1001'), rm], [rd, rm, rs, rn]) +armop("mrs", [bs('00010'), psr, bs('00'), + psr_field, rd, bs('000000000000')], [rd, psr]) +armop("msr", [bs('00010'), psr, bs('10'), psr_field, + bs('1111'), bs('0000'), bs('0000'), rm], [psr_field, rm]) +armop("data", [bs('00'), immop, bs_data_name, scc, rn, rd, op2], [rd, rn, op2]) +armop("data_mov", + [bs('00'), immop, bs_data_mov_name, scc, bs('0000'), rd, op2], [rd, op2]) +armop("data_test", [bs('00'), immop, bs_data_test_name, dumscc, rn, dumr, op2]) +armop("b", [bs('101'), lnk, offs]) + +# TODO TEST +armop("und", [bs('011'), imm20, bs('1'), imm4]) +armop("transfer", [bs('01'), immop, ppi, updown, trb, wback_no_t, + bs_transfer_name, rn_noarg, rd, op2imm], [rd, op2imm]) +armop("transferh", [bs('000'), ppi, updown, immop, wback_no_t, + bs_transferh_name, rn_noarg, rd, immedH, bs('1011'), immedL], [rd, immedL]) +armop("ldrd", [bs('000'), ppi, updown, immop, wback_no_t, bs_transfer_ldr_name, + rn_noarg, rd, immedH, bs('1101'), immedL], [rd, immedL]) +armop("ldrsh", [bs('000'), ppi, updown, immop, wback_no_t, bs('1'), rn_noarg, + rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("strd", [bs('000'), ppi, updown, immop, wback_no_t, bs('0'), rn_noarg, + rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("btransfersp", [bs('100'), ppi_b_sp, updown_b_sp, sbit, wback_no_t, + bs_btransfer_name, 
rn_sp, rlist]) +armop("btransfer", [bs('100'), ppi_b_nosp, updown_b_nosp, sbit, wback_no_t, + bs_btransfer_name, rn_wb, rlist]) +# TODO: TEST +armop("swp", [bs('00010'), trb, bs('00'), rn, rd, bs('0000'), bs('1001'), rm]) +armop("svc", [bs('1111'), swi_i]) +armop("cdp", [bs('1110'), opc, crn, crd, cpnum, cp, bs('0'), crm], + [cpnum, opc, crd, crn, crm, cp]) +armop("cdata", [bs('110'), ppi, updown, tl, wback_no_t, bs_ctransfer_name, + rn_noarg, crd, cpnum, imm8_12], [cpnum, crd, imm8_12]) +armop("mr", [bs('1110'), cpopc, bs_mr_name, crn, rd, cpnum, cp, bs('1'), crm], + [cpnum, cpopc, rd, crn, crm, cp]) +armop("bkpt", [bs('00010010'), imm12, bs('0111'), imm4]) +armop("bx", [bs('000100101111111111110001'), rn]) +armop("mov", [bs('00110000'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) +armop("movt", [bs('00110100'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) +armop("blx", [bs('00010010'), bs('1111'), + bs('1111'), bs('1111'), bs('0011'), rm], [rm]) +armop("blx", [fix_cond, bs('101'), lowb, offs_blx], [offs_blx]) +armop("clz", [bs('00010110'), bs('1111'), + rd, bs('1111'), bs('0001'), rm], [rd, rm]) +armop("qadd", + [bs('00010000'), rn, rd, bs('0000'), bs('0101'), rm], [rd, rm, rn]) + + +# +# thumnb ####################### +# +# ARM7-TDMI-manual-pt3 +gpregs_l = reg_info(regs_str[:8], regs_expr[:8]) +gpregs_h = reg_info(regs_str[8:], regs_expr[8:]) + +gpregs_sppc = reg_info(regs_str[-1:] + regs_str[13:14], + regs_expr[-1:] + regs_expr[13:14]) + +deref_low = Group(LBRACK + gpregs_l.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) +deref_pc = Group(LBRACK + gpregs_pc.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) +deref_sp = Group(LBRACK + gpregs_sp.parser + COMMA + + shift_off + RBRACK).setParseAction(deref2expr_pre) + +gpregs_l_wb = Group( + gpregs_l.parser + Optional('!')).setParseAction(parsegpreg_wb) + + +class arm_offreg(m_arg): + parser = deref_pc + + def decodeval(self, v): + return v + + def 
encodeval(self, v): + return v + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if v: + self.expr = self.off_reg + ExprInt32(v) + else: + self.expr = self.off_reg + + e = self.expr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + return True + + def encode(self): + e = self.expr + if not (isinstance(e, ExprOp) and e.op == "preinc"): + log.debug('cannot encode %r' % e) + return False + if e.args[0] != self.off_reg: + log.debug('cannot encode reg %r' % e.args[0]) + return False + v = int(e.args[1].arg) + v = self.encodeval(v) + self.value = v + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + o = str(e) + elif (len(e.args) == 2 and + isinstance(e.args[1], ExprInt) and e.args[1].arg == 0): + o = "%s" % e.args[0] + else: + o = '%s, %s' % (e.args[0], e.args[1]) + return '[%s]' % o + + +class arm_offpc(arm_offreg): + off_reg = regs_expr[15] + + +class arm_offsp(arm_offreg): + parser = deref_sp + off_reg = regs_expr[13] + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + +class arm_offspc(arm_offs): + + def decodeval(self, v): + return v << 1 + + def encodeval(self, v): + return v >> 1 + + +class arm_offspchl(arm_offs): + + def decodeval(self, v): + if self.parent.hl.value == 0: + return v << 12 + else: + return v << 1 + + def encodeval(self, v): + if v > (1 << 12): + self.parent.hl.value = 0 + v >>= 12 + else: + self.parent.hl.value = 1 + v >>= 1 + return v + + +class arm_off8sppc(arm_imm): + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + +class arm_off7(arm_imm): + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + +class arm_deref(m_arg): + parser = deref_low + + def decode(self, v): + v = v & self.lmask + rbase = regs_expr[v] + e = ExprOp('preinc', rbase, self.parent.off.expr) + self.expr = e + return True + + def encode(self): + e = self.expr + if 
not (isinstance(e, ExprOp) and e.op == 'preinc'): + log.debug('cannot encode %r' % e) + return False + off = e.args[1] + if isinstance(off, ExprId): + self.parent.off.expr = off + elif isinstance(off, ExprInt): + self.parent.off.expr = off + else: + log.debug('cannot encode off %r' % off) + return False + self.value = gpregs.expr.index(e.args[0]) + if self.value >= 1 << self.l: + log.debug('cannot encode reg %r' % off) + return False + return True + + @staticmethod + def arg2str(e): + if not (isinstance(e, ExprOp) and e.op == 'preinc'): + log.debug('cannot str %r' % e) + raise ValueError() + return '[%s, %s]' % (e.args[0], e.args[1]) + + +class arm_offbw(imm_noarg): + + def decode(self, v): + v = v & self.lmask + if self.parent.trb.value == 0: + v <<= 2 + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if self.parent.trb.value == 0: + if v & 3: + log.debug('off must be aligned %r' % v) + return False + v >>= 2 + self.value = v + return True + + +class arm_offh(imm_noarg): + + def decode(self, v): + v = v & self.lmask + v <<= 1 + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if v & 1: + log.debug('off must be aligned %r' % v) + return False + v >>= 1 + self.value = v + return True + + +class armt_rlist(m_arg): + parser = gpreg_list + + def encode(self): + e = self.expr + rlist = [gpregs_l.expr.index(x) for x in e.args] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + e = ExprOp('reglist', *out) + self.expr = e + return True + + @staticmethod + def arg2str(e): + o = [] + o = [gpregs.expr.index(x) for x in e.args] + out = reglist2str(o) + return out + + +class armt_rlist_pclr(armt_rlist): + + def encode(self): + e = self.expr + 
reg_l = list(e.args) + self.parent.pclr.value = 0 + if self.parent.pp.value == 0: + # print 'push' + if regs_expr[14] in reg_l: + reg_l.remove(regs_expr[14]) + self.parent.pclr.value = 1 + else: + # print 'pop', + if regs_expr[15] in reg_l: + reg_l.remove(regs_expr[15]) + self.parent.pclr.value = 1 + rlist = [gpregs.expr.index(x) for x in reg_l] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + + if self.parent.pclr.value == 1: + if self.parent.pp.value == 0: + out += [regs_expr[14]] + else: + out += [regs_expr[15]] + e = ExprOp('reglist', *out) + self.expr = e + return True + + +class armt_reg_wb(arm_reg_wb): + reg_info = gpregs_l + parser = gpregs_l_wb + + def decode(self, v): + v = v & self.lmask + e = self.reg_info.expr[v] + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + e = self.expr + self.value = self.reg_info.expr.index(e.args[0]) + return True + + +class arm_gpreg_l(arm_reg): + reg_info = gpregs_l + parser = reg_info.parser + + +class arm_gpreg_h(arm_reg): + reg_info = gpregs_h + parser = reg_info.parser + + +class arm_gpreg_l_noarg(arm_gpreg_noarg): + reg_info = gpregs_l + parser = reg_info.parser + + +class arm_sppc(arm_reg): + reg_info = gpregs_sppc + parser = reg_info.parser + + +class arm_sp(arm_reg): + reg_info = gpregs_sp + parser = reg_info.parser + + +off5 = bs(l=5, cls=(arm_imm,), fname="off") +off3 = bs(l=3, cls=(arm_imm,), fname="off") +off8 = bs(l=8, cls=(arm_imm,), fname="off") +off7 = bs(l=7, cls=(arm_off7,), fname="off") + +rdl = bs(l=3, cls=(arm_gpreg_l,), fname="rd") +rnl = bs(l=3, cls=(arm_gpreg_l,), fname="rn") +rsl = bs(l=3, cls=(arm_gpreg_l,), fname="rs") +rml = bs(l=3, cls=(arm_gpreg_l,), fname="rm") +rol = bs(l=3, cls=(arm_gpreg_l,), fname="ro") +rbl = bs(l=3, cls=(arm_gpreg_l,), fname="rb") +rbl_deref = bs(l=3, cls=(arm_deref,), fname="rb") +dumrh = 
bs(l=3, default_val="000") + +rdh = bs(l=3, cls=(arm_gpreg_h,), fname="rd") +rsh = bs(l=3, cls=(arm_gpreg_h,), fname="rs") + +offpc8 = bs(l=8, cls=(arm_offpc,), fname="offs") +offsp8 = bs(l=8, cls=(arm_offsp,), fname="offs") +rol_noarg = bs(l=3, cls=(arm_gpreg_l_noarg,), fname="off") + +off5bw = bs(l=5, cls=(arm_offbw,), fname="off") +off5h = bs(l=5, cls=(arm_offh,), fname="off") +sppc = bs(l=1, cls=(arm_sppc,)) + + +pclr = bs(l=1, fname='pclr') + + +sp = bs(l=0, cls=(arm_sp,)) + +tswi_i = bs(l=8, cls=(arm_imm,), fname="swi_i") + + +off8s = bs(l=8, cls=(arm_offs,), fname="offs") +trlistpclr = bs(l=8, cls=(armt_rlist_pclr,)) +trlist = bs(l=8, cls=(armt_rlist,)) + +rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') + +offs8 = bs(l=8, cls=(arm_offspc,), fname="offs") +offs11 = bs(l=11, cls=(arm_offspc,), fname="offs") + +offs11hl = bs(l=11, cls=(arm_offspchl,), fname="offs") +hl = bs(l=1, prio=default_prio + 1, fname='hl') +off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off") + +imm8_d1 = bs(l=8, default_val="00000001") +imm8 = bs(l=8, cls=(arm_imm,), default_val = "00000001") + + +mshift_name = {'LSLS': 0, 'LSRS': 1, 'ASRS': 2} +bs_mshift_name = bs_name(l=2, name=mshift_name) + + +addsub_name = {'ADDS': 0, 'SUBS': 1} +bs_addsub_name = bs_name(l=1, name=addsub_name) + +mov_cmp_add_sub_name = {'MOVS': 0, 'CMP': 1, 'ADDS': 2, 'SUBS': 3} +bs_mov_cmp_add_sub_name = bs_name(l=2, name=mov_cmp_add_sub_name) + +alu_name = {'ANDS': 0, 'EORS': 1, 'LSLS': 2, 'LSRS': 3, + 'ASRS': 4, 'ADCS': 5, 'SBCS': 6, 'RORS': 7, + 'TST': 8, 'NEGS': 9, 'CMP': 10, 'CMN': 11, + 'ORRS': 12, 'MULS': 13, 'BICS': 14, 'MVNS': 15} +bs_alu_name = bs_name(l=4, name=alu_name) + +hiregop_name = {'ADDS': 0, 'CMP': 1, 'MOV': 2} +bs_hiregop_name = bs_name(l=2, name=hiregop_name) + +ldr_str_name = {'STR': 0, 'LDR': 1} +bs_ldr_str_name = bs_name(l=1, name=ldr_str_name) + +ldrh_strh_name = {'STRH': 0, 'LDRH': 1} +bs_ldrh_strh_name = bs_name(l=1, name=ldrh_strh_name) + +ldstsp_name = {'STR': 0, 'LDR': 1} 
+bs_ldstsp_name = bs_name(l=1, name=ldstsp_name) + +addsubsp_name = {'ADD': 0, 'SUB': 1} +bs_addsubsp_name = bs_name(l=1, name=addsubsp_name) + +pushpop_name = {'PUSH': 0, 'POP': 1} +bs_pushpop_name = bs_name(l=1, name=pushpop_name, fname='pp') + +tbtransfer_name = {'STMIA': 0, 'LDMIA': 1} +bs_tbtransfer_name = bs_name(l=1, name=tbtransfer_name) + +br_name = {'BEQ': 0, 'BNE': 1, 'BCS': 2, 'BCC': 3, 'BMI': 4, + 'BPL': 5, 'BVS': 6, 'BVC': 7, 'BHI': 8, 'BLS': 9, + 'BGE': 10, 'BLT': 11, 'BGT': 12, 'BLE': 13} +bs_br_name = bs_name(l=4, name=br_name) + + +armtop("mshift", [bs('000'), bs_mshift_name, off5, rsl, rdl], [rdl, rsl, off5]) +armtop("addsubr", + [bs('000110'), bs_addsub_name, rnl, rsl, rdl], [rdl, rsl, rnl]) +armtop("addsubi", + [bs('000111'), bs_addsub_name, off3, rsl, rdl], [rdl, rsl, off3]) +armtop("mcas", [bs('001'), bs_mov_cmp_add_sub_name, rnl, off8]) +armtop("alu", [bs('010000'), bs_alu_name, rsl, rdl], [rdl, rsl]) + # should not be used ?? +armtop("hiregop00", + [bs('010001'), bs_hiregop_name, bs('00'), rsl, rdl], [rdl, rsl]) +armtop("hiregop01", + [bs('010001'), bs_hiregop_name, bs('01'), rsh, rdl], [rdl, rsh]) +armtop("hiregop10", + [bs('010001'), bs_hiregop_name, bs('10'), rsl, rdh], [rdh, rsl]) +armtop("hiregop11", + [bs('010001'), bs_hiregop_name, bs('11'), rsh, rdh], [rdh, rsh]) +armtop("bx", [bs('010001'), bs('11'), bs('00'), rsl, dumrh]) +armtop("bx", [bs('010001'), bs('11'), bs('01'), rsh, dumrh]) +armtop("ldr", [bs('01001'), rdl, offpc8]) +armtop("ldrstr", [bs('0101'), bs_ldr_str_name, + trb, bs('0'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("strh", [bs('0101'), bs('00'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldrh", [bs('0101'), bs('10'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsb", [bs('0101'), bs('01'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsh", [bs('0101'), bs('11'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldst", 
[bs('011'), trb, + bs_ldr_str_name, off5bw, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldhsth", + [bs('1000'), bs_ldrh_strh_name, off5h, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldstsp", [bs('1001'), bs_ldstsp_name, rdl, offsp8], [rdl, offsp8]) +armtop("add", [bs('1010'), sppc, rdl, off8sppc], [rdl, sppc, off8sppc]) +armtop("addsp", [bs('10110000'), bs_addsubsp_name, sp, off7], [sp, off7]) +armtop("pushpop", + [bs('1011'), bs_pushpop_name, bs('10'), pclr, trlistpclr], [trlistpclr]) +armtop("btransfersp", [bs('1100'), bs_tbtransfer_name, rbl_wb, trlist]) +armtop("br", [bs('1101'), bs_br_name, offs8]) +armtop("blx", [bs("01000111"), bs('10'), rnl, bs('000')]) +armtop("swi", [bs('11011111'), tswi_i]) +armtop("b", [bs('11100'), offs11]) +armtop("und", [bs('1101'), bs('1110'), imm8_d1]) + +# +# thumnb2 ###################### +# + +# ARM Architecture Reference Manual Thumb-2 Supplement + +armt_gpreg_shift_off = Group( + gpregs_nosppc.parser + allshifts_t_armt + base_expr +).setParseAction(shift2expr) +armt_gpreg_shift_off |= gpregs_nosppc.parser + + +class arm_gpreg_nosppc(arm_reg): + reg_info = gpregs_nosppc + + +class armt_gpreg_rm_shift_off(arm_reg): + parser = armt_gpreg_shift_off + + def decode(self, v): + v = v & self.lmask + r = gpregs_nosppc.expr[v] + + i = int(self.parent.imm5_3.value) << 2 + i |= int(self.parent.imm5_2.value) + + if self.parent.stype.value < 3 or i != 0: + shift = allshifts_armt[self.parent.stype.value] + else: + shift = allshifts_armt[4] + self.expr = ExprOp(shift, r, ExprInt32(i)) + return True + + def encode(self): + e = self.expr + if isinstance(e, ExprId): + self.value = gpregs_nosppc.index(e) + self.parent.stype.value = 0 + self.parent.imm5_3.value = 0 + self.parent.imm5_2.value = 0 + return True + shift = e.op + r = gpregs_nosppc.expr.index(e.args[0]) + self.value = r + i = int(e.args[1].arg) + if shift == 'rrx': + if i != 1: + log.debug('rrx shift must be 1') + return False + self.parent.imm5_3.value = 0 + self.parent.imm5_2.value = 
0 + self.parent.stype.value = 3 + return True + self.parent.stype.value = allshifts_armt.index(shift) + self.parent.imm5_2.value = i & 3 + self.parent.imm5_3.value = i >> 2 + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + return str(e) + return str(e)[1:-1] + +rn_nosppc = bs(l=4, cls=(arm_gpreg_nosppc,), fname="rn") +rd_nosppc = bs(l=4, cls=(arm_gpreg_nosppc,), fname="rd") +rm_sh = bs(l=4, cls=(armt_gpreg_rm_shift_off,), fname="rm") + + +class armt2_imm12(arm_imm): + + def decode(self, v): + v = v & self.lmask + v |= int(self.parent.imm12_3.value) << 8 + v |= int(self.parent.imm12_1.value) << 11 + + # simple encoding + if 0 <= v < 0x100: + self.expr = ExprInt32(v) + return True + # 00XY00XY form + if v >> 8 == 1: + v &= 0xFF + self.expr = ExprInt32((v << 16) | v) + return True + # XY00XY00 form + if v >> 8 == 2: + v &= 0xFF + self.expr = ExprInt32((v << 24) | (v << 8)) + return True + # XYXYXYXY + if v >> 8 == 3: + v &= 0xFF + self.expr = ExprInt32((v << 24) | (v << 16) | (v << 8) | v) + return True + r = v >> 7 + v = v & 0xFF + self.expr = ExprInt32(myror32(v, r)) + return True + + def encode(self): + v = int(self.expr.arg) + value = None + # simple encoding + if 0 <= v < 0x100: + value = v + elif v & 0xFF00FF00 == 0 and v & 0xFF == (v >> 16) & 0xff: + # 00XY00XY form + value = (1 << 8) | (v & 0xFF) + elif v & 0x00FF00FF == 0 and (v >> 8) & 0xff == (v >> 24) & 0xff: + # XY00XY00 form + value = (2 << 8) | ((v >> 8) & 0xff) + elif (v & 0xFF == + (v >> 8) & 0xFF == + (v >> 16) & 0xFF == + (v >> 24) & 0xFF): + # XYXYXYXY form + value = (3 << 8) | ((v >> 16) & 0xff) + else: + # rol encoding + for i in xrange(32): + o = myrol32(v, i) + if 0 <= o < 0x100 and o & 0x80: + value = (i << 7) | o + break + if value is None: + log.debug('cannot encode imm12') + return False + self.value = value & self.lmask + self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask + self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask 
+ return True + + +class armt2_imm10l(arm_imm): + + def decode(self, v): + v = v & self.lmask + s = self.parent.sign.value + j1 = self.parent.j1.value + j2 = self.parent.j2.value + imm10h = self.parent.imm10h.value + imm10l = v + + i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1 + + v = (s << 24) | (i1 << 23) | ( + i2 << 22) | (imm10h << 12) | (imm10l << 2) + v = sign_ext(v, 25, 32) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + s = 0 + if v & 0x80000000: + s = 1 + v = (-v) & 0xffffffff + if v > (1 << 26): + return False + i1, i2, imm10h, imm10l = (v >> 23) & 1, ( + v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 2) & 0x3ff + j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1 + self.parent.sign.value = s + self.parent.j1.value = j1 + self.parent.j2.value = j2 + self.parent.imm10h.value = imm10h + self.value = imm10l + return True + + +class armt2_imm11l(arm_imm): + + def decode(self, v): + v = v & self.lmask + s = self.parent.sign.value + j1 = self.parent.j1.value + j2 = self.parent.j2.value + imm10h = self.parent.imm10h.value + imm11l = v + + i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1 + + v = (s << 24) | (i1 << 23) | ( + i2 << 22) | (imm10h << 12) | (imm11l << 1) + v = sign_ext(v, 25, 32) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + s = 0 + if v & 0x80000000: + s = 1 + v = (-v) & 0xffffffff + if v > (1 << 26): + return False + i1, i2, imm10h, imm11l = (v >> 23) & 1, ( + v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 1) & 0x7ff + j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1 + self.parent.sign.value = s + self.parent.j1.value = j1 + self.parent.j2.value = j2 + self.parent.imm10h.value = imm10h + self.value = imm11l + return True + + +imm12_1 = bs(l=1, fname="imm12_1", order=1) +imm12_3 = bs(l=3, fname="imm12_3", order=1) +imm12_8 = bs(l=8, cls=(armt2_imm12,), fname="imm", order=2) + + +imm5_3 = bs(l=3, fname="imm5_3") +imm5_2 = bs(l=2, 
fname="imm5_2") +imm_stype = bs(l=2, fname="stype") + +imm1 = bs(l=1, fname="imm1") + + +class armt_imm5_1(arm_imm): + + def decode(self, v): + v = sign_ext(((self.parent.imm1.value << 5) | v) << 1, 7, 32) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & 0x80000000: + v &= (1 << 7) - 1 + self.parent.imm1.value = (v >> 6) & 1 + self.value = (v >> 1) & 0x1f + return True + +imm5_off = bs(l=5, cls=(armt_imm5_1,), fname="imm5_off") + +tsign = bs(l=1, fname="sign") +tj1 = bs(l=1, fname="j1") +tj2 = bs(l=1, fname="j2") + +timm10H = bs(l=10, fname="imm10h") +timm10L = bs(l=10, cls=(armt2_imm10l,), fname="imm10l") +timm11L = bs(l=11, cls=(armt2_imm11l,), fname="imm11l") + + +armtop("adc", [bs('11110'), imm12_1, bs('0'), bs('1010'), scc, rn_nosppc, + bs('0'), imm12_3, rd_nosppc, imm12_8]) +armtop("adc", [bs('11101'), bs('01'), bs('1010'), scc, rn_nosppc, + bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh]) +armtop("bl", [bs('11110'), tsign, timm10H, + bs('11'), tj1, bs('1'), tj2, timm11L]) +armtop("blx", [bs('11110'), tsign, timm10H, + bs('11'), tj1, bs('0'), tj2, timm10L, bs('0')]) +armtop("cbz", [bs('101100'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) +armtop("cbnz", [bs('101110'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py new file mode 100644 index 00000000..64e10eec --- /dev/null +++ b/miasm2/arch/arm/disasm.py @@ -0,0 +1,51 @@ +from miasm2.core.asmbloc import asm_constraint, disasmEngine +from arch import mn_arm, mn_armt + + +def cb_arm_fix_call( + mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + """ + for arm: + MOV LR, PC + LDR PC, [R5, 0x14] + * is a subcall * + + """ + if len(cur_bloc.lines) < 2: + return + l1 = cur_bloc.lines[-1] + l2 = cur_bloc.lines[-2] + if l1.name != "LDR": + return + if l2.name != "MOV": + return + # print cur_bloc + # print l1 + if not 
l1.args[0] in mn.pc.values(): + return + if not l2.args[1] in mn.pc.values(): + return + cur_bloc.add_cst(l1.offset + 4, asm_constraint.c_next, symbol_pool) + offsets_to_dis.add(l1.offset + 4) + +cb_arm_funcs = [cb_arm_fix_call] + + +def cb_arm_disasm(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + for func in cb_arm_funcs: + func(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + + +class dis_arm(disasmEngine): + attrib = 'arm' + + def __init__(self, bs=None, **kwargs): + super(dis_arm, self).__init__(mn_arm, self.attrib, bs, **kwargs) + self.dis_bloc_callback = cb_arm_disasm + + +class dis_armt(disasmEngine): + attrib = 'armt' + + def __init__(self, bs=None, **kwargs): + super(dis_armt, self).__init__(mn_armt, self.attrib, bs, **kwargs) diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py new file mode 100644 index 00000000..7ff2c142 --- /dev/null +++ b/miasm2/arch/arm/ira.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.ir.ir import ir, irbloc +from miasm2.ir.analysis import ira +from miasm2.arch.arm.sem import ir_arm, ir_armt +from miasm2.arch.arm.regs import * +# from miasm2.core.graph import DiGraph + + +class ir_a_arm_base(ir_arm, ira): + + def __init__(self, symbol_pool=None): + ir_arm.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.R0 + + +class ir_a_arm(ir_a_arm_base): + + def __init__(self, symbol_pool=None): + ir_a_arm_base.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.R0 + + # for test XXX TODO + def set_dead_regs(self, b): + b.rw[-1][1].add(self.arch.regs.zf) + b.rw[-1][1].add(self.arch.regs.nf) + b.rw[-1][1].add(self.arch.regs.of) + b.rw[-1][1].add(self.arch.regs.cf) + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), + ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ]] + return irs + + def post_add_bloc(self, bloc, ir_blocs): + ir.post_add_bloc(self, 
class ir_a_armt(ir_armt, ir_a_arm):
    """IR analysis class built on the ``ir_armt`` translator.

    Inherits the analysis hooks (call_effects, post_add_bloc, get_out_regs,
    ...) from ``ir_a_arm`` and the instruction translation from ``ir_armt``.
    """

    def __init__(self, symbol_pool=None):
        """Initialise through ``ir_armt`` and select R0 as return register.

        ``symbol_pool`` defaults to None so this constructor accepts the same
        calls as ``ir_armt.__init__`` and ``ir_a_arm.__init__``, which both
        default it.
        """
        ir_armt.__init__(self, symbol_pool)
        # ret_reg is read by the inherited call_effects()/get_out_regs().
        self.ret_reg = self.arch.regs.R0
# GP

regs32_str = ["R%d" % i for i in xrange(13)] + ["SP", "LR", "PC"]
regs32_expr = [ExprId(x, 32) for x in regs32_str]

# Bind every 32-bit register expression to a module-level name, in the same
# order as regs32_str.
(R0, R1, R2, R3, R4, R5, R6, R7,
 R8, R9, R10, R11, R12, SP, LR, PC) = regs32_expr

# "<reg>_init" identifiers, one per register above and in the same order.
# No size argument is passed, so ExprId's default size applies.
(R0_init, R1_init, R2_init, R3_init,
 R4_init, R5_init, R6_init, R7_init,
 R8_init, R9_init, R10_init, R11_init,
 R12_init, SP_init, LR_init, PC_init) = tuple(
    ExprId(reg_name + "_init") for reg_name in regs32_str)


# Flag names and their 1-bit identifiers.
reg_zf = 'zf'
reg_nf = 'nf'
reg_of = 'of'
reg_cf = 'cf'

zf, nf, of, cf = tuple(
    ExprId(flag_name, size=1)
    for flag_name in (reg_zf, reg_nf, reg_of, reg_cf))

zf_init, nf_init, of_init, cf_init = tuple(
    ExprId(flag_name + "_init", size=1)
    for flag_name in (reg_zf, reg_nf, reg_of, reg_cf))


# All registers followed by the four flags.
all_regs_ids = regs32_expr + [zf, nf, of, cf]

all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids])

all_regs_ids_init = [
    R0_init, R1_init, R2_init, R3_init,
    R4_init, R5_init, R6_init, R7_init,
    R8_init, R9_init, R10_init, R11_init,
    R12_init, SP_init, LR_init, PC_init,
    zf_init, nf_init, of_init, cf_init,
]

# Map each register/flag to the "_init" identifier at the same position.
regs_init = {}
for i, r in enumerate(all_regs_ids):
    regs_init[r] = all_regs_ids_init[i]
def check_ops_msb(a, b, c):
    """Validate three operand sizes.

    Raises ValueError('bad ops size ...') when any of the three values is
    falsy or when they are not all equal; returns None otherwise.
    """
    sizes_ok = bool(a and b and c) and not (a != b or a != c)
    if sizes_ok:
        return
    raise ValueError('bad ops size %s %s %s' % (a, b, c))
def update_flag_add_cf(a, b, c):
    """Return the carry-flag assignment for the addition ``c = a + b``.

    The expression ``((a ^ b) ^ c) ^ ((a ^ c) & ~(a ^ b))`` has the carry out
    of the addition in its most significant bit. For an addition the carry
    flag is that carry out itself, so no inversion is applied here; only the
    subtraction helper (update_flag_sub_cf) inverts, because there the flag
    is the complement of the borrow.

    a, b: the two operands; c: the result. Returns a single ExprAff on cf.
    """
    return ExprAff(cf, (((a ^ b) ^ c) ^ ((a ^ c) & (~(a ^ b)))).msb())
def rsbs(ir, instr, a, b, x=None):
    """Reverse subtract with flag update: ``a = x - b``.

    With two operands (x is None) ``a`` is both destination and first
    operand. Returns (dst, e): dst is PC when the destination is PC, else
    None; e is the list of assignments (flags first, then the result).
    """
    e = []
    if x is None:
        b, x = a, b
    c = x - b
    e += update_flag_arith(c)
    # The subtraction performed is x - b, so x is the first (minuend)
    # argument of the flag helper, b the second.
    e += update_flag_sub(x, b, c)
    e.append(ExprAff(a, c))
    dst = get_dst(a)
    return dst, e


def sbc(ir, instr, a, b, x=None):
    """Subtract with carry: ``a = (b + cf) - (x + 1)``; flags untouched.

    With two operands (x is None) ``a`` is both destination and first
    operand. Returns (dst, e) as the other data-processing helpers do.
    """
    e = []
    if x is None:
        b, x = a, b
    c = (b + cf.zeroExtend(32)) - (x + ExprInt32(1))
    e.append(ExprAff(a, c))
    dst = get_dst(a)
    return dst, e


def sbcs(ir, instr, a, b, x=None):
    """Subtract with carry and flag update: ``a = (b + cf) - (x + 1)``.

    Same operand handling as sbc; additionally emits the zero/negative and
    carry/overflow flag assignments before the result assignment.
    """
    e = []
    if x is None:
        b, x = a, b
    c = (b + cf.zeroExtend(32)) - (x + ExprInt32(1))
    e += update_flag_arith(c)
    e += update_flag_sub(b, x, c)
    e.append(ExprAff(a, c))
    dst = get_dst(a)
    return dst, e


def rsc(ir, instr, a, b, x=None):
    """Reverse subtract with carry: ``a = (x + cf) - (b + 1)``.

    The parameters follow the (destination, operand, operand) order used by
    every other data-processing helper, so the destination ``a`` receives
    the result. The third operand is optional; with two operands ``a`` is
    both destination and first operand.
    """
    e = []
    if x is None:
        b, x = a, b
    c = (x + cf.zeroExtend(32)) - (b + ExprInt32(1))
    e.append(ExprAff(a, c))
    dst = get_dst(a)
    return dst, e


def rscs(ir, instr, a, b, x=None):
    """Reverse subtract with carry and flag update.

    Computes ``a = (x + cf) - (b + 1)`` like rsc, and emits the flag
    assignments followed by a single assignment of the result.
    """
    e = []
    if x is None:
        b, x = a, b
    c = (x + cf.zeroExtend(32)) - (b + ExprInt32(1))
    e += update_flag_arith(c)
    # Minuend first, as for rsbs.
    e += update_flag_sub(x, b, c)
    e.append(ExprAff(a, c))
    dst = get_dst(a)
    return dst, e
def l_cmp(ir, instr, a, b, x=None):
    """Compare: compute ``b - x`` and update the flags only.

    With two operands (x is None) the comparison is ``a - b``. No register
    is written. Returns (None, e) where e holds the flag assignments.
    """
    e = []
    if x is None:
        b, x = a, b
    c = b - x
    e += update_flag_arith(c)
    # Pass the operands in the order of the subtraction (minuend first), as
    # subs does for the same c = b - x; the carry/overflow formulas are not
    # symmetric in their first two arguments.
    e += update_flag_sub(b, x, c)
    return None, e
def st_ld_r(ir, instr, a, b, store=False, size=32, s_ext=False, z_ext=False):
    """Build the assignments for a single-register load or store.

    a: the transferred register.
    b: the address operand. It may be wrapped in a "wback" ExprOp and may be
       a "preinc"/"postinc" ExprOp whose arguments are (base, offset);
       anything else is used as the address with a zero offset.
    store: True for a store, False for a load.
    size: access width in bits (8, 16, 32 or 64).
    s_ext / z_ext: for 8/16-bit loads, sign- or zero-extend to ``a.size``.

    Returns (dst, e): dst is PC when a load targets PC, otherwise None; e is
    the list of assignments. Raises ValueError for an unsupported size or an
    8/16-bit load without an extension mode.
    """
    e = []
    wb = False
    b = b.copy()
    if isinstance(b, ExprOp) and b.op == "wback":
        wb = True
        b = b.args[0]
    # Re-check the type after unwrapping: the wrapped operand is only
    # inspected for its operator when it actually is an ExprOp.
    postinc = isinstance(b, ExprOp) and b.op == "postinc"
    if isinstance(b, ExprOp) and b.op in ["postinc", 'preinc']:
        # XXX TODO CHECK
        base, off = b.args[0], b.args[1]  # ExprInt32(size/8)
    else:
        base, off = b, ExprInt32(0)

    # Post-indexed accesses use the base unchanged; the offset is only
    # applied by the write-back below.
    if postinc:
        ad = base
    else:
        ad = base + off

    dmem = False
    if size in [8, 16]:
        if store:
            # Store only the low `size` bits of the register.
            a = a[:size]
            m = ExprMem(ad, size=size)
        elif s_ext:
            m = ExprMem(ad, size=size).signExtend(a.size)
        elif z_ext:
            m = ExprMem(ad, size=size).zeroExtend(a.size)
        else:
            raise ValueError('unhandled case')
    elif size == 32:
        m = ExprMem(ad, size=size)
    elif size == 64:
        # Double-word access: two 32-bit accesses at ad and ad + 4, the
        # second one using the register that follows `a` in all_regs_ids.
        m = ExprMem(ad, size=32)
        dmem = True
        a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1]
        size = 32
    else:
        raise ValueError('the size DOES matter')
    dst = None

    if store:
        e.append(ExprAff(m, a))
        if dmem:
            e.append(ExprAff(ExprMem(ad + ExprInt32(4), size=size), a2))
    else:
        if a == PC:
            dst = PC
        e.append(ExprAff(a, m))
        if dmem:
            e.append(ExprAff(a2, ExprMem(ad + ExprInt32(4), size=size)))

    # XXX TODO check multiple write cause by wb
    if wb or postinc:
        e.append(ExprAff(base, base + off))
    return dst, e
def push(ir, instr, a):
    """Build the assignments for PUSH {reglist}.

    a: register-list operand; its ``args`` are the registers to save.

    Every register is written to memory below the current SP and SP is then
    decremented by 4 bytes per register. The i-th listed register is stored
    at ``new_SP + 4 * i``, which is the slot from which pop() reloads the
    i-th register and the same layout stmdb produces.

    Returns (None, e) where e is the list of assignments.
    """
    e = []
    regs = list(a.args)
    frame_size = 4 * len(regs)
    for i, reg in enumerate(regs):
        c = SP + ExprInt32(4 * i - frame_size)
        # A push stores the register into memory (memory is the
        # destination of the assignment).
        e.append(ExprAff(ExprMem(c), reg))
    c = SP + ExprInt32(-frame_size)
    e.append(ExprAff(SP, c))
    return None, e
def is_pc_written(ir, instr_ir):
    """Tell whether one of the given assignments targets a PC register.

    ir: object whose ``mn.pc`` mapping holds the PC register(s) as values.
    instr_ir: iterable of assignments exposing a ``dst`` attribute.

    Returns (True, dst) for the first assignment whose destination is one of
    the PC registers, or (False, None) when there is none.
    """
    pc_regs = ir.mn.pc.values()
    for assignment in instr_ir:
        target = assignment.dst
        if target in pc_regs:
            return True, target
    return False, None
ValueError('unknown condition %r' % cond) + cond = tab_cond[cond] + + lbl_next = ExprId(ir.get_next_label(instr), 32) + lbl_do = ExprId(ir.gen_label(), 32) + + dst_cond = ExprCond(cond, lbl_do, lbl_next) + assert(isinstance(instr_ir, list)) + + if dst is None: + dst = lbl_next + e_do = irbloc(lbl_do.name, dst, [instr_ir]) + return dst_cond, [], [e_do] + +mnemo_func = {} +mnemo_func_cond = {} +mnemo_condm0 = {'add': add, + 'sub': sub, + 'eor': eor, + 'and': l_and, + 'rsb': rsb, + 'adc': adc, + 'sbc': sbc, + 'rsc': rsc, + + 'tst': tst, + 'teq': teq, + 'cmp': l_cmp, + 'cmn': cmn, + 'orr': orr, + 'mov': mov, + 'movt': movt, + 'bic': bic, + 'mvn': mvn, + + 'mul': mul, + 'mla': mla, + 'ldr': ldr, + 'ldrd': ldrd, + 'str': l_str, + 'strd': l_strd, + 'b': b, + 'bl': bl, + 'svc': svc, + 'und': und, + 'bx': bx, + 'ldrh': ldrh, + 'strh': strh, + 'ldrsh': ldrsh, + } + +mnemo_condm1 = {'adds': add, + 'subs': subs, + 'eors': eors, + 'ands': l_and, + 'rsbs': rsbs, + 'adcs': adc, + 'sbcs': sbcs, + 'rscs': rscs, + + 'orrs': orrs, + 'movs': movs, + 'bics': bics, + 'mvns': mvns, + + 'muls': muls, + 'mlas': mlas, + 'blx': blx, + + 'ldrb': ldrb, + 'strb': strb, + + } + +mnemo_condm2 = {'ldmia': ldmia, + 'ldmib': ldmib, + 'ldmda': ldmda, + 'ldmdb': ldmdb, + + 'ldmfa': ldmda, + 'ldmfd': ldmia, + 'ldmea': ldmdb, + 'ldmed': ldmib, # XXX + + + 'stmia': stmia, + 'stmib': stmib, + 'stmda': stmda, + 'stmdb': stmdb, + + 'stmfa': stmib, + 'stmed': stmda, + 'stmfd': stmdb, + 'stmea': stmia, + } + + +mnemo_nocond = {'lsr': lsr, + 'lsrs': lsrs, + 'lsl': lsl, + 'lsls': lsls, + 'push': push, + 'pop': pop, + 'cbz': cbz, + 'cbnz': cbnz, + } +mn_cond_x = [mnemo_condm0, + mnemo_condm1, + mnemo_condm2] + +for index, mn_base in enumerate(mn_cond_x): + for mn, mf in mn_base.items(): + for cond, cn in cond_dct.items(): + if cond == COND_AL: + cn = "" + cn = cn.lower() + if index == 0: + mn_mod = mn + cn + else: + mn_mod = mn[:-index] + cn + mn[-index:] + # print mn_mod + mnemo_func_cond[mn_mod] = cond, mf + 
def split_expr_dst(ir, instr_ir):
    """Copy the assignments and report whether any of them writes ``ir.pc``.

    Returns (out, dst): out is a new list holding every element of instr_ir
    in order; dst is ``ir.pc`` when at least one element has ``ir.pc`` as its
    ``dst``, otherwise None.
    """
    kept = list(instr_ir)
    dst = None
    for aff in kept:
        if aff.dst == ir.pc:
            dst = ir.pc  # i.src
    return kept, dst
def deref_expr(s, l, t):
    """Parse action installed on ``sreg_p``: unwrap the parsed operand.

    s, l: first two parse-action arguments (unused here).
    t: parse results; ``t[0]`` must be a group holding exactly one element.

    Returns that element when it is an ExprId, ExprInt, ExprMem or an
    "autoinc" ExprOp. Raises NotImplementedError for anything else.
    """
    t = t[0]
    assert(len(t) == 1)
    t = t[0]
    if isinstance(t, (ExprId, ExprInt, ExprMem)):
        return t
    if isinstance(t, ExprOp) and t.op == "autoinc":
        return t
    # Every supported operand kind returned above; there is no fall-through.
    raise NotImplementedError('not fully functional')
def fixDstOffset(self):
    """Normalise the destination operand once the instruction has an offset.

    Reads ``self.args[0]``. When it is an ExprInt it is replaced by
    ``ExprInt_fromsize(self.mode, value)``; any other expression is left
    untouched and a warning is logged.

    Raises ValueError when ``self.offset`` is None.
    """
    e = self.args[0]
    if self.offset is None:
        # Report the unresolved destination operand in the message.
        raise ValueError('symbol not resolved %s' % (e,))
    if not isinstance(e, ExprInt):
        # raise ValueError('dst must be int or label')
        log.warning('dynamic dst %r' % e)
        return
    # NOTE(review): self.mode is used as the integer size here; confirm it
    # holds a bit width for this architecture.
    self.args[0] = ExprInt_fromsize(self.mode, e.arg)
def getnextflow(self, symbol_pool):
    """Not implemented: always raises NotImplementedError.

    symbol_pool is accepted for interface compatibility and not used.
    """
    raise NotImplementedError('not fully functional')
@staticmethod
def arg2str(e):
    """Return the assembly text of a source operand expression.

    Formats produced:
      * ExprId / ExprInt          -> str(e)
      * ExprOp "autoinc"          -> "@<reg>+"
      * ExprMem of ExprId/ExprInt -> "@<address>"
      * ExprMem of ExprOp         -> "<args[1]>(<args[0]>)"

    Raises NotImplementedError for any other expression or memory address
    kind, so the result is never left undefined.
    """
    if isinstance(e, (ExprId, ExprInt)):
        return str(e)
    if isinstance(e, ExprOp) and e.op == "autoinc":
        return "@%s+" % str(e.args[0])
    if isinstance(e, ExprMem):
        if isinstance(e.arg, (ExprId, ExprInt)):
            return "@%s" % e.arg
        if isinstance(e.arg, ExprOp):
            return "%s(%s)" % (e.arg.args[1], e.arg.args[0])
        raise NotImplementedError(
            'unknown instance e.arg = %s' % type(e.arg))
    raise NotImplementedError('unknown instance e = %s' % type(e))
class bs_cond_off_s(bs_cond):
    """16-bit source extension word, present only for some operand modes."""

    @classmethod
    def flen(cls, mode, v):
        """Return 16 when the source operand carries an extension word,
        None when it does not.

        ``v`` maps already-decoded field names to their values; only
        ``a_s`` (addressing mode) and ``sreg`` (register index) are read.
        Raises NotImplementedError for an ``a_s`` outside 0..3.
        """
        a_s = v['a_s']
        if a_s == 0b00:
            return None
        elif a_s == 0b01:
            # register 3 in this mode is a constant, no offset follows
            if v['sreg'] in [3]:
                return None
            return 16
        elif a_s == 0b10:
            return None
        elif a_s == 0b11:
            # Only register 0 (PC) in this mode is followed by a word.
            # An earlier variant, kept in the original as a string literal,
            # returned None for sreg in [2, 3] and 16 for every other
            # register.
            if v['sreg'] in [0]:
                return 16
            return None
        else:
            raise NotImplementedError("unknown value v[a_s] = %d" % v['a_s'])

    def encode(self):
        # super(bs_cond, ...) starts the lookup *after* bs_cond in the MRO,
        # so bs_cond's own encode() is deliberately skipped.
        return super(bs_cond, self).encode()

    def decode(self, v):
        """Store the raw field value; always returns True."""
        # The original first assigned None when self.l == 0 and then
        # unconditionally overwrote it with v; that dead store is dropped.
        # NOTE(review): if "None for an absent field" was the intent, this
        # needs an early return instead -- confirm with the field users.
        self.value = v
        return True
class ir_a_msp430(ir_a_msp430_base):
    """MSP430 IR analysis class: marks flag registers on a block and
    models sub-calls with a synthetic IR block."""

    def __init__(self, symbol_pool=None):
        ir_a_msp430_base.__init__(self, symbol_pool)

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Add every flag / status-bit register to ``b.rw[-1][1]``."""
        regs = self.arch.regs
        target = b.rw[-1][1]
        for reg_name in ('zf', 'nf', 'of', 'cf',
                         'res', 'scg1', 'scg0', 'osc', 'cpuoff', 'gie'):
            target.add(getattr(regs, reg_name))

    def call_effects(self, ad):
        """Return the IR of a call to ``ad``: one list holding the
        assignments of the return register and of the stack pointer."""
        ret_aff = ExprAff(self.ret_reg,
                          ExprOp('call_func_ret', ad, self.sp))
        sp_aff = ExprAff(self.sp,
                         ExprOp('call_func_stack', ad, self.sp))
        return [[ret_aff, sp_aff]]

    def post_add_bloc(self, bloc, ir_blocs):
        """After the generic processing, when ``bloc`` ends with a sub-call,
        route every IR block that assigns PC through a new block holding
        the call effects."""
        ir.post_add_bloc(self, bloc, ir_blocs)

        if not bloc.lines[-1].is_subcall():
            return

        for irb in ir_blocs:
            # keep the source of the last assignment to PC, if any
            pc_val = None
            for exprs in irb.irs:
                for aff in exprs:
                    if aff.dst == PC:
                        pc_val = aff.src
            if pc_val is None:
                continue

            call_line = bloc.lines[-1]
            next_lbl = bloc.get_next()
            new_lbl = self.gen_label()
            effects = self.call_effects(pc_val)
            call_bloc = irbloc(
                new_lbl, ExprId(next_lbl, size=self.pc.size), effects)
            call_bloc.lines = [call_line]
            self.blocs[new_lbl] = call_bloc
            irb.dst = ExprId(new_lbl, size=self.pc.size)

    def get_out_regs(self, b):
        """Return the set made of the return register and the stack
        pointer."""
        return set((self.ret_reg, self.sp))
regs16_expr[15] + +PC_init = ExprId("PC_init", 16) +SP_init = ExprId("SP_init", 16) +SR_init = ExprId("SR_init", 16) +R3_init = ExprId("R3_init", 16) +R4_init = ExprId("R4_init", 16) +R5_init = ExprId("R5_init", 16) +R6_init = ExprId("R6_init", 16) +R7_init = ExprId("R7_init", 16) +R8_init = ExprId("R8_init", 16) +R9_init = ExprId("R9_init", 16) +R10_init = ExprId("R10_init", 16) +R11_init = ExprId("R11_init", 16) +R12_init = ExprId("R12_init", 16) +R13_init = ExprId("R13_init", 16) +R14_init = ExprId("R14_init", 16) +R15_init = ExprId("R15_init", 16) + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' +reg_cpuoff = 'cpuoff' +reg_gie = 'gie' +reg_osc = 'osc' +reg_scg0 = 'scg0' +reg_scg1 = 'scg1' +reg_res = 'res' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +cpuoff = ExprId(reg_cpuoff, size=1) +gie = ExprId(reg_gie, size=1) +osc = ExprId(reg_osc, size=1) +scg0 = ExprId(reg_scg0, size=1) +scg1 = ExprId(reg_scg1, size=1) +res = ExprId(reg_res, size=7) + + +zf_init = ExprId("zf_init", size=1) +nf_init = ExprId("nf_init", size=1) +of_init = ExprId("of_init", size=1) +cf_init = ExprId("cf_init", size=1) + + +cpuoff_init = ExprId("cpuoff_init", size=1) +gie_init = ExprId("gie_init", size=1) +osc_init = ExprId("osc_init", size=1) +scg0_init = ExprId("scg0_init", size=1) +scg1_init = ExprId("scg1_init", size=1) +res_init = ExprId("res_init", size=7) + + +all_regs_ids = [ + PC, SP, SR, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, of, cf, + cpuoff, gie, osc, scg0, scg1, res, +] + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [PC_init, SP_init, SR_init, R3_init, + R4_init, R5_init, R6_init, R7_init, + R8_init, R9_init, R10_init, R11_init, + R12_init, R13_init, R14_init, R15_init, + zf_init, nf_init, of_init, cf_init, + cpuoff_init, gie_init, osc_init, + scg0_init, scg1_init, res_init, + ] + +regs_init = {} +for i, r in 
def hex2bcd(val):
    "Return val as BCD"
    # The hexadecimal digits of val are re-read as a decimal number, so
    # 0x1234 gives 1234; a digit above 9 makes the conversion fail.
    try:
        digits = "%x" % val
        bcd = int(digits, 10)
    except ValueError:
        raise NotImplementedError("Not defined behaviour")
    return bcd


def bcd2hex(val):
    "Return the hex value of a BCD"
    # Inverse of hex2bcd: the decimal digits of val are re-read as
    # hexadecimal, so 1234 gives 0x1234.
    try:
        digits = "0x%d" % val
        value = int(digits, 16)
    except ValueError:
        raise NotImplementedError("Not defined behaviour")
    return value
def mov_b(ir, instr, a, b):
    """IR of the byte move ``mov.b a, b``.

    A memory destination is narrowed to 8 bits and receives the low byte
    of the source; any other destination receives that byte zero-extended
    to 16 bits.  Returns ``(None, assignments, [])``.
    """
    assigns, src, dst = mng_autoinc(a, b, 8)
    if isinstance(dst, ExprMem):
        dst = ExprMem(dst.arg, 8)
        value = src[:8]
    else:
        value = src[:8].zeroExtend(16)
    assigns.append(ExprAff(dst, value))
    return None, assigns, []
def cmp_w(ir, instr, a, b):
    """IR of the word compare ``cmp.w a, b``: flags of ``b - a``, result
    discarded.  Returns ``(None, assignments, [])``.
    """
    e, a, b = mng_autoinc(a, b, 16)
    c = b - a
    e += update_flag_zn_r(c)
    # The carry/overflow helpers are not symmetric in their first two
    # arguments: they expect (minuend, subtrahend, result).  The result is
    # b - a, so b comes first, as sub_w already does for its carry.  The
    # original passed (a, b, c).
    e += update_flag_sub_cf(b, a, c)
    e += update_flag_sub_of(b, a, c)
    return None, e, []


def cmp_b(ir, instr, a, b):
    """IR of the byte compare ``cmp.b a, b``: flags of the 8-bit
    difference ``b[:8] - a[:8]``, result discarded.
    Returns ``(None, assignments, [])``.
    """
    e, a, b = mng_autoinc(a, b, 8)
    c = b[:8] - a[:8]
    e += update_flag_zn_r(c)
    # same operand order as cmp_w: minuend first
    e += update_flag_sub_cf(b[:8], a[:8], c)
    e += update_flag_sub_of(b[:8], a[:8], c)
    return None, e, []
def rrc_w(ir, instr, a):
    """IR of ``rrc.w a``: rotate ``a`` right by one bit through carry.

    Bits 1..15 of ``a`` move to bits 0..14 and the carry flag becomes
    bit 15; the carry flag then receives the former bit 0.  The zero flag
    is computed from the operand before rotation and the negative flag is
    left untouched; the original marks both choices with a
    "microcorruption" comment.  Returns ``(None, assignments, [])``.
    """
    rotated = ExprCompose([(a[1:16], 0, 15),
                           (cf, 15, 16)])
    effects = [ExprAff(a, rotated), ExprAff(cf, a[:1])]
    effects.extend(update_flag_zf(a))
    effects.extend(reset_sr_res())
    effects.append(ExprAff(of, ExprInt1(0)))
    return None, effects, []
def ComposeExprAff(dst, src):
    """Split an assignment to a composed destination into one assignment
    per component.

    Each ``(expr, start, stop)`` entry of ``dst.args`` yields
    ``ExprAff(expr, src[start:stop])``; the list keeps the order of
    ``dst.args``.
    """
    return [ExprAff(part, src[start:stop])
            for part, start, stop in dst.args]
def parse_deref_pcimm(t):
    """Parse action: return the sum of the first two items of the group
    stored in ``t[0]``."""
    group = t[0]
    base = group[0]
    disp = group[1]
    return base + disp


def parse_pcandimmimm(t):
    """Parse action: for the group ``t[0]`` return
    ``(group[0] & group[1]) + group[2]``."""
    group = t[0]
    masked = group[0] & group[1]
    return masked + group[2]
def parse_regreg(t):
    """Parse action: build an ``ExprMem`` whose address is the sum of the
    first two items of the group ``t[0]``."""
    group = t[0]
    address = group[0] + group[1]
    return ExprMem(address)
class sh4_dgpregpinc(m_arg):
    """Register-indirect operand with pre-decrement or post-increment.

    The operation name is read from ``self.op`` and the access width from
    ``self.sz``; neither is defined in this class.
    """
    parser = dgpregs_p

    def fromstring(self, s, parser_result=None):
        """Parse ``s`` and accept the result only when it is an ExprOp
        whose operator equals ``self.op``; otherwise return (None, None)."""
        start, stop = super(sh4_dgpregpinc, self).fromstring(s, parser_result)
        parsed = self.expr
        if isinstance(parsed, ExprOp) and parsed.op == self.op:
            return start, stop
        return None, None

    def decode(self, v):
        """Set ``self.expr`` to ExprOp(self.op, register v, ExprInt32(sz))."""
        # NOTE(review): this builds a two-argument ExprOp while encode()
        # matches a one-argument pattern -- confirm a decoded operand can
        # be re-encoded.
        self.expr = ExprOp(self.op, gpregs.expr[v], ExprInt32(self.sz))
        return True

    def encode(self):
        """Store the index of the operand register in ``self.value``.

        Returns False when ``self.expr`` does not match
        ``ExprOp(self.op, <reg>)`` or the register is not a general
        purpose one.
        """
        match = MatchExpr(self.expr, ExprOp(self.op, jra), [jra])
        if not match:
            return False
        reg = match[jra]
        if reg not in gpregs.expr:
            return False
        self.value = gpregs.expr.index(reg)
        return True

    @staticmethod
    def arg2str(e):
        """Return ``@-Rn`` for 'predec' and ``@Rn+`` for 'postinc'.

        Raises ValueError for any other operator.
        """
        if e.op == "predec":
            return "@-%s" % e.args[0]
        if e.op == "postinc":
            return "@%s+" % e.args[0]
        raise ValueError('unknown e.op: %s' % e.op)
class sh4_simm(sh4_imm):
    """Immediate field handled as a signed value."""
    parser = base_expr

    def decode(self, v):
        """Set ``self.expr`` to a 32-bit integer built from raw value ``v``.

        ``v`` goes through ``sign_ext(v, self.l, 32)`` and then
        ``self.decodeval``.  Always returns True.
        """
        extended = sign_ext(v, self.l, 32)
        self.expr = ExprInt32(self.decodeval(extended))
        return True

    def encode(self):
        """Compute ``self.value`` from ``self.expr``.

        Returns False when the expression is not an ExprInt.
        """
        if not isinstance(self.expr, ExprInt):
            return False
        number = int(self.expr.arg)
        sign_bit = 1 << (self.l - 1)
        if number & sign_bit:
            # bit (l - 1) set: reread the value as a negative number
            number = -((0xffffffff ^ number) + 1)
        number = self.encodeval(number)
        self.value = (number & 0xffffffff) & self.lmask
        return True
class instruction_sh4(instruction):
    """SH4 instruction with the control-flow helpers used by the
    disassembler and assembler."""
    delayslot = 0

    def __init__(self, *args, **kargs):
        # The original called super(instruction_arm, self): that name is
        # not defined in this module, so every instantiation raised
        # NameError.
        super(instruction_sh4, self).__init__(*args, **kargs)

    def dstflow(self):
        """Return True when the mnemonic starts with 'J'."""
        return self.name.startswith('J')

    # Disabled draft, kept in the original as a bare string literal:
    #
    # def dstflow2label(self, symbol_pool):
    #     e = self.args[0]
    #     if not isinstance(e, ExprInt):
    #         return
    #     if self.name == 'BLX':
    #         ad = e.arg+8+self.offset
    #     else:
    #         ad = e.arg+8+self.offset
    #     l = symbol_pool.getby_offset_create(ad)
    #     s = ExprId(l, e.size)
    #     self.args[0] = s

    def breakflow(self):
        """Return True when the mnemonic starts with 'J'."""
        return self.name.startswith('J')

    def is_subcall(self):
        """Return True for 'JSR'."""
        return self.name == 'JSR'

    def getdstflow(self, symbol_pool):
        """Return the first argument as the only destination."""
        return [self.args[0]]

    def splitflow(self):
        """Return True for 'JSR'."""
        return self.name == 'JSR'

    def get_symbol_size(self, symbol, symbol_pool):
        """Return 32 for every symbol."""
        return 32

    def fixDstOffset(self):
        """Rewrite the absolute destination in ``args[0]`` as an offset
        relative to ``self.offset + 4 + self.l``.

        Raises ValueError when the instruction has no offset yet or when
        the relative offset is not a multiple of 4.  A destination that is
        not an ExprInt is left unchanged after a warning.
        """
        e = self.args[0]
        # The original traced with bare print statements; the module
        # logger this method already uses for warnings is used instead.
        log.debug('FIX %s %s %s', e, self.offset, self.l)
        if self.offset is None:
            # the original formatted an undefined name `l` here
            raise ValueError('symbol not resolved %s' % e)
        if not isinstance(e, ExprInt):
            log.warning('zarb dst %r' % e)
            return
        off = e.arg - (self.offset + 4 + self.l)
        log.debug('%s', hex(off))
        if int(off % 4):
            raise ValueError('strange offset! %r' % off)
        self.args[0] = ExprInt32(off)
        log.debug('final %s', self.args[0])

    def get_args_expr(self):
        """Return a new list holding the instruction arguments."""
        return list(self.args)
class bs_dgpreg(sh4_dgpreg):
    """Register-indirect operand; inherits all behaviour from sh4_dgpreg."""
    # Same parser object that sh4_dgpreg itself declares.
    parser = dgpregs_base
cls=(sh4_dgpregpinc,), op='postinc', sz=32, fname="rn") +d32rmpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=32, fname="rm") + +d08rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=8, fname="rn") +d08rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=8, fname="rm") + +d16rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=16, fname="rn") +d16rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=16, fname="rm") + +d32rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=32, fname="rn") +d32rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=32, fname="rm") + + +u08imm = bs(l=8, cls=(sh4_imm,), fname="imm") +dr0gbr = bs(l=0, cls=(bs_dr0gbr,), sz=8) + +d08gpreg = bs(l=4, cls=(bs_dgpreg,), sz=8) +d32gpreg = bs(l=4, cls=(bs_dgpreg,), sz=32) + +frn = bs(l=4, cls=(sh4_freg,), fname="frn") +frm = bs(l=4, cls=(sh4_freg,), fname="frm") + +bd08r0gp = bs(l=4, cls=(bs_dr0gp,), sz=8) +bd16r0gp = bs(l=4, cls=(bs_dr0gp,), sz=16) +bd32r0gp = bs(l=4, cls=(bs_dr0gp,), sz=32) + +drn = bs(l=3, cls=(sh4_dr,), fname="drn") +drm = bs(l=3, cls=(sh4_dr,), fname="drm") + + +def addop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_sh4,), dct) + +addop("mov", [bs('1110'), rn, s08imm], [s08imm, rn]) +addop("mov_w", [bs('1001'), rn, dpc16imm], [dpc16imm, rn]) +addop("mov_l", [bs('1101'), rn, dpc32imm], [dpc32imm, rn]) +addop("mov", [bs('0110', fname="opc"), rn, rm, bs('0011')], [rm, rn]) +addop("mov_b", [bs('0010', fname="opc"), d08_rn, rm, bs('0000')], [rm, d08_rn]) +addop("mov_w", [bs('0010', fname="opc"), d16_rn, rm, bs('0001')], [rm, d16_rn]) +addop("mov_l", [bs('0010', fname="opc"), d32_rn, rm, bs('0010')], [rm, d32_rn]) +addop("mov_b", [bs('0110', fname="opc"), rn, d08_rm, bs('0000')], [d08_rm, rn]) +addop("mov_w", [bs('0110', fname="opc"), rn, d16_rm, bs('0001')], [d16_rm, rn]) +addop("mov_l", [bs('0110', fname="opc"), rn, d32_rm, bs('0010')], [d32_rm, rn]) 
+addop("mov_b", + [bs('0010', fname="opc"), d08rnpdec, rm, bs('0100')], [rm, d08rnpdec]) +addop("mov_w", + [bs('0010', fname="opc"), d16rnpdec, rm, bs('0101')], [rm, d16rnpdec]) +addop("mov_l", + [bs('0010', fname="opc"), d32rnpdec, rm, bs('0110')], [rm, d32rnpdec]) +addop("mov_b", + [bs('0110', fname="opc"), rn, d08rmpinc, bs('0100')], [rm, d08rnpinc]) +addop("mov_w", + [bs('0110', fname="opc"), rn, d16rmpinc, bs('0101')], [d16rmpinc, rn]) +addop("mov_l", + [bs('0110', fname="opc"), rn, d32rmpinc, bs('0110')], [d32rmpinc, rn]) +addop("mov_b", [bs('10000000', fname='opc'), bsr0, d08rnimm, dimm4]) +addop("mov_w", [bs('10000001', fname='opc'), bsr0, d16rnimm, dimm4]) +addop("mov_l", [bs('0001', fname='opc'), d32rnimm, rm, dimm4], [rm, d32rnimm]) +addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bsr0]) +addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bsr0]) +addop("mov_l", [bs('0101', fname='opc'), rn, d32rmimm, dimm4], [d32rmimm, rn]) +addop("mov_b", + [bs('0000', fname='opc'), bd08r0gp, rm, bs('0100')], [rm, bd08r0gp]) +addop("mov_w", + [bs('0000', fname='opc'), bd16r0gp, rm, bs('0101')], [rm, bd16r0gp]) +addop("mov_l", + [bs('0000', fname='opc'), bd32r0gp, rm, bs('0110')], [rm, bd32r0gp]) +addop("mov_b", + [bs('0000', fname='opc'), rn, bd08r0gp, bs('1100')], [bd08r0gp, rn]) +addop("mov_w", + [bs('0000', fname='opc'), rn, bd16r0gp, bs('1101')], [bd16r0gp, rn]) +addop("mov_l", + [bs('0000', fname='opc'), rn, bd32r0gp, bs('1110')], [bd32r0gp, rn]) + +addop("mov_b", [bs('11000000'), bsr0, d08gbrimm8]) +addop("mov_w", [bs('11000001'), bsr0, d16gbrimm8]) +addop("mov_l", [bs('11000010'), bsr0, d32gbrimm8]) + +addop("mov_b", [bs('11000100'), d08gbrimm8, bsr0]) +addop("mov_w", [bs('11000101'), d16gbrimm8, bsr0]) +addop("mov_l", [bs('11000110'), d32gbrimm8, bsr0]) + +addop("mov", [bs('11000111'), pc32imm, bsr0]) + +addop("swapb", [bs('0110'), rn, rm, bs('1000')], [rm, rn]) +addop("swapw", [bs('0110'), rn, rm, bs('1001')], [rm, rn]) +addop("xtrct", 
[bs('0010'), rn, rm, bs('1101')], [rm, rn]) + + +addop("add", [bs('0011'), rn, rm, bs('1100')], [rm, rn]) +addop("add", [bs('0111'), rn, s08imm], [s08imm, rn]) +addop("addc", [bs('0011'), rn, rm, bs('1110')], [rm, rn]) +addop("addv", [bs('0011'), rn, rm, bs('1111')], [rm, rn]) + + +addop("cmpeq", [bs('10001000'), s08imm, bsr0]) + + +addop("cmpeq", [bs('0011'), rn, rm, bs('0000')], [rm, rn]) +addop("cmphs", [bs('0011'), rn, rm, bs('0010')], [rm, rn]) +addop("cmpge", [bs('0011'), rn, rm, bs('0011')], [rm, rn]) +addop("cmphi", [bs('0011'), rn, rm, bs('0110')], [rm, rn]) +addop("cmpgt", [bs('0011'), rn, rm, bs('0111')], [rm, rn]) + + +addop("cmppz", [bs('0100'), rn, bs('00010001')]) +addop("cmppl", [bs('0100'), rn, bs('00010101')]) +addop("cmpstr", [bs('0010'), rn, rm, bs('1100')], [rm, rn]) + + +addop("div1", [bs('0011'), rn, rm, bs('0100')], [rm, rn]) + +addop("div0s", [bs('0010'), rn, rm, bs('0111')], [rm, rn]) +addop("div0u", [bs('0000000000011001')]) + +addop("dmuls", [bs('0011'), rn, rm, bs('1101')], [rm, rn]) +addop("dmulu", [bs('0011'), rn, rm, bs('0101')], [rm, rn]) + +addop("dt", [bs('0100'), rn, bs('00010000')]) + + +addop("extsb", [bs('0110'), rn, rm, bs('1110')], [rm, rn]) +addop("extsw", [bs('0110'), rn, rm, bs('1111')], [rm, rn]) +addop("extub", [bs('0110'), rn, rm, bs('1100')], [rm, rn]) +addop("extuw", [bs('0110'), rn, rm, bs('1101')], [rm, rn]) + +addop("mac_l", [bs('0000', fname='opc'), d32rnpinc, + d32rmpinc, bs('1111')], [d32rmpinc, d32rnpinc]) +addop("mac_w", [bs('0100', fname='opc'), d16rnpinc, + d16rmpinc, bs('1111')], [d16rmpinc, d16rnpinc]) + +addop("mull", [bs('0000'), rn, rm, bs('0111')], [rm, rn]) +addop("mulsw", [bs('0010'), rn, rm, bs('1111')], [rm, rn]) +addop("muluw", [bs('0010'), rn, rm, bs('1110')], [rm, rn]) + +addop("neg", [bs('0110'), rn, rm, bs('1011')], [rm, rn]) +addop("negc", [bs('0110'), rn, rm, bs('1010')], [rm, rn]) + +addop("sub", [bs('0011'), rn, rm, bs('1000')], [rm, rn]) +addop("subc", [bs('0011'), rn, rm, bs('1010')], 
[rm, rn]) +addop("subv", [bs('0011'), rn, rm, bs('1011')], [rm, rn]) + +addop("and", [bs('0010'), rn, rm, bs('1001')], [rm, rn]) +addop("and", [bs('11001001'), u08imm, bsr0]) +addop("and_b", [bs('11001101'), u08imm, dr0gbr]) + +addop("not", [bs('0110'), rn, rm, bs('0111')], [rm, rn]) + +addop("or", [bs('0010'), rn, rm, bs('1011')], [rm, rn]) + +addop("or", [bs('11001011'), u08imm, bsr0]) +addop("or_b", [bs('11001111'), u08imm, dr0gbr]) + +addop("tas_b", [bs('0100'), d08gpreg, bs('00011011')]) +addop("tst", [bs('0010'), rn, rm, bs('1000')], [rm, rn]) +addop("tst", [bs('11001000'), u08imm, bsr0]) +addop("tst_b", [bs('11001100'), u08imm, dr0gbr]) + + +addop("xor", [bs('0010'), rn, rm, bs('1010')], [rm, rn]) +addop("xor", [bs('11001010'), u08imm, bsr0]) +addop("xor_b", [bs('11001110'), u08imm, dr0gbr]) + +addop("rotl", [bs('0100'), rn, bs('00000100')]) +addop("rotr", [bs('0100'), rn, bs('00000101')]) +addop("rotcl", [bs('0100'), rn, bs('00100100')]) +addop("rotcr", [bs('0100'), rn, bs('00100101')]) + +addop("shad", [bs('0100'), rn, rm, bs('1100')], [rm, rn]) +addop("shal", [bs('0100'), rn, bs('00100000')]) +addop("shar", [bs('0100'), rn, bs('00100001')]) +addop("shld", [bs('0100'), rn, rm, bs('1101')], [rm, rn]) + +addop("shll", [bs('0100'), rn, bs('00000000')]) +addop("shlr", [bs('0100'), rn, bs('00000001')]) +addop("shll2", [bs('0100'), rn, bs('00001000')]) +addop("shlr2", [bs('0100'), rn, bs('00001001')]) +addop("shll8", [bs('0100'), rn, bs('00011000')]) +addop("shlr8", [bs('0100'), rn, bs('00011001')]) +addop("shll16", [bs('0100'), rn, bs('00101000')]) +addop("shlr16", [bs('0100'), rn, bs('00101001')]) + + +addop("bf", [bs('10001011'), s08imm]) +""" + def splitflow(self): + return True + def breakflow(self): + return True + def dstflow(self): + return True + def dstflow2label(self, symbol_pool): + e = self.args[0].expr + ad = e.arg*2+4+self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0].expr = s +""" + +addop("bfs", 
[bs('10001111'), s08imm]) +""" + delayslot = 1 +""" +addop("bt", [bs('10001001'), s08imm]) + +addop("bts", [bs('10001101'), s08imm]) + +addop("bra", [bs('1010'), s12imm]) +""" + delayslot = 1 + def breakflow(self): + return True + def dstflow(self): + return True + def dstflow2label(self, symbol_pool): + e = self.args[0].expr + ad = e.arg*2+4+self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0].expr = s +""" + +addop("braf", [bs('0000'), rn, bs('00100011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def dstflow(self): + return True +""" +addop("bsr", [bs('1011'), s12imm]) + +addop("bsrf", [bs('0000'), rn, bs('00000011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def is_subcall(self): + return True + def splitflow(self): + return True +""" + +addop("jmp_l", [bs('0100'), d32gpreg, bs('00101011')]) +""" + delayslot = 1 + def breakflow(self): + return True +""" + +addop("jsr_l", [bs('0100'), d32gpreg, bs('00001011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def is_subcall(self): + return True + def splitflow(self): + return True +""" + +addop("rts", [bs('0000000000001011')]) +""" + delayslot = 1 + def breakflow(self): + return True +""" +addop("clrmac", [bs('0000000000101000')]) +addop("clrs", [bs('0000000001001000')]) +addop("clrt", [bs('0000000000001000')]) + + +addop("ldc", [bs('0100'), rm, bssr, bs('00001110')]) +addop("ldc", [bs('0100'), rm, bsgbr, bs('00011110')]) +addop("ldc", [bs('0100'), rm, bsvbr, bs('00101110')]) +addop("ldc", [bs('0100'), rm, bsssr, bs('00111110')]) +addop("ldc", [bs('0100'), rm, bsspc, bs('01001110')]) +addop("ldc", [bs('0100'), rm, bsdbr, bs('11111010')]) +addop("ldc", [bs('0100'), rm, bs('1'), brn, bs('1110')], [rm, brn]) +addop("ldc_l", [bs('0100'), d32rmpinc, bssr, bs('00000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsgbr, bs('00010111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsvbr, bs('00100111')]) +addop("ldc_l", [bs('0100'), 
d32rmpinc, bsssr, bs('00110111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsspc, bs('01000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsdbr, bs('11110110')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs('1'), brn, bs('0111')]) +addop("lds", [bs('0100'), rm, bsmach, bs('00001010')]) +addop("lds", [bs('0100'), rm, bsmacl, bs('00011010')]) +addop("lds", [bs('0100'), rm, bspr, bs('00101010')]) +addop("lds_l", [bs('0100'), d32rmpinc, bsmach, bs('00000110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bsmacl, bs('00010110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bspr, bs('00100110')]) +addop("ldtlb", [bs('0000000000111000')]) + +addop("movca_l", [bs('0000'), bsr0, d32gpreg, bs('11000011')]) +addop("nop", [bs('0000000000001001')]) +addop("ocbi_l", [bs('0000'), d32gpreg, bs('10010011')]) +addop("ocbp_l", [bs('0000'), d32gpreg, bs('10100011')]) +addop("ocbwb_l", [bs('0000'), d32gpreg, bs('10110011')]) +addop("pref_l", [bs('0000'), d32gpreg, bs('10000011')]) + + +addop("rte", [bs('0000000000101011')]) +addop("sets", [bs('0000000001011000')]) +addop("sett", [bs('0000000000011000')]) +addop("sleep", [bs('0000000000011011')]) +addop("stc", [bs('0000'), bssr, rn, bs('00000010')]) +addop("stc", [bs('0000'), bsgbr, rn, bs('00010010')]) +addop("stc", [bs('0000'), bsvbr, rn, bs('00100010')]) +addop("stc", [bs('0000'), bsssr, rn, bs('00110010')]) +addop("stc", [bs('0000'), bsspc, rn, bs('01000010')]) +addop("stc", [bs('0000'), bssgr, rn, bs('00111010')]) +addop("stc", [bs('0000'), bsdbr, rn, bs('11111010')]) +addop("stc", [bs('0000'), rn, bs('1'), brm, bs('0010')], [brm, rn]) + +addop("stc_l", [bs('0100'), bssr, d32rmpdec, bs('00000011')]) +addop("stc_l", [bs('0100'), bsgbr, d32rmpdec, bs('00010011')]) +addop("stc_l", [bs('0100'), bsvbr, d32rmpdec, bs('00100011')]) +addop("stc_l", [bs('0100'), bsssr, d32rmpdec, bs('00110011')]) +addop("stc_l", [bs('0100'), bsspc, d32rmpdec, bs('01000011')]) +addop("stc_l", [bs('0100'), bssgr, d32rmpdec, bs('00110010')]) +addop("stc_l", 
[bs('0100'), bsdbr, d32rmpdec, bs('11110010')]) +addop("stc_l", + [bs('0100'), d32rnpdec, bs('1'), brm, bs('0011')], [brm, d32rnpdec]) + +# float +addop("sts", [bs('0000'), bsmach, rm, bs('00001010')]) +addop("sts", [bs('0000'), bsmacl, rm, bs('00011010')]) +addop("sts", [bs('0000'), bspr, rm, bs('00101010')]) +addop("sts_l", [bs('0100'), bsmach, d32rmpdec, bs('00000010')]) +addop("sts_l", [bs('0100'), bsmacl, d32rmpdec, bs('00010010')]) +addop("sts_l", + [bs('0100'), d32rnpdec, bspr, bs('00100010')], [bspr, d32rnpdec]) +addop("trapa", [bs('11000011'), u08imm]) + +addop("fldi0", [bs('1111'), frn, bs('10001101')]) +addop("fldi1", [bs('1111'), frn, bs('10011101')]) +addop("fmov", [bs('1111'), frn, frm, bs('1100')], [frm, frn]) +addop("fmov_s", [bs('1111'), frn, d32gpreg, bs('1000')], [d32gpreg, frn]) +addop("fmov_s", [bs('1111'), frn, bd32r0gp, bs('0110')], [bd32r0gp, frn]) +addop("fmov_s", [bs('1111'), frn, d32rmpinc, bs('1001')], [d32rmpinc, frn]) +addop("fmov_s", [bs('1111'), d32gpreg, frm, bs('1010')], [frm, d32gpreg]) +addop("fmov_s", [bs('1111'), d32rnpdec, frm, bs('1011')], [frm, d32rnpdec]) +addop("fmov_s", [bs('1111'), bd32r0gp, frm, bs('0111')], [frm, bd32r0gp]) + +addop("flds", [bs('1111'), frm, bsfpul, bs('00011101')]) +addop("fsts", [bs('1111'), bsfpul, frm, bs('00001101')]) +addop("fabs", [bs('1111'), frn, bs('01011101')]) +addop("fadd", [bs('1111'), frn, frm, bs('0000')], [frm, frn]) +addop("fcmpeq", [bs('1111'), frn, frm, bs('0100')], [frm, frn]) +addop("fcmpgt", [bs('1111'), frn, frm, bs('0101')], [frm, frn]) +addop("fdiv", [bs('1111'), frn, frm, bs('0011')], [frm, frn]) + +addop("float", [bs('1111'), bsfpul, frn, bs('00101101')]) +addop("fmac", [bs('1111'), bsfr0, frn, frm, bs('1110')], [bsfr0, frm, frn]) +addop("fmul", [bs('1111'), frn, frm, bs('0010')], [frm, frn]) +addop("fneg", [bs('1111'), frn, bs('01001101')]) +addop("fsqrt", [bs('1111'), frn, bs('01101101')]) +addop("fsub", [bs('1111'), frn, frm, bs('0001')], [frm, frn]) +addop("ftrc", 
[bs('1111'), frm, bsfpul, bs('00111101')]) + + +if __name__ == '__main__': + import os + import time + filename = os.environ.get('PYTHONSTARTUP') + if filename and os.path.isfile(filename): + execfile(filename) + + def h2i(s): + return s.replace(' ', '').decode('hex') + + reg_tests_sh4 = [ + # vxworks + ("c80022f2 MOV 0x10, R6", + "10e6"), + ("c8002250 MOV 0xFFFFFFFF, R0", + "ffe0"), + ("c800226a MOV.W @(PC,0xC0), R9", + "5e99"), + ("c8002006 MOV.L @(PC&0xFFFFFFFC,0x10), R15", + "03df"), + ("c800cfc4 MOV R4, R9", + "4369"), + ("C8005004 MOV.B R1, @R2", + "1022"), + ("C8002E04 MOV.W R0, @R8", + '0128'), + ("c800223e MOV.L R1, @R14", + "122E"), + + ("c8002002 MOV.L @R1, R0", + "1260"), + ("c8002E08 MOV.W @R8, R1", + "8161"), + ("c800357c MOV.B @R4, R1", + "4061"), + + ("c8002220 MOV.L R8, @-R15", + "862f"), + ("c8022a66 MOV.B R4, @-R0", + "4420"), + ("c8002310 MOV.L @R15+, R14", + "f66e"), + ("c80038a4 MOV.W @R8+, R5", + "8565"), + ("xxxxxxxx MOV.B R0, @(R8,0x2)", + "8280"), + ("xxxxxxxx MOV.W R0, @(R8,0x4)", + "8281"), + ("c8002274 MOV.L R0, @(R9,0x8)", + "0219"), + ("xxxxxxxx MOV.B @(R8,0x8), R0", + "8884"), + ("xxxxxxxx MOV.W @(R8,0x10), R0", + "8885"), + ("c8002500 MOV.L @(R14,0x4), R5", + "e155"), + ("xxxxxxxx MOV.B R4, @(R0,R8)", + "4408"), + ("xxxxxxxx MOV.W R4, @(R0,R8)", + "4508"), + ("xxxxxxxx MOV.L R4, @(R0,R8)", + "4608"), + ("xxxxxxxx MOV.B @(R0,R4), R8", + "4c08"), + ("xxxxxxxx MOV.W @(R0,R4), R8", + "4d08"), + ("xxxxxxxx MOV.L @(R0,R4), R8", + "4e08"), + ("xxxxxxxx MOV.B R0, @(GBR,0x4)", + "04c0"), + ("xxxxxxxx MOV.W R0, @(GBR,0x8)", + "04c1"), + ("xxxxxxxx MOV.L R0, @(GBR,0x10)", + "04c2"), + ("xxxxxxxx MOV.B @(GBR,0x4), R0", + "04c4"), + ("xxxxxxxx MOV.W @(GBR,0x8), R0", + "04c5"), + ("xxxxxxxx MOV.L @(GBR,0x10), R0", + "04c6"), + #("xxxxxxxx MOV PC&0xFFFFFFFC+0x14, R0", + # "04c7"), + ("xxxxxxxx SWAPB R2, R1", + "2861"), + ("c803f492 SWAPW R4, R9", + "4969"), + ("xxxxxxxx XTRCT R4, R9", + "4d29"), + ("c8002270 ADD R12, R9", + "cc39"), + ("c8002238 
ADD 0xFFFFFFFC, R15", + "FC7F"), + ("c80164cc ADDC R0, R1", + "0e31"), + ("xxxxxxxx ADDV R0, R1", + "0f31"), + ("c8002994 CMPEQ 0x20, R0", + "2088"), + ("c80029d2 CMPEQ R2, R1", + "2031"), + ("c8003964 CMPHS R5, R3", + "5233"), + ("c8002df2 CMPGE R0, R1", + "0331"), + ("c80029a4 CMPHI R1, R0", + "1630"), + ("c8002bfe CMPGT R10, R8", + "a738"), + ("c8002bf8 CMPPZ R0", + "1140"), + ("c8006294 CMPPL R2", + "1542"), + ("c8033800 CMPSTR R14, R4", + "ec24"), + ("xxxxxxxx DIV1 R14, R4", + "e434"), + ("c8d960de DIV0S R0, R3", + "0723"), + ("xxxxxxxx DIV0U ", + "1900"), + ("c800dcd8 DMULS R1, R0", + "1d30"), + ("c80164da DMULU R3, R8", + "3538"), + ("c80024e2 DT R10", + "104a"), + ("c800343a EXTSB R1, R1", + "1e61"), + ("c8002bf6 EXTSW R0, R0", + "0f60"), + ("c8002fba EXTUB R0, R0", + "0c60"), + ("c8002398 EXTUW R0, R0", + "0d60"), + ("xxxxxxxx MAC.L @R5+, @R4+", + "5f04"), + ("xxxxxxxx MAC.W @R5+, @R4+", + "5f44"), + ("c8005112 MULL R1, R3", + "1703"), + ("xxxxxxxx MULSW R1, R3", + "1F23"), + ("xxxxxxxx MULUW R1, R3", + "1e23"), + ("c8004856 NEG R1, R8", + "1b68"), + ("c80054fc NEGC R9, R7", + "9a67"), + ("c8004b36 SUB R1, R5", + "1835"), + ("c800a536 SUBC R1, R0", + "1a30"), + ("xxxxxxxx SUBV R1, R0", + "1b30"), + ("c80023ca AND R0, R5", + "0925"), + ("c800257c AND 0x2, R0", + "02c9"), + ("xxxxxxxx AND.B 0x2, @(GBR,R0)", + "02cd"), + ("c80065fe NOT R5, R1", + "5761"), + ("c8002586 OR R10, R1", + "ab21"), + ("c80023aa OR 0x4, R0", + "04cb"), + ("xxxxxxxx OR.B 0x4, @(GBR,R0)", + "04cf"), + ("xxxxxxxx TAS.B @R8", + "1b48"), + ("c8002368 TST R10, R13", + "a82d"), + ("c8003430 TST 0x11, R0", + "11c8"), + ("xxxxxxxx TST.B 0x4, @(GBR,R0)", + "04cc"), + ("c8003978 XOR R1, R6", + "1a26"), + ("c8028270 XOR 0x1, R0", + "01ca"), + ("xxxxxxxx XOR.B 0x4, @(GBR,R0)", + "04cE"), + ("xxxxxxxx ROTL R9", + "0449"), + ("xxxxxxxx ROTR R9", + "0549"), + ("xxxxxxxx ROTCL R9", + "2449"), + ("xxxxxxxx ROTCR R9", + "2549"), + ("xxxxxxxx SHAL R11", + "204b"), + ("xxxxxxxx SHAR R11", + "214b"), + 
("c800236c SHLD R6, R10", + "6d4a"), + ("xxxxxxxx SHLL R11", + "004b"), + ("xxxxxxxx SHLR R11", + "014b"), + ("xxxxxxxx SHLL2 R11", + "084b"), + ("xxxxxxxx SHLR2 R11", + "094b"), + ("xxxxxxxx SHLL8 R11", + "184b"), + ("xxxxxxxx SHLR8 R11", + "194b"), + ("xxxxxxxx SHLL16 R11", + "284b"), + ("xxxxxxxx SHLR16 R11", + "294b"), + ("c8002c00 BF 0xFFFFFFF4", + "f48b"), + ("c80023c2 BFS 0xFFFFFFD8", + "d88f"), + ("c8002266 BT 0x5B", + "5b89"), + ("c8002266 BTS 0x5C", + "5c8d"), + ("c8002326 BRA 0xFFFFFFF0", + "f0af"), + ("c8004b4a BRAF R1", + "2301"), + ("c8055da4 BSR 0xFFFFFE48", + "48be"), + ("xxxxxxxx BSRF R1", + "0301"), + ("c80027b4 JMP.L @R1", + "2b41"), + ("c800200c JSR.L @R0", + "0b40"), + ("c800231a RTS ", + "0b00"), + ("xxxxxxxx CLRMAC ", + "2800"), + ("xxxxxxxx CLRS ", + "4800"), + ("xxxxxxxx CLRT ", + "0800"), + ("c8002004 LDC R0, SR", + "0e40"), + ("c800200e LDC R1, GBR", + "1e41"), + ("c8064bd4 LDC R8, VBR", + "2e48"), + ("xxxxxxxx LDC R8, SSR", + "3e48"), + ("xxxxxxxx LDC R8, SPC", + "4e48"), + ("xxxxxxxx LDC R8, DBR", + "fa48"), + ("xxxxxxxx LDC R8, R0_BANK", + "8e48"), + ("xxxxxxxx LDC.L @R8+, SR", + "0748"), + ("xxxxxxxx LDC.L @R8+, GBR", + "1748"), + ("xxxxxxxx LDC.L @R8+, VBR", + "2748"), + ("xxxxxxxx LDC.L @R8+, SSR", + "3748"), + ("xxxxxxxx LDC.L @R8+, SPC", + "4748"), + ("xxxxxxxx LDC.L @R8+, DBR", + "f648"), + ("xxxxxxxx LDC.L @R8+, R2_BANK", + "a748"), + ("xxxxxxxx LDS R8, MACH", + "0a48"), + ("xxxxxxxx LDS R8, MACL", + "1a48"), + ("xxxxxxxx LDS R8, PR", + "2a48"), + ("xxxxxxxx LDS.L @R8+, MACH", + "0648"), + ("xxxxxxxx LDS.L @R8+, MACL", + "1648"), + ("xxxxxxxx LDTLB ", + "3800"), + ("xxxxxxxx MOVCA.L R0, @R8", + "c308"), + ("xxxxxxxx NOP ", + "0900"), + ("xxxxxxxx OCBI.L @R8", + "9308"), + ("xxxxxxxx OCBP.L @R8", + "a308"), + ("xxxxxxxx OCBWB.L @R8", + "b308"), + ("xxxxxxxx PREF.L @R8", + "8308"), + ("xxxxxxxx STS MACH, R8", + "0a08"), + ("xxxxxxxx STS MACL, R8", + "1a08"), + ("xxxxxxxx STS PR, R8", + "2a08"), + ("xxxxxxxx STS.L MACH, @-R8", + 
"0248"), + ("xxxxxxxx STS.L MACL, @-R8", + "1248"), + ("xxxxxxxx STS.L PR, @-R8", + "2248"), + + + + + + ("c8004b50 STC GBR, R0", + "1200"), + ("c8064516 STC VBR, R1", + "2201"), + ("c8004b54 STC SSR, R1", + "3201"), + ("c801ed6c STC SPC, R0", + "4200"), + ("xxxxxxxx STC SGR, R0", + "3a00"), + ("xxxxxxxx STC DBR, R0", + "fa00"), + ("c8004b56 STC R3_BANK, R1", + "B201"), + ("xxxxxxxx STC.L SR, @-R8", + "0348"), + ("xxxxxxxx STC.L GBR, @-R8", + "1348"), + ("xxxxxxxx STC.L VBR, @-R8", + "2348"), + ("xxxxxxxx STC.L SSR, @-R8", + "3348"), + ("xxxxxxxx STC.L SPC, @-R8", + "4348"), + ("xxxxxxxx STC.L DBR, @-R8", + "f248"), + ("xxxxxxxx STC.L R7_BANK, @-R8", + "f348"), + ("c803b130 TRAPA 0xE0", + "e0c3"), + + ("xxxxxxxx FLDI0 FR8", + "8df8"), + ("xxxxxxxx FLDI1 FR8", + "9df8"), + ("c8019ca8 FMOV FR15, FR5", + "fcf5"), + ("c800affe FMOV.S @R1, FR4", + "18f4"), + ("c80283f6 FMOV.S @(R0,R14), FR5", + "e6f5"), + ("c800aff8 FMOV.S @R1+, FR5", + "19f5"), + ("c80cb692 FMOV.S FR0, @R2", + "0af2"), + ("c80cb694 FMOV.S FR1, @-R2", + "1bf2"), + ("c80283aa FMOV.S FR1, @(R0,R14)", + "17fe"), + ("c800ce16 FLDS FR13, FPUL", + "1dfd"), + ("c800ce08 FSTS FPUL, FR13", + "0dfd"), + ("xxxxxxxx FABS FR8", + "5df8"), + ("c800cf28 FADD FR2, FR6", + "20f6"), + ("c805dacc FCMPEQ FR2, FR6", + "24f6"), + ("c8028406 FCMPGT FR4, FR2", + "45f2"), + ("c8019ca4 FDIV FR2, FR12", + "23fc"), + ("c800ce5e FLOAT FPUL, FR2", + "2df2"), + ("xxxxxxxx FMAC FR0, FR1, FR2", + "1ef2"), + ("c800b006 FMUL FR2, FR4", + "22f4"), + ("c805e412 FNEG FR14", + "4dfe"), + ("xxxxxxxx FSQRT FR14", + "6dfe"), + ("c8030400 FSUB FR4, FR2", + "41f2"), + ("c80303ba FTRC FR2, FPUL", + "3df2"), + + ] + + for s, l in reg_tests_sh4: + print "-" * 80 + s = s[12:] + b = h2i((l)) + print b.encode('hex') + mn = mn_sh4.dis(b, None) + print [str(x) for x in mn.args] + print s + print mn + assert(str(mn) == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + l = mn_sh4.fromstring(s, None) + # print l + assert(str(l) == s) + a = 
mn_sh4.asm(l, None) + print [x for x in a] + print repr(b) + # print mn.args + assert(b in a) + + # speed test + o = "" + for s, l, in reg_tests_sh4: + s = s[12:] + b = h2i((l)) + o += b + + while len(o) < 1000: + o += o + bs = bin_stream_str(o) + off = 0 + instr_num = 0 + ts = time.time() + while off < bs.getlen(): + mn = mn_sh4.dis(bs, None, off) + # print instr_num, off, mn.l, str(mn) + instr_num += 1 + off += mn.l + print 'instr per sec:', instr_num / (time.time() - ts) + + import cProfile + cProfile.run(r'mn_sh4.dis("\x17\xfe", None)') diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py new file mode 100644 index 00000000..bfc61b04 --- /dev/null +++ b/miasm2/arch/sh4/regs.py @@ -0,0 +1,80 @@ +from miasm2.expression.expression import * +from miasm2.core.cpu import reg_info, gen_reg + +# GP +gpregs_str = ['R%d' % r for r in xrange(0x10)] +gpregs_expr = [ExprId(x, 32) for x in gpregs_str] +gpregs = reg_info(gpregs_str, gpregs_expr) + +bgpregs_str = ['R%d_BANK' % r for r in xrange(0x8)] +bgpregs_expr = [ExprId(x, 32) for x in bgpregs_str] +bgpregs = reg_info(bgpregs_str, bgpregs_expr) + +fregs_str = ['FR%d' % r for r in xrange(0x10)] +fregs_expr = [ExprId(x, 32) for x in fregs_str] +fregs = reg_info(fregs_str, fregs_expr) + +dregs_str = ['DR%d' % r for r in xrange(0x8)] +dregs_expr = [ExprId(x, 32) for x in dregs_str] +dregs = reg_info(dregs_str, dregs_expr) + + +gen_reg('PC', globals()) +gen_reg('PR', globals()) +gen_reg('R0', globals()) +gen_reg('GBR', globals()) +gen_reg('SR', globals()) +gen_reg('VBR', globals()) +gen_reg('SSR', globals()) +gen_reg('SPC', globals()) +gen_reg('SGR', globals()) +gen_reg('DBR', globals()) +gen_reg('MACH', globals()) +gen_reg('MACL', globals()) +gen_reg('FPUL', globals()) +gen_reg('FR0', globals()) + +R0 = gpregs_expr[0] +R1 = gpregs_expr[1] +R2 = gpregs_expr[2] +R3 = gpregs_expr[3] +R4 = gpregs_expr[4] +R5 = gpregs_expr[5] +R6 = gpregs_expr[6] +R7 = gpregs_expr[7] +R8 = gpregs_expr[8] +R9 = gpregs_expr[9] +R10 = 
gpregs_expr[10] +R11 = gpregs_expr[11] +R12 = gpregs_expr[12] +R13 = gpregs_expr[13] +R14 = gpregs_expr[14] +R15 = gpregs_expr[15] + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + + +all_regs_ids = [ + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, of, cf, + + PC, PR, R0, GBR, SR, VBR, SSR, SPC, + SGR, DBR, MACH, MACL, FPUL, FR0] + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + all_regs_ids_init[i].is_term = True + regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/x86/__init__.py b/miasm2/arch/x86/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm2/arch/x86/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py new file mode 100644 index 00000000..5ccc4a9c --- /dev/null +++ b/miasm2/arch/x86/arch.py @@ -0,0 +1,3839 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import re +from miasm2.expression.expression import * +from pyparsing import * +from miasm2.core.cpu import * +from collections import defaultdict +import regs as regs_module +from regs import * +from miasm2.ir.ir import * + +log = logging.getLogger("x86_arch") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +f_isad = "AD" +f_s08 = "S08" +f_u08 = "U08" +f_s16 = "S16" +f_u16 = "U16" +f_s32 = "S32" +f_u32 = "U32" +f_s64 = "S64" +f_u64 = "U64" +f_imm = 'IMM' + +f_imm2size = {f_s08: 8, f_s16: 16, f_s32: 32, f_s64: 64, + f_u08: 8, f_u16: 16, f_u32: 32, f_u64: 64} + + +size2gpregs = {8: gpregs08, 16: gpregs16, + 32: gpregs32, 64: 
gpregs64} + + +replace_regs64 = { + AL: RAX[:8], CL: RCX[:8], DL: RDX[:8], BL: RBX[:8], + AH: RAX[8:16], CH: RCX[8:16], DH: RDX[8:16], BH: RBX[8:16], + SPL: RSP[0:8], BPL: RBP[0:8], SIL: RSI[0:8], DIL: RDI[0:8], + R8B: R8[0:8], R9B: R9[0:8], R10B: R10[0:8], R11B: R11[0:8], + R12B: R12[0:8], R13B: R13[0:8], R14B: R14[0:8], R15B: R15[0:8], + + AX: RAX[:16], CX: RCX[:16], DX: RDX[:16], BX: RBX[:16], + SP: RSP[:16], BP: RBP[:16], SI: RSI[:16], DI: RDI[:16], + R8W: R8[:16], R9W: R9[:16], R10W: R10[:16], R11W: R11[:16], + R12W: R12[:16], R13W: R13[:16], R14W: R14[:16], R15W: R15[:16], + + + EAX: RAX[:32], ECX: RCX[:32], EDX: RDX[:32], EBX: RBX[:32], + ESP: RSP[:32], EBP: RBP[:32], ESI: RSI[:32], EDI: RDI[:32], + R8D: R8[:32], R9D: R9[:32], R10D: R10[:32], R11D: R11[:32], + R12D: R12[:32], R13D: R13[:32], R14D: R14[:32], R15D: R15[:32], + + IP: RIP[:16], EIP: RIP[:32], + +} + +replace_regs32 = { + AL: EAX[:8], CL: ECX[:8], DL: EDX[:8], BL: EBX[:8], + AH: EAX[8:16], CH: ECX[8:16], DH: EDX[8:16], BH: EBX[8:16], + + AX: EAX[:16], CX: ECX[:16], DX: EDX[:16], BX: EBX[:16], + SP: ESP[:16], BP: EBP[:16], SI: ESI[:16], DI: EDI[:16], + + IP: EIP[:16] +} + +replace_regs16 = { + AL: AX[:8], CL: CX[:8], DL: DX[:8], BL: BX[:8], + AH: AX[8:16], CH: CX[8:16], DH: DX[8:16], BH: BX[8:16], + + AX: AX[:16], CX: CX[:16], DX: DX[:16], BX: BX[:16], + SP: SP[:16], BP: BP[:16], SI: SI[:16], DI: DI[:16], +} + +replace_regs = {16: replace_regs16, + 32: replace_regs32, + 64: replace_regs64} + + +# parser helper ########### +PLUS = Suppress("+") +MULT = Suppress("*") + +COLON = Suppress(":") + + +LBRACK = Suppress("[") +RBRACK = Suppress("]") + +dbreg = Group(gpregs16.parser | gpregs32.parser | gpregs64.parser) +gpreg = (gpregs08.parser | gpregs08_64.parser | gpregs16.parser | + gpregs32.parser | gpregs64.parser | gpregs_xmm.parser | + gpregs_mm.parser) + + +def reg2exprid(r): + if not r.name in all_regs_ids_byname: + raise ValueError('unknown reg') + return all_regs_ids_byname[r.name] + + +def 
parse_deref_reg(s, l, t): + t = t[0][0] + return t[0] + + +def parse_deref_int(s, l, t): + t = t[0] + return t[0] + + +def parse_deref_regint(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + i1 = ExprInt_from(r1, t[1].arg) + return r1 + i1 + + +def parse_deref_regreg(s, l, t): + t = t[0] + return t[0][0] + t[1][0] + + +def parse_deref_regregint(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + r2 = reg2exprid(t[1][0]) + i1 = ExprInt_from(r1, t[2].arg) + return r1 + r2 + i1 + + +def parse_deref_reg_intmreg(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + r2 = reg2exprid(t[1][0]) + i1 = ExprInt_from(r1, t[2].arg) + return r1 + (r2 * i1) + + +def parse_deref_reg_intmreg_int(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + r2 = reg2exprid(t[1][0]) + i1 = ExprInt_from(r1, t[2].arg) + i2 = ExprInt_from(r1, t[3].arg) + return r1 + (r2 * i1) + i2 + + +def parse_deref_intmreg(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + i1 = ExprInt_from(r1, t[1].arg) + return r1 * i1 + + +def parse_deref_intmregint(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + i1 = ExprInt_from(r1, t[1].arg) + i2 = ExprInt_from(r1, t[1].arg) + return (r1 * i1) + i2 + + +def getreg(s, l, t): + t = t[0] + return t[0] + + +def parse_deref_ptr(s, l, t): + t = t[0] + return ExprMem(ExprOp('segm', t[0], t[1])) + + +variable, operand, base_expr = gen_base_expr() + + +def ast_id2expr(t): + if not t in mn_x86.regs.all_regs_ids_byname: + r = ExprId(t) + else: + r = mn_x86.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt64(a) + + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + +int_or_expr = base_expr + +deref_mem_ad = Group(LBRACK + dbreg + RBRACK).setParseAction(parse_deref_reg) +deref_mem_ad |= Group( + LBRACK + int_or_expr + RBRACK).setParseAction(parse_deref_int) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + + int_or_expr + RBRACK).setParseAction(parse_deref_regint) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + 
+ dbreg + RBRACK).setParseAction(parse_deref_regreg) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + dbreg + PLUS + + int_or_expr + RBRACK).setParseAction(parse_deref_regregint) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + dbreg + MULT + + int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + dbreg + MULT + int_or_expr + + PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg_int) +deref_mem_ad |= Group( + LBRACK + dbreg + MULT + + int_or_expr + RBRACK).setParseAction(parse_deref_intmreg) +deref_mem_ad |= Group( + LBRACK + dbreg + MULT + int_or_expr + + PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_intmregint) + + +deref_ptr = Group(int_or_expr + COLON + + int_or_expr).setParseAction(parse_deref_ptr) + + +PTR = Suppress('PTR') + + +BYTE = Literal('BYTE') +WORD = Literal('WORD') +DWORD = Literal('DWORD') +QWORD = Literal('QWORD') +TBYTE = Literal('TBYTE') + + +def parse_deref_mem(s, l, t): + sz = {'BYTE': 8, 'WORD': 16, 'DWORD': 32, 'QWORD': 64, 'TBYTE': 80} + t = t[0] + if len(t) == 2: + s, ptr = t + return ExprMem(ptr, sz[s[0]]) + elif len(t) == 3: + s, segm, ptr = t + return ExprMem(ExprOp('segm', segm[0], ptr), sz[s[0]]) + else: + raise ValueError('len(t) > 3') + +mem_size = Group(BYTE | DWORD | QWORD | WORD | TBYTE) +deref_mem = Group(mem_size + PTR + Optional(Group(int_or_expr + COLON)) + + deref_mem_ad).setParseAction(parse_deref_mem) + + +rmarg = Group(gpregs08.parser | + gpregs08_64.parser | + gpregs16.parser | + gpregs32.parser | + gpregs64.parser | + gpregs_mm.parser | + gpregs_xmm.parser + ).setParseAction(getreg) + +rmarg |= deref_mem + + +cl_or_imm = Group(r08_ecx.parser).setParseAction(getreg) +cl_or_imm |= int_or_expr + + +class r_al(reg_noarg, m_arg): + reg_info = r08_eax + parser = reg_info.parser + + +class r_ax(reg_noarg, m_arg): + reg_info = r16_eax + parser = reg_info.parser + + +class r_dx(reg_noarg, m_arg): + reg_info = r16_edx + parser = reg_info.parser 
+ + +class r_eax(reg_noarg, m_arg): + reg_info = r32_eax + parser = reg_info.parser + + +class r_rax(reg_noarg, m_arg): + reg_info = r64_eax + parser = reg_info.parser + + +class r_cl(reg_noarg, m_arg): + reg_info = r08_ecx + parser = reg_info.parser + + +invmode = {16: 32, 32: 16} + + +def opmode_prefix(mode): + size, opmode, admode = mode + if size in [16, 32]: + if opmode: + return invmode[size] + else: + return size + elif size == 64: + if opmode: + return 16 + else: + return 32 + raise NotImplementedError('not fully functional') + + +def admode_prefix(mode): + size, opmode, admode = mode + if size in [16, 32]: + if admode: + return invmode[size] + else: + return size + elif size == 64: + return 64 + raise NotImplementedError('not fully functional') + + +def v_opmode_info(size, opmode, rex_w, stk): + if size in [16, 32]: + if opmode: + return invmode[size] + else: + return size + elif size == 64: + if rex_w == 1: + return 64 + elif stk: + if opmode == 1: + return 16 + else: + return 64 + elif opmode == 1: + return 16 + return 32 + + +def v_opmode(p): + stk = hasattr(p, 'stk') + return v_opmode_info(p.mode, p.opmode, p.rex_w.value, stk) + + +def v_admode_info(size, admode): + if size in [16, 32]: + if admode: + return invmode[size] + else: + return size + elif size == 64: + if admode == 1: + return 32 + return 64 + + +def v_admode(p): + return v_admode_info(p.mode, p.admode) + + +def offsize(p): + if p.opmode: + return 16 + else: + return p.mode + + +def get_prefix(s): + g = re.search('(\S+)(\s+)', s) + if not g: + return None, s + prefix, b = g.groups() + return prefix, s[len(prefix) + len(b):] + + +repeat_mn = ["INS", "OUTS", + "MOVSB", "MOVSW", "MOVSD", "MOVSQ", + "SCASB", "SCASW", "SCASD", "SCASQ", + "LODSB", "LODSW", "LODSD", "LODSQ", + "STOSB", "STOSW", "STOSD", "STOSQ", + "CMPSB", "CMPSW", "CMPSD", "CMPSQ", + ] + +segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} +enc2segm = dict([(x[1], x[0]) for x in segm2enc.items()]) + + +class group: + + def 
def breakflow(self):
    """Tell whether this instruction interrupts the linear execution flow.

    True for mnemonics starting with J, LOOP, RET, INT or SYS, and for
    CALL, HLT, IRET and ICEBP.
    """
    mnemonic = self.name
    for head in ('J', 'LOOP', 'RET', 'INT', 'SYS'):
        if mnemonic.startswith(head):
            return True
    # repxx yyy generate split flow (that check is currently disabled)
    return mnemonic in ['CALL', 'HLT', 'IRET', 'ICEBP']
def fixDstOffset(self):
    """Turn an absolute integer destination into an offset relative to the
    end of this instruction (``offset + l``).

    A destination that is not an ExprInt is left untouched and reported
    with a warning.  Raises ValueError when `self.offset` is still None.
    """
    e = self.args[0]
    if self.offset is None:
        # Name the unresolved destination; the message used to format an
        # undefined variable and died with NameError instead.
        raise ValueError('symbol not resolved %s' % (e,))
    if not isinstance(e, ExprInt):
        log.warning('dynamic dst %r' % e)
        return
    self.args[0] = ExprInt_fromsize(
        self.mode, e.arg - (self.offset + self.l))
def additional_info(self):
    """Build the additional_info record describing this decoded mnemonic.

    Copies the g1/g2 prefix group values, the effective operand and
    address sizes and, when the mnemonic declares a `prefixed` field, that
    field's default value.
    """
    info = additional_info()
    info.g1.value = self.g1.value
    info.g2.value = self.g2.value
    info.v_opmode = self.v_opmode()
    # instruction_x86.v_admode() reads this field; leaving it unset made
    # it return None for records built here.
    info.v_admode = self.v_admode()
    info.prefixed = ""
    if hasattr(self, 'prefixed'):
        info.prefixed = self.prefixed.default
    return info
def dup_info(self, infos):
    """Copy the g1 and g2 values from `infos`; do nothing when it is None."""
    if infos is None:
        return
    for field in ('g1', 'g2'):
        getattr(self, field).value = getattr(infos, field).value
del(self.admode) + # self.opmode = 0 + # self.admode = 0 + + def add_pre_dis_info(self, pre_dis_info=None): + # print 'add_pre_dis_info', pre_dis_info + + if pre_dis_info is None: + return True + if hasattr(self, "prefixed") and self.prefixed.default == "\x66": + pre_dis_info['opmode'] = 0 + # if self.opmode != 0: + # return False + + # if pre_dis_info['opmode'] != self.opmode: + # return False + # if pre_dis_info['admode'] != self.admode: + # return False + self.opmode = pre_dis_info['opmode'] + self.admode = pre_dis_info['admode'] + + if hasattr(self, 'no_xmm_pref') and\ + pre_dis_info['prefix'] and\ + pre_dis_info['prefix'][-1] in '\x66\xf2\xf3': + return False + if (hasattr(self, "prefixed") and + not pre_dis_info['prefix'].endswith(self.prefixed.default)): + return False + # print self.rex_w.value, pre_dis_info['rex_w'] + # print 'rex', self.rex_w.value, self.rex_b.value, self.rex_x.value + if (self.rex_w.value is not None and + self.rex_w.value != pre_dis_info['rex_w']): + return False + else: + self.rex_w.value = pre_dis_info['rex_w'] + self.rex_r.value = pre_dis_info['rex_r'] + self.rex_b.value = pre_dis_info['rex_b'] + self.rex_x.value = pre_dis_info['rex_x'] + self.rex_p.value = pre_dis_info['rex_p'] + self.g1.value = pre_dis_info['g1'] + self.g2.value = pre_dis_info['g2'] + self.prefix = pre_dis_info['prefix'] + # self.prefixed = pre_dis_info['prefixed'] + + """ + if hasattr(self, "p_"): + self.prefixed = self.p_.default + if self.p_.default == "\x66": + pre_dis_info['opmode'] = 0 + if self.opmode != 0: + return False + #self.pre_dis_info = pre_dis_info + """ + return True + + def post_asm(self, v): + return v + + def encodefields(self, decoded): + v = super(mn_x86, self).encodefields(decoded) + if hasattr(self, 'prefixed'): + v = self.prefixed.default + v + + rex = 0x40 + if self.g1.value is None: + self.g1.value = 0 + if self.g2.value is None: + self.g2.value = 0 + + if self.rex_w.value: + rex |= 0x8 + if self.rex_r.value: + rex |= 0x4 + if 
def getnextflow(self, symbol_pool):
    """Not implemented for x86: always raises NotImplementedError."""
    # The statement that followed the raise could never execute and has
    # been dropped.
    raise NotImplementedError('not fully functional')
return cand_same_mode + cand_diff_mode + + +class bs8(bs): + prio = default_prio + + def __init__(self, v, cls=None, fname=None, **kargs): + super(bs8, self).__init__(int2bin(v, 8), 8, + cls=cls, fname=fname, **kargs) + + +class bs_modname_size(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + mode = dct['mode'] + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + # no mode64 existance in name means no 64bit version of mnemo + if mode == 64: + if mode in self.args['name']: + nfields = fields[:] + f, i = getfieldindexby_name(nfields, 'rex_w') + # f = bs("1", l=0, fname = 'rex_w') + f = bs("1", l=0, cls=(bs_fbit,), fname="rex_w") + osize = v_opmode_info(size, opmode, 1, 0) + nfields[i] = f + nfields = nfields[:-1] + args = dict(self.args) + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + + nfields = fields[:] + nfields = nfields[:-1] + f, i = getfieldindexby_name(nfields, 'rex_w') + # f = bs("0", l=0, fname = 'rex_w') + f = bs("0", l=0, cls=(bs_fbit,), fname="rex_w") + osize = v_opmode_info(size, opmode, 0, 0) + nfields[i] = f + args = dict(self.args) + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + else: + l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) + osize = v_opmode_info(size, opmode, None, 0) + nfields = fields[:-1] + args = dict(self.args) + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_modname_jecx(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields 
= candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + mode = dct['mode'] + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + + nfields = fields[:] + nfields = nfields[:-1] + args = dict(self.args) + ndct = dict(dct) + if mode == 64: + if admode: + ndct['name'] = "JECXZ" + else: + ndct['name'] = "JRCXZ" + elif mode == 32: + if admode: + ndct['name'] = "JCXZ" + else: + ndct['name'] = "JECXZ" + elif mode == 16: + if admode: + ndct['name'] = "JECXZ" + else: + ndct['name'] = "JCXZ" + else: + raise ValueError('unhandled mode') + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_modname_mode(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + + mode = dct['mode'] + l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) + osize = v_opmode_info(size, opmode, None, 0) + nfields = fields[:-1] + args = dict(self.args) + ndct = dict(dct) + if mode == 64 or osize == 32: + ndct['name'] = self.args['name'][mode] + else: + ndct['name'] = self.args['name'][16] + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class x86_imm(imm_noarg): + parser = base_expr + + def decodeval(self, v): + return swap_uint(self.l, v) + + def encodeval(self, v): + return swap_uint(self.l, v) + + +class x86_imm_fix(imm_noarg): + parser = base_expr + + def decodeval(self, v): + return self.ival + + def encodeval(self, v): + if v != self.ival: + return False + return self.ival + + +class x86_08(x86_imm): + intsize = 8 + intmask = (1 << intsize) - 1 + + +class x86_16(x86_imm): + intsize = 16 + intmask = (1 << intsize) - 1 + + +class x86_32(x86_imm): + intsize = 32 + intmask = (1 << intsize) - 1 + + +class x86_64(x86_imm): + intsize = 64 + intmask = (1 << intsize) - 
1 + + +class x86_08_ne(x86_imm): + intsize = 8 + intmask = (1 << intsize) - 1 + + def encode(self): + return True + + def decode(self, v): + v = swap_uint(self.l, v) + p = self.parent + admode = p.v_admode() + e = sign_ext(v, self.intsize, admode) + e = ExprInt_fromsize(admode, e) + self.expr = e + return True + + +class x86_16_ne(x86_08_ne): + intsize = 16 + intmask = (1 << intsize) - 1 + + +class x86_32_ne(x86_08_ne): + intsize = 32 + intmask = (1 << intsize) - 1 + + +class x86_64_ne(x86_08_ne): + intsize = 64 + intmask = (1 << intsize) - 1 + + +class x86_s08to16(x86_imm): + in_size = 8 + out_size = 16 + + def myexpr(self, x): + return ExprInt16(x) + + def int2expr(self, v): + return self.myexpr(v) + + def expr2int(self, e): + if not isinstance(e, ExprInt): + return None + v = int(e.arg) + if v & ~((1 << self.l) - 1) != 0: + return None + return v + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.v_opmode() == 64: + self.expr = ExprInt64(sign_ext(v, self.in_size, 64)) + else: + if (1 << (self.l - 1)) & v: + v = sign_ext(v, self.l, self.out_size) + self.expr = self.myexpr(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + opmode = self.parent.v_opmode() + + out_size = self.out_size + if opmode != self.out_size: + if opmode == 32 and self.out_size == 64: + out_size = opmode + if v == sign_ext( + int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): + pass + else: + # print 'cannot encode1', hex(v), + # print hex(sign_ext(int(v&((1<<self.in_size)-1)), + # self.in_size, out_size)) + # test with rex_w + self.parent.rex_w.value = 1 + opmode = self.parent.v_opmode() + out_size = opmode + if (v != sign_ext( + int(v & ((1 << self.in_size) - 1)), + self.in_size, out_size)): + # print 'cannot encode2', hex(v), + # hex(sign_ext(int(v&((1<<self.in_size)-1)), + # self.in_size, out_size)) + return False + else: + pass + else: + pass + if v != sign_ext( + int(v & 
((1 << self.in_size) - 1)), self.in_size, out_size): + # print 'cannot encode3', hex(v), + # hex(sign_ext(int(v&((1<<self.in_size)-1)), self.in_size, + # self.out_size)) + return False + v = self.encodeval(v) + self.value = (v & 0xffffffff) & self.lmask + return True + + def decodeval(self, v): + return swap_uint(self.l, v) + + def encodeval(self, v): + return swap_sint(self.l, v) + + +class x86_s08to32(x86_s08to16): + myexpr = lambda self, x: ExprInt32(x) + in_size = 8 + out_size = 32 + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.rex_w.value == 1: + v = ExprInt64(sign_ext(v, self.in_size, 64)) + else: + v = ExprInt32(sign_ext(v, self.in_size, 32)) + + self.expr = v + # print "INT1", self.parent.rex_w.value, self.expr, self.expr.size + return True + + +class x86_s08to64(x86_s08to16): + myexpr = lambda self, x: ExprInt64(x) + in_size = 8 + out_size = 64 + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.rex_w.value == 1: + v = ExprInt64(sign_ext(v, self.in_size, 64)) + else: + v = ExprInt32(sign_ext(v, self.in_size, 32)) + + self.expr = v + # print "INT1X", self.parent.prefix.rex_w, self.expr, self.expr.size + return True + + +class x86_s32to64(x86_s08to32): + myexpr = lambda self, x: ExprInt64(x) + in_size = 32 + out_size = 64 + + +class bs_eax(m_arg): + reg_info = r_eax_all + rindex = 0 + parser = reg_info.parser + + def decode(self, v): + p = self.parent + e = None + if hasattr(p, 'w8') and p.w8.value == 0: + e = regs08_expr[self.rindex] + else: + e = size2gpregs[p.v_opmode()].expr[self.rindex] + self.expr = e + return True + + def encode(self): + self.value = 0 + p = self.parent + e = self.expr + # print "EEEEE", e, p.w8.value + # print 'XXX', p.mode, p.opmode + osize = p.v_opmode() + if hasattr(p, 'w8'): + if p.w8.value is None: + # XXX TODO: priority in w8 erase? 
+ if e.size == 8: + p.w8.value = 0 + else: + p.w8.value = 1 + if hasattr(p, 'w8') and p.w8.value == 0: + return e == regs08_expr[self.rindex] + elif p.mode in [16, 32]: + return e == size2gpregs[osize].expr[self.rindex] + elif p.mode == 64: + if e == size2gpregs[64].expr[self.rindex]: + p.rex_w.value = 1 + return True + elif e == size2gpregs[osize].expr[self.rindex]: + return True + return False + + +class bs_seg(m_arg): + reg_info = r_eax_all + rindex = 0 + parser = reg_info.parser + + def decode(self, v): + self.expr = self.reg_info.expr[0] + return True + + def encode(self): + self.value = 0 + return self.expr == self.reg_info.expr[0] + + +class bs_edx(bs_eax): + reg_info = r_edx_all + rindex = 2 + parser = reg_info.parser + + +class bs_st(bs_eax): + reg_info = r_st_all + rindex = 0 + parser = reg_info.parser + + +class bs_cs(bs_seg): + reg_info = r_cs_all + rindex = 0 + parser = reg_info.parser + + +class bs_ds(bs_seg): + reg_info = r_ds_all + rindex = 0 + parser = reg_info.parser + + +class bs_es(bs_seg): + reg_info = r_es_all + rindex = 0 + parser = reg_info.parser + + +class bs_ss(bs_seg): + reg_info = r_ss_all + rindex = 0 + parser = reg_info.parser + + +class bs_fs(bs_seg): + reg_info = r_fs_all + rindex = 0 + parser = reg_info.parser + + +class bs_gs(bs_seg): + reg_info = r_gs_all + rindex = 0 + parser = reg_info.parser + + +class x86_reg_st(reg_noarg, m_arg): + reg_info = r_st_all + parser = reg_info.parser + + +class bs_sib_scale(bs_divert): + bsname = "sib_scale" + + def divert(self, i, candidates): + out = [] + done = False + for cls, name, bases, dct, fields in candidates: + if (not (admode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) != 16 and + 'rm' in dct and dct['rm'] == 0b100 and + 'mod' in dct and dct['mod'] != 0b11)): + ndct = dict(dct) + nfields = fields[:] + nfields[i] = None + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + + nfields = fields[:] + args = dict(self.args) + ndct = 
dict(dct) + f = bs(**args) + nfields[i] = f + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_sib_index(bs_sib_scale): + pass + + +class bs_sib_base(bs_sib_scale): + pass + + +class bs_disp(bs_divert): + + def divert(self, i, candidates): + out = [] + done = False + for cls, name, bases, dct, fields in candidates: + ndct = dict(dct) + nfields = fields[:] + if (admode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) == 16): + if 'mod' in dct and dct['mod'] == 0b00 and \ + 'rm' in dct and dct['rm'] == 0b110: + nfields[i] = bs( + l=16, cls=(x86_16_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b01: + nfields[i] = bs( + l=8, cls=(x86_08_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b10: + nfields[i] = bs( + l=16, cls=(x86_16_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + else: + if 'mod' in dct and dct['mod'] == 0b00 and \ + 'rm' in dct and dct['rm'] == 0b101: + nfields[i] = bs( + l=32, cls=(x86_32_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b01: + nfields[i] = bs( + l=8, cls=(x86_08_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b10: + nfields[i] = bs( + l=32, cls=(x86_32_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + + nfields[i] = None + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + 
def getmodrm(c):
    """Split byte `c` into (mod, reg, rm): bits 7-6, bits 5-3, bits 2-0."""
    return (c >> 6) & 3, (c >> 3) & 7, c & 7


def setmodrm(mod, re, rm):
    """Pack (mod, reg, rm) into one byte; each field is masked to its width."""
    return ((mod & 3) << 6) | ((re & 7) << 3) | (rm & 7)


def sib(c):
    """Split a SIB byte into (scale, index, base).

    The bit layout is the same 2/3/3 split as for a ModRM byte, so the
    work is delegated to getmodrm().
    """
    # The previous body called `modrm(c)`; no function of that name is
    # visible here, while getmodrm() performs exactly this split.
    return getmodrm(c)
sib_u32_ebp: + sib_rez[index][b] = 1 + sib_rez[index][f_imm] = f_u32 + elif sib_rez == sib_u32: + sib_rez[index][b] = 1 + elif sib_rez == sib_u64_ebp: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + elif sib_rez == sib_u64: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + elif sib_rez == sib_64_s08_ebp: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_s08 + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + + if i == 0b100 and sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: + continue + + if sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: + tmp = i + if not tmp in sib_rez[index]: + sib_rez[index][tmp] = 0 # 1 << ss + sib_rez[index][tmp] += 1 << ss + else: + for rex_b in xrange(2): + for rex_x in xrange(2): + tmp = i + 8 * rex_x + if i == 0b100 and rex_x == 0: + continue + if not tmp in sib_rez[rex_x][rex_b][index]: + sib_rez[rex_x][rex_b][index][tmp] = 0 # 1 << ss + sib_rez[rex_x][rex_b][index][tmp] += 1 << ss + + # 32bit + db_afs_32 = [None for i in range(0x100)] + for i in range(0x100): + index = i + mod, re, rm = getmodrm(i) + + if mod == 0b00: + if rm == 0b100: + db_afs_32[index] = sib_u32 + elif rm == 0b101: + db_afs_32[index] = {f_isad: True, f_imm: f_u32} + else: + db_afs_32[index] = {f_isad: True, rm: 1} + elif mod == 0b01: + if rm == 0b100: + db_afs_32[index] = sib_s08_ebp + continue + tmp = {f_isad: True, rm: 1, f_imm: f_s08} + db_afs_32[index] = tmp + + elif mod == 0b10: + if rm == 0b100: + db_afs_32[index] = sib_u32_ebp + else: + db_afs_32[index] = {f_isad: True, rm: 1, f_imm: f_u32} + elif mod == 0b11: + db_afs_32[index] = {f_isad: False, rm: 1} + + # 64bit + db_afs_64 = [None for i in range(0x400)] + for i in range(0x400): + index = i + rex_x = (index >> 9) & 1 + rex_b = (index >> 8) & 1 + mod, re, rm = getmodrm(i & 0xff) + + if mod == 0b00: + if rm == 
0b100: + db_afs_64[i] = sib_u64[rex_x][rex_b] + elif rm == 0b101: + db_afs_64[i] = {f_isad: True, f_imm: f_u32, 16: 1} + else: + db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1} + elif mod == 0b01: + if rm == 0b100: + db_afs_64[i] = sib_64_s08_ebp[rex_x][rex_b] + continue + tmp = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_s08} + db_afs_64[i] = tmp + + elif mod == 0b10: + if rm == 0b100: + db_afs_64[i] = sib_u64_ebp[rex_x][rex_b] + else: + db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_u32} + elif mod == 0b11: + db_afs_64[i] = {f_isad: False, rm + 8 * rex_b: 1} + + # 16bit + db_afs_16 = [None for i in range(0x100)] + _si = 6 + _di = 7 + _bx = 3 + _bp = 5 + for i in range(0x100): + index = i + mod, re, rm = getmodrm(i) + + if mod == 0b00: + if rm == 0b100: + db_afs_16[index] = {f_isad: True, _si: 1} + elif rm == 0b101: + db_afs_16[index] = {f_isad: True, _di: 1} + elif rm == 0b110: + db_afs_16[index] = { + f_isad: True, f_imm: f_u16} # {f_isad:True,_bp:1} + elif rm == 0b111: + db_afs_16[index] = {f_isad: True, _bx: 1} + else: + db_afs_16[index] = {f_isad: True, + [_si, _di][rm % 2]: 1, + [_bx, _bp][(rm >> 1) % 2]: 1} + elif mod in [0b01, 0b10]: + if mod == 0b01: + my_imm = f_s08 + else: + my_imm = f_u16 + + if rm == 0b100: + db_afs_16[index] = {f_isad: True, _si: 1, f_imm: my_imm} + elif rm == 0b101: + db_afs_16[index] = {f_isad: True, _di: 1, f_imm: my_imm} + elif rm == 0b110: + db_afs_16[index] = {f_isad: True, _bp: 1, f_imm: my_imm} + elif rm == 0b111: + db_afs_16[index] = {f_isad: True, _bx: 1, f_imm: my_imm} + else: + db_afs_16[index] = {f_isad: True, + [_si, _di][rm % 2]: 1, + [_bx, _bp][(rm >> 1) % 2]: 1, + f_imm: my_imm} + + elif mod == 0b11: + db_afs_16[index] = {f_isad: False, rm: 1} + + byte2modrm = {} + byte2modrm[16] = db_afs_16 + byte2modrm[32] = db_afs_32 + byte2modrm[64] = db_afs_64 + + modrm2byte = {16: defaultdict(list), + 32: defaultdict(list), + 64: defaultdict(list), + } + for size, db_afs in byte2modrm.items(): + for i, modrm in 
def exprfindmod(e, o=None):
    """Collect the register terms of address expression `e` and return its
    displacement.

    `o` is filled in place: it maps a register's index in
    ``size2gpregs[size].expr`` to its multiplier (1 for a bare register,
    the integer factor for a ``reg * n`` term).

    Returns the ExprInt displacement found in `e`, or None when there is
    none.  Raises ValueError for an operator other than '+' or '*', or
    when a sum holds more than one displacement.
    """
    if o is None:
        o = {}
    if isinstance(e, ExprInt):
        return e
    if isinstance(e, ExprId):
        i = size2gpregs[e.size].expr.index(e)
        o[i] = 1
        return None
    if isinstance(e, ExprOp):
        if e.op == '+':
            out = None
            for a in e.args:
                r = exprfindmod(a, o)
                if r is None:
                    # register term: keep any displacement already found
                    continue
                if out is not None:
                    raise ValueError('multiple displacement!')
                out = r
            return out
        elif e.op == "*":
            mul = int(e.args[1].arg)
            a = e.args[0]
            i = size2gpregs[a.size].expr.index(a)
            o[i] = mul
        else:
            raise ValueError('bad op')
    return None
+ p.rex_x.value = 0 + r = size2gpregs[8] + if not e in r.expr: + return None, None, False + i = r.expr.index(e) + o[i] = 1 + return [o], None, True + # print "ttt", opmode, e.size + if opmode != e.size: + # print "FFFF" + return None, None, False + if not e in size2gpregs[opmode].expr: + return None, None, False + i = size2gpregs[opmode].expr.index(e) + # print 'aaa', p.mode, i + if i > 7: + if p.mode == 64: + # p.rex_b.value = 1 + # i -=7 + # print "SET REXB" + pass + else: + return None, None, False + o[i] = 1 + return [o], None, True + if e.is_op_segm() and isinstance(e.arg.args[0], ExprInt): + return None, None, False + + if e.is_op_segm(): + segm = e.arg.args[0] + ptr = e.arg.args[1] + else: + segm = None + ptr = e.arg + + o[f_isad] = True + ad_size = ptr.size + admode = p.v_admode() + if ad_size != admode: + return None, None, False + """ + if e.size == 64: + if hasattr(p, 'sd'): + p.sd.value = 1 + else: + p.rex_w.value = 1 + """ + + if w8 == 1 and e.size != opmode: # p.v_opmode(): + if not (hasattr(p, 'sd') or hasattr(p, 'wd')): + return None, None, False + # print 'tttt' + + if hasattr(p, 'wd'): + s = e.size + if s == 16: + p.wd.value = 1 + elif s == 32: + pass + else: + return None, None, False + + if p.mode == 64 and ptr.size == 32: + if p.admode != 1: + return None, None, False + + o = {f_isad: True} + disp = exprfindmod(ptr, o) + out = [] + if disp is None: + # add 0 disp + disp = ExprInt32(0) + if disp is not None: + for s, x in [(f_s08, ExprInt8), (f_s16, ExprInt16), (f_s32, ExprInt32), + (f_u08, ExprInt8), (f_u16, ExprInt16), (f_u32, ExprInt32)]: + # print "1", disp + v = x(int(disp.arg)) + # print "2", v, hex(sign_ext(int(v.arg), v.size, disp.size)) + if int(disp.arg) != sign_ext(int(v.arg), v.size, disp.size): + # print 'nok' + continue + # print 'ok', s, v + x1 = dict(o) + x1[f_imm] = (s, v) + out.append(x1) + else: + out = [o] + return out, segm, True + + +def modrm2expr(m, p, w8, sx=0, xmm=0, mm=0): + o = [] + if not m[f_isad]: + k = [x[0] for x 
def get_modrm(self):
    """Look up the ModRM table entry matching the parent's decoded fields.

    The lookup key is built from the parent's ``mod`` and ``rm`` field
    values (the ``reg`` slot is forced to 0) with ``rex_b`` stored in
    bit 8 and ``rex_x`` in bit 9.  When the table entry is a list it is
    indexed a second time with a key built from the SIB fields.

    Returns the entry found in ``byte2modrm`` (its exact shape is defined
    where that table is built, outside this block), or ``False`` when a
    SIB lookup is required but the parent's ``sib_scale`` is falsy.

    Raises ValueError if the parent's address mode is not 16, 32 or 64.
    """
    p = self.parent
    admode = p.v_admode()

    if admode not in (16, 32, 64):
        # The original passed `admode` as a second exception argument, so
        # the message was never formatted; format it explicitly.
        raise ValueError('strange admode %r' % admode)

    key = setmodrm(p.mod.value, 0, p.rm.value)
    key |= p.rex_b.value << 8
    key |= p.rex_x.value << 9
    if p.mode == 64:
        # XXX to check: in 64-bit mode the 64-bit table is always used,
        # whatever the effective address size is.
        admode = 64

    entry = byte2modrm[admode][key]
    if isinstance(entry, list):
        # A list entry is indexed a second time by the SIB byte.
        if not p.sib_scale:
            return False
        sib_key = setmodrm(p.sib_scale.value,
                           p.sib_index.value,
                           p.sib_base.value)
        entry = entry[sib_key]
    return entry
print 'mod sib', hex(modrm), sib + # print p.sib_scale + # print p.sib_base + # print p.sib_index + + # 16 bit cannot have sib + if (not sib is None) and admode == 16: + continue + # if ((p.sib_scale and sib is None) or + # (p.sib_scale is None and sib)): + # log.debug('dif sib %r %r'%(p.sib_scale, sib)) + # continue + # print hex(modrm), sib + # p.mod.value, dum, p.rm.value = getmodrm(modrm) + rex = modrm >> 8 # 0# XXX HACK REM temporary REX modrm>>8 + if rex and admode != 64: + continue + # print 'prefix', hex(rex) + # p.rex_x.value = o_rex_x + # p.rex_b.value = o_rex_b + + p.rex_x.value = (rex >> 1) & 1 + p.rex_b.value = rex & 1 + + if o_rex_x is not None and p.rex_x.value != o_rex_x: + continue + if o_rex_b is not None and p.rex_b.value != o_rex_b: + continue + + mod, re, rm = getmodrm(modrm) + # check re on parent + if re != p.reg.value: + continue + # p.mod.value.append(mod) + # p.rm.value.append(rm) + + if sib: + # print 'REX', p.rex_x.value, p.rex_b.value + # print hex(modrm), hex(sib) + # if (modrm & 0xFF == 4 and sib & 0xFF == 0x5 + # and p.rex_b.value ==1 and p.rex_x.value == 0): + # pass + s_scale, s_index, s_base = getmodrm(sib) + # p.sib_scale.value, p.sib_index.value, + # p.sib_base.value = getmodrm(sib) + # p.sib_scale.decode(mod) + # p.sib_index.decode(re) + # p.sib_base.decode(rm) + # p.sib_scale.value.append(mod) + # p.sib_index.value.append(re) + # p.sib_base.value.append(rm) + else: + # p.sib_scale.value.append(None) + # p.sib_index.value.append(None) + # p.sib_base.value.append(None) + s_scale, s_index, s_base = None, None, None + + # print 'IIII', repr(p.disp), f_imm in v + # if p.disp and not f_imm in vo: + # continue + # if not p.disp and f_imm in vo: + # continue + # if p.disp: + # if p.disp.l != f_imm2size[vo[f_imm]]: + # continue + # print "DISP", repr(p.disp), p.disp.l + # p.disp.value = int(disp.arg) + # print 'append' + # print mod, rm, s_scale, s_index, s_base, disp + # print p.mod, p.rm + # out_c.append((mod, rm, s_scale, s_index, 
class x86_rm_w8(x86_rm_arg):
    """ModRM argument whose operand size is selected by the parent's
    ``w8`` field (the field value is forwarded to modrm2expr/expr2modrm)."""

    def decode(self, v):
        """Build ``self.expr`` from the parent's ModRM fields.

        Returns True when modrm2expr produced an expression, else False.
        """
        p = self.parent
        xx = self.get_modrm()
        e = modrm2expr(xx, p, p.w8.value)
        self.expr = e
        return e is not None

    def encode(self):
        """Generator: yield once per encoding candidate from gen_cand.

        Integer expressions are not handled here and produce no candidate.
        When the parent's ``w8`` value is still unset it is derived from
        the expression size (0 for 8-bit expressions, 1 otherwise).
        """
        e = self.expr
        if isinstance(e, ExprInt):
            # Plain `return` ends the generator; raising StopIteration
            # inside a generator body is turned into RuntimeError by
            # PEP 479 on newer interpreters.
            return
        p = self.parent
        if p.w8.value is None:
            p.w8.value = 0 if e.size == 8 else 1

        v_cand, segm, _ = expr2modrm(e, p, p.w8.value)
        if segm:
            # record the segment override on the parent's g2 field
            p.g2.value = segm2enc[segm]
        for x in self.gen_cand(v_cand, p.v_admode()):
            yield x
class x86_rm_m80(x86_rm_arg):
    """ModRM argument restricted to memory operands of ``msize`` bits.

    Subclasses only override ``msize``.
    """
    msize = 80

    def decode(self, v):
        """Decode the ModRM fields and retype the memory access to
        ``msize`` bits.

        Returns False when the decoded operand is not a memory
        expression, True otherwise.
        """
        p = self.parent
        xx = self.get_modrm()
        e = modrm2expr(xx, p, 1)
        if not isinstance(e, ExprMem):
            return False
        self.expr = ExprMem(e.arg, self.msize)
        # The original returned `e is not None`, which is always true
        # once the ExprMem has been built.
        return True

    def encode(self):
        """Generator: yield once per encoding candidate from gen_cand.

        Produces no candidate unless ``self.expr`` is a memory expression
        of exactly ``msize`` bits.
        """
        e = self.expr
        if isinstance(e, ExprInt):
            # `return` instead of `raise StopIteration` (PEP 479).
            return
        if not isinstance(e, ExprMem) or e.size != self.msize:
            return
        p = self.parent
        mode = p.mode
        if mode == 64:
            mode = 32
        # expr2modrm is called with the access retyped to the mode size
        # (32 when the parent is in 64-bit mode).
        e = ExprMem(e.arg, mode)
        v_cand, segm, _ = expr2modrm(e, p, 1)
        for x in self.gen_cand(v_cand, p.v_admode()):
            yield x
def fromstring(self, s, parser_result=None):
    """Parse a register operand and update the parent's ``w8`` field.

    s: text to scan when no pre-computed parser result is supplied.
    parser_result: optional mapping from parser to an
        ``(expr, start, stop)`` triple; used instead of scanning ``s``.

    Returns ``(start, stop)`` on success and ``(None, None)`` when
    nothing could be parsed or when an 8-bit register is not acceptable
    for this instruction (parent has ``sx`` or lacks ``w8``).
    """
    parent = self.parent
    has_sx = hasattr(parent, 'sx')
    has_w8 = hasattr(parent, 'w8')
    # Default to the non-byte form; cleared below for 8-bit registers.
    if has_w8 and not has_sx:
        parent.w8.value = 1

    if parser_result:
        e, start, stop = parser_result[self.parser]
        if e is None:
            return None, None
    else:
        try:
            v, start, stop = next(self.parser.scanString(s))
        except StopIteration:
            return None, None
        e = v[0]

    self.expr = e
    # The original scanString path compared the size with 0 instead of 8,
    # so it never applied the 8-bit handling; both paths now share the
    # same check.
    if e.size == 8:
        if has_sx or not has_w8:
            return None, None
        parent.w8.value = 0
    return start, stop
hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0): + # if (hasattr(p, 'w8') and p.w8.value == 0): + r = gpregs08_64 + """ + if v < 8: + self.expr = r.expr[v] + else: + self.expr = size2gpregs[64].expr[v] + """ + if hasattr(p, "xmm") or hasattr(p, "xmmreg"): + e = gpregs_xmm.expr[v] + elif hasattr(p, "mm") or hasattr(p, "mmreg"): + e = gpregs_mm.expr[v] + else: + e = r.expr[v] + self.expr = e + return True + + def encode(self): + if not isinstance(self.expr, ExprId): + return False + if self.expr in gpregs64.expr and not hasattr(self.parent, 'stk'): + self.parent.rex_w.value = 1 + # print self.parent.opmode + # fd + opmode = self.parent.v_opmode() + # if hasattr(self.parent, 'sx'): + # opmode = 16 + # print 'reg encode', self.expr, opmode + if not hasattr(self.parent, 'sx') and hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + if self.expr.size == 8: + if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): + return False + self.parent.w8.value = 0 + opmode = 8 + r = size2gpregs[opmode] + # print "YYY", opmode, self.expr + if ((hasattr(self.parent, 'xmm') or hasattr(self.parent, 'xmmreg')) + and self.expr in gpregs_xmm.expr): + i = gpregs_xmm.expr.index(self.expr) + elif ((hasattr(self.parent, 'mm') or hasattr(self.parent, 'mmreg')) + and self.expr in gpregs_mm.expr): + i = gpregs_mm.expr.index(self.expr) + elif self.expr in r.expr: + i = r.expr.index(self.expr) + elif (opmode == 8 and self.parent.mode == 64 and + self.expr in gpregs08_64.expr): + i = gpregs08_64.expr.index(self.expr) + self.parent.rex_p.value = 1 + else: + log.debug("cannot encode reg %r" % self.expr) + return False + # print "zzz", opmode, self.expr, i, self.parent.mode + if self.parent.v_opmode() == 64: + if i > 7: + self.setrexsize(1) + i -= 8 + elif self.parent.mode == 64 and i > 7: + i -= 8 + # print 'rrr', self.getrexsize() + # self.parent.rex_b.value = 1 + self.setrexsize(1) + if hasattr(self.parent, 'xmm') or hasattr(self.parent, 'mm'): + if i > 7: + i -= 8 + 
def sib_cond(cls, mode, v):
    """Field-length condition shared by the SIB field classes.

    cls: field class providing the bit length ``ll``.
    mode: processor mode, forwarded to admode_prefix.
    v: dict of already decoded field values; reads 'opmode', 'admode',
       'mod' and 'rm'.

    Returns ``cls.ll`` when the effective address size is not 16,
    ``mod`` is not 0b11 and ``rm`` is 0b100; returns None otherwise.
    """
    if admode_prefix((mode, v["opmode"], v["admode"])) == 16:
        return None
    # The original ended with an unreachable `return v['rm'] == 0b100`
    # after an exhaustive if/elif/else; it has been removed.
    if v['mod'] != 0b11 and v['rm'] == 0b100:
        return cls.ll
    return None
class bs_cond_disp(bs_cond):
    """Displacement field whose bit length depends on the decoded
    mod/rm (and SIB base) values."""
    # cond must return field len

    @classmethod
    def flen(cls, mode, v):
        """Return the displacement length in bits, or None for no field.

        Reads 'opmode', 'admode', 'mod', and depending on the case 'rm'
        and the optional 'sib_base' entry of ``v``.
        """
        addr16 = admode_prefix((mode, v['opmode'], v['admode'])) == 16
        mod = v['mod']
        if mod == 0b01:
            return 8
        if mod == 0b10:
            return 16 if addr16 else 32
        if mod != 0b00:
            return None
        # mod == 0b00: a displacement exists only for specific rm / base
        # values.
        if addr16:
            return 16 if v['rm'] == 0b110 else None
        if 'sib_base' in v and v['sib_base'] == 0b101:
            return 32
        return 32 if v['rm'] == 0b101 else None

    def encode(self):
        """Byte-swap the value for emission; an unset value becomes a
        zero-length field.  Always returns True."""
        if self.value is not None:
            self.value = swap_uint(self.l, self.value)
        else:
            self.value = 0
            self.l = 0
        return True

    def decode(self, v):
        """Store the byte-swapped raw value and expose it, sign-extended
        to the parent's address size, as ``self.expr``."""
        width = self.parent.v_admode()
        raw = swap_uint(self.l, v)
        self.value = raw
        self.expr = ExprInt_fromsize(width, sign_ext(raw, self.l, width))
        return True
self.expr = e + + if self.expr is None: + log.debug('cannot fromstring int %r' % s) + return None, None + return start, stop + + @classmethod + def flen(cls, mode, v): + if 'w8' not in v or v['w8'] == 1: + if 'se' in v and v['se'] == 1: + return 8 + else: + # osize = v_opmode_info(mode, v['opmode'], v['admode']) + # osize = opmode_prefix((mode, v['opmode'], v['admode'])) + osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) + osize = min(osize, cls.max_size) + return osize + return 8 + + def getmaxlen(self): + return 32 + + def encode(self): + if not isinstance(self.expr, ExprInt): + raise StopIteration + arg0_expr = self.parent.args[0].expr + self.parent.rex_w.value = 0 + # special case for push + if len(self.parent.args) == 1: + v = int(self.expr.arg) + l = self.parent.v_opmode() + l = min(l, self.max_size) + + self.l = l + mask = ((1 << self.l) - 1) + # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), + # self.l, l)) + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + # print hex(self.value) + yield True + raise StopIteration + + # assume 2 args; use first arg to guess op size + if arg0_expr.size == 64: + self.parent.rex_w.value = 1 + + l = self.parent.v_opmode() # self.parent.args[0].expr.size + # print 'imm enc', l, self.parent.rex_w.value + v = int(self.expr.arg) + # print "imms size", l, hex(v), arg0_expr.size + if arg0_expr.size == 8: + if not hasattr(self.parent, 'w8'): + raise StopIteration + self.parent.w8.value = 0 + l = 8 + if hasattr(self.parent, 'se'): + self.parent.se.value = 0 + elif hasattr(self.parent, 'se'): + if hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + # print 'test', 8, hex(v), + # print hex(sign_ext(v & 0xFF, 8, arg0_expr.size)) + # try to generate signed extended version + if v == sign_ext(v & 0xFF, 8, arg0_expr.size): + # print 'setse' + self.parent.se.value = 1 + self.l = 8 + self.value = v & 0xFF + yield True + self.parent.se.value 
class bs_cond_imm64(bs_cond_imm):
    """Immediate field variant with a 64-bit maximum size."""
    max_size = 64

    def getmaxlen(self):
        """Return the maximum immediate length in bits."""
        return 64

    @classmethod
    def flen(cls, mode, v):
        """Return the immediate length in bits.

        8 when ``v`` has 'w8' with a value other than 1, or has 'se'
        equal to 1; otherwise the operand size from v_opmode_info.
        """
        if 'w8' in v and v['w8'] != 1:
            return 8
        if 'se' in v and v['se'] == 1:
            return 8
        return v_opmode_info(mode, v['opmode'], v['rex_w'], 0)
class bs_moff(bsi):
    """Offset field read from the second ``segm`` argument of the
    parent's ``mseg`` expression."""

    @classmethod
    def flen(cls, mode, v):
        """Return 16 when the operand size is 16 bits, else 32."""
        osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0)
        return 16 if osize == 16 else 32

    def encode(self):
        """Generator: yield True once when the offset can be encoded.

        Produces nothing when the parent has no ``mseg`` field, when
        ``mseg`` is not a segmented memory expression whose first
        argument is an integer, or when the offset does not fit the
        field.
        """
        if not hasattr(self.parent, "mseg"):
            # `return` instead of `raise StopIteration` (PEP 479).
            return
        m = self.parent.mseg.expr
        if not (isinstance(m, ExprMem) and m.is_op_segm() and
                isinstance(m.arg.args[0], ExprInt)):
            return
        l = self.parent.v_opmode()
        self.l = 16 if l == 16 else 32
        v = int(m.arg.args[1].arg)
        mask = (1 << self.l) - 1
        # the value must survive truncation to the field width followed
        # by sign extension back to the operand size
        if v != sign_ext(v & mask, self.l, l):
            return
        self.value = swap_uint(self.l, v & mask)
        yield True

    def decode(self, v):
        """Decode the offset into ``self.expr``.

        Returns False when the parent's operand size is 64, else True.
        """
        opmode = self.parent.v_opmode()
        if opmode == 64:
            return False
        v = swap_uint(self.l, v)
        self.value = v
        v = sign_ext(v, self.l, opmode)
        self.expr = ExprInt_fromsize(opmode, v)
        return True
def fromstring(self, s, parser_result=None):
    """Parse a memory-dereference operand into ``self.expr``.

    s: text to scan when no pre-computed parser result is supplied.
    parser_result: optional mapping from parser to an
        ``(expr, start, stop)`` triple; used instead of scanning ``s``.

    Returns ``(start, stop)`` on success, ``(None, None)`` when nothing
    was parsed or the parsed expression is not an ExprMem.
    """
    if parser_result:
        e, start, stop = parser_result[self.parser]
        # isinstance() also rejects None, which the original tested
        # separately (twice).
        if not isinstance(e, ExprMem):
            return None, None
        self.expr = e
        return start, stop

    try:
        v, start, stop = next(self.parser.scanString(s))
    except StopIteration:
        return None, None
    # The original tested isinstance(e, ExprMem) *before* assigning `e`
    # on this path, raising UnboundLocalError; bind the result first.
    e = v[0]
    if e is None:
        log.debug('cannot fromstring int %r' % s)
        return None, None
    if not isinstance(e, ExprMem):
        return None, None
    self.expr = e
    return start, stop
def encode(self):
    """Generator: yield True once when the segment value can be encoded.

    Produces nothing unless ``self.expr`` is a segmented memory
    expression whose two ``segm`` arguments are both integers and whose
    first argument fits the field width ``self.l``.
    """
    expr = self.expr
    if not (isinstance(expr, ExprMem) and expr.is_op_segm()):
        # `return` instead of `raise StopIteration` (PEP 479).
        return
    segm_value, offset = expr.arg.args[0], expr.arg.args[1]
    if not isinstance(segm_value, ExprInt):
        return
    if not isinstance(offset, ExprInt):
        return
    l = self.parent.v_opmode()
    v = int(segm_value.arg)
    mask = (1 << self.l) - 1
    # the value must survive truncation to the field width followed by
    # sign extension back to the operand size
    if v != sign_ext(v & mask, self.l, l):
        return
    self.value = swap_uint(self.l, v & mask)
    yield True
class field_size:
    """Thin wrapper around a dict mapping an operand mode to a value."""
    prio = default_prio

    def __init__(self, d=None):
        # a fresh dict per instance when the caller supplies none
        self.d = {} if d is None else d

    def get(self, opm, adm=None):
        """Return the entry stored for ``opm``; ``adm`` is accepted but
        not used.  Raises KeyError for an unknown ``opm``."""
        table = self.d
        return table[opm]
def addop(name, fields, args=None, alias=False):
    """Declare an x86 mnemonic by creating a subclass of mn_x86.

    name: class (mnemonic) name.
    fields: list of bit-field descriptors, stored as ``fields``.
    args: optional argument order, stored as ``args`` only when given.
    alias: stored as the ``alias`` class attribute.

    The created class is not returned; presumably mn_x86's class
    machinery registers it -- confirm against the mn_x86 definition.
    """
    attrs = {"fields": fields, "alias": alias}
    if args is not None:
        attrs['args'] = args
    type(name, (mn_x86,), attrs)
class bs_op_mode(bsi):
    """Zero-payload field that only matches for one operand size."""

    def decode(self, v):
        """Return True when the parent's operand size equals this
        field's ``mode`` attribute; ``v`` is not used."""
        return self.parent.v_opmode() == self.mode
if self.parent.mode == 64: + return False + opmode = self.parent.v_opmode() + # print "MODE", opmode, self.mode + return opmode == self.mode + + +bs_opmode16 = bs(l=0, cls=(bs_op_mode,), mode = 16, fname="fopmode") +bs_opmode32 = bs(l=0, cls=(bs_op_mode,), mode = 32, fname="fopmode") +bs_opmode64 = bs(l=0, cls=(bs_op_mode,), mode = 64, fname="fopmode") + + +bs_admode16 = bs(l=0, cls=(bs_ad_mode,), mode = 16, fname="fadmode") +bs_admode32 = bs(l=0, cls=(bs_ad_mode,), mode = 32, fname="fadmode") +bs_admode64 = bs(l=0, cls=(bs_ad_mode,), mode = 64, fname="fadmode") + +bs_opmode16_no64 = bs(l=0, cls=(bs_op_mode_no64,), mode = 16, fname="fopmode") +bs_opmode32_no64 = bs(l=0, cls=(bs_op_mode_no64,), mode = 32, fname="fopmode") + +# class ia32_call(mn_x86): +# fields = [bs8(0xff)] + rmmod(d3) +# conv_name = {16:'CBW', 32:'CWDE', 64:'CDQE'} +# bs_conv_name = bs_modname_size(l=0, name=conv_name) +addop("cbw", [bs8(0x98), bs_opmode16]) +addop("cwde", [bs8(0x98), bs_opmode32]) +addop("cdqe", [bs8(0x98), bs_opmode64]) + +addop("clc", [bs8(0xf8)]) +addop("cld", [bs8(0xfc)]) +addop("cli", [bs8(0xfa)]) +addop("clts", [bs8(0x0f), bs8(0x06)]) +addop("cmc", [bs8(0xf5)]) + +addop("cmov", [bs8(0x0f), bs('0100'), cond] + rmmod(rmreg)) + +addop("cmp", [bs("0011110"), w8, d_eax, d_imm]) +addop("cmp", [bs("100000"), se, w8] + rmmod(d7, rm_arg_w8) + [d_imm]) +addop("cmp", [bs("001110"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + + +addop("cmpsb", [bs8(0xa6)]) +# cmps_name = {16:'CMPSW', 32:'CMPSD', 64:'CMPSQ'} +# bs_cmps_name = bs_modname_size(l=0, name=cmps_name) +# addop("cmps", [bs8(0xa7), bs_cmps_name]) +addop("cmpsw", [bs8(0xa7), bs_opmode16]) +addop("cmpsd", [bs8(0xa7), bs_opmode32]) +addop("cmpsq", [bs8(0xa7), bs_opmode64]) + +addop("cmpxchg", [bs8(0x0f), bs('1011000'), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +# XXX TODO CMPXCHG8/16 +addop("cpuid", [bs8(0x0f), bs8(0xa2)]) + +# convbis_name = {16:'CWD', 32:'CDQ', 64:'CQO'} +# bs_convbis_name = 
bs_modname_size(l=0, name=convbis_name) +# addop("convbis", [bs8(0x99), bs_convbis_name]) +addop("cwd", [bs8(0x99), bs_opmode16]) +addop("cdq", [bs8(0x99), bs_opmode32]) +addop("cqo", [bs8(0x99), bs_opmode64]) + + +addop("daa", [bs8(0x27)]) +addop("das", [bs8(0x2f)]) +addop("dec", [bs('1111111'), w8] + rmmod(d1, rm_arg_w8)) +addop("dec", [bs('01001'), reg]) +addop("div", [bs('1111011'), w8] + rmmod(d6, rm_arg_w8)) +addop("enter", [bs8(0xc8), u16, u08]) + +# float ##### +addop("fwait", [bs8(0x9b)]) + +addop("f2xm1", [bs8(0xd9), bs8(0xf0)]) +addop("fabs", [bs8(0xd9), bs8(0xe1)]) + +addop("fadd", [bs("11011"), sd, bs("00")] + rmmod(d0, rm_arg_sd)) +addop("fadd", [bs("11011"), swapargs, bs("00"), + bs("11000"), d_st, fltreg], [d_st, fltreg]) +addop("faddp", [bs8(0xde), bs("11000"), fltreg, d_st]) +addop("fiadd", [bs("11011"), wd, bs("10")] + rmmod(d0, rm_arg_wd)) + +addop("fbld", [bs8(0xdf)] + rmmod(d4, rm_arg_m80)) +addop("fbldp", [bs8(0xdf)] + rmmod(d6, rm_arg_m80)) +addop("fchs", [bs8(0xd9), bs8(0xe0)]) +# addop("fclex", [bs8(0x9b), bs8(0xdb), bs8(0xe2)]) +addop("fnclex", [bs8(0xdb), bs8(0xe2)]) + +addop("fcmovb", [bs8(0xda), bs("11000"), d_st, fltreg]) +addop("fcmove", [bs8(0xda), bs("11001"), d_st, fltreg]) +addop("fcmovbe", [bs8(0xda), bs("11010"), d_st, fltreg]) +addop("fcmovu", [bs8(0xda), bs("11011"), d_st, fltreg]) +addop("fcmovnb", [bs8(0xdb), bs("11000"), d_st, fltreg]) +addop("fcmovne", [bs8(0xdb), bs("11001"), d_st, fltreg]) +addop("fcmovnbe", [bs8(0xdb), bs("11010"), d_st, fltreg]) +addop("fcmovnu", [bs8(0xdb), bs("11011"), d_st, fltreg]) + +addop("fcom", [bs("11011"), sd, bs("00")] + rmmod(d2, rm_arg_sd)) +addop("fcom", [bs("11011"), swapargs, bs("00"), + bs("11010"), d_st, fltreg], [d_st, fltreg]) +addop("fcomp", [bs("11011"), sd, bs("00")] + rmmod(d3, rm_arg_sd)) +addop("fcomp", + [bs("11011"), swapargs, bs("00"), bs("11011"), + d_st, fltreg], [d_st, fltreg]) +addop("fcompp", [bs8(0xde), bs8(0xd9)]) + +addop("fcomi", [bs8(0xdb), bs("11110"), d_st, 
fltreg]) +addop("fcomip", [bs8(0xdf), bs("11110"), d_st, fltreg]) +addop("fucomi", [bs8(0xdb), bs("11101"), d_st, fltreg]) +addop("fucomip", [bs8(0xdf), bs("11101"), d_st, fltreg]) + +addop("fcos", [bs8(0xd9), bs8(0xff)]) +addop("fdecstp", [bs8(0xd9), bs8(0xf6)]) + + +addop("fdiv", [bs("11011"), sd, bs("00")] + rmmod(d6, rm_arg_sd)) +addop("fdiv", [bs8(0xd8), bs("11110"), d_st, fltreg]) +addop("fdiv", [bs8(0xdc), bs("11111"), fltreg, d_st]) +addop("fdivp", [bs8(0xde), bs("11111"), fltreg, d_st]) +addop("fidiv", [bs("11011"), wd, bs("10")] + rmmod(d6, rm_arg_wd)) + +addop("fdivr", [bs("11011"), sd, bs("00")] + rmmod(d7, rm_arg_sd)) +addop("fdivr", [bs8(0xd8), bs("11111"), d_st, fltreg]) +addop("fdivr", [bs8(0xdc), bs("11110"), fltreg, d_st]) +addop("fdivrp", [bs8(0xde), bs("11110"), fltreg, d_st]) +addop("fidivr", [bs("11011"), wd, bs("10")] + rmmod(d7, rm_arg_wd)) + +addop("ffree", [bs8(0xdd), bs("11000"), fltreg]) +addop("ficom", [bs("11011"), wd, bs("10")] + rmmod(d2, rm_arg_wd)) +addop("ficomp", [bs("11011"), wd, bs("10")] + rmmod(d3, rm_arg_wd)) +addop("fild", [bs("11011"), wd, bs("11")] + rmmod(d0, rm_arg_wd)) +addop("fild", [bs8(0xdf)] + rmmod(d5, rm_arg_m64)) + +addop("fincstp", [bs8(0xd9), bs8(0xf7)]) + +# addop("finit", [bs8(0x9b), bs8(0xdb), bs8(0xe3)]) +addop("fninit", [bs8(0xdb), bs8(0xe3)]) + +addop("fist", [bs("11011"), wd, bs("11")] + rmmod(d2, rm_arg_wd)) +addop("fistp", [bs("11011"), wd, bs("11")] + rmmod(d3, rm_arg_wd)) +addop("fistp", [bs8(0xdf)] + rmmod(d7, rm_arg_m64)) + +addop("fisttp", [bs("11011"), wd, bs("11")] + rmmod(d1, rm_arg_wd)) +addop("fisttp", [bs8(0xdd)] + rmmod(d1, rm_arg_m64)) + +addop("fld", [bs("11011"), sd, bs("01")] + rmmod(d0, rm_arg_sd)) +addop("fld", [bs8(0xdb)] + rmmod(d5, rm_arg_m80)) +addop("fld", [bs8(0xd9), bs("11000"), fltreg]) + +addop("fld1", [bs8(0xd9), bs8(0xe8)]) +addop("fldl2t", [bs8(0xd9), bs8(0xe9)]) +addop("fldl2e", [bs8(0xd9), bs8(0xea)]) +addop("fldpi", [bs8(0xd9), bs8(0xeb)]) +addop("fldlg2", [bs8(0xd9), 
bs8(0xec)]) +addop("fldln2", [bs8(0xd9), bs8(0xed)]) +addop("fldz", [bs8(0xd9), bs8(0xee)]) + +addop("fldcw", [bs8(0xd9)] + rmmod(d5, rm_arg_m16)) +addop("fldenv", [bs8(0xd9)] + rmmod(d4, rm_arg_m80)) # XXX TODO: m14? + +addop("fmul", [bs("11011"), sd, bs("00")] + rmmod(d1, rm_arg_sd)) +addop("fmul", [bs("11011"), swapargs, bs("00"), + bs("11001"), d_st, fltreg], [d_st, fltreg]) +addop("fmulp", [bs8(0xde), bs("11001"), fltreg, d_st]) +addop("fimul", [bs("11011"), wd, bs("10")] + rmmod(d1, rm_arg_wd)) + +addop("fnop", [bs8(0xd9), bs8(0xd0)]) +addop("fpatan", [bs8(0xd9), bs8(0xf3)]) +addop("fprem", [bs8(0xd9), bs8(0xf8)]) +addop("fprem1", [bs8(0xd9), bs8(0xf5)]) +addop("fptan", [bs8(0xd9), bs8(0xf2)]) +addop("frndint", [bs8(0xd9), bs8(0xfc)]) +addop("frstor", [bs8(0xdd)] + rmmod(d4, rm_arg_m80)) # XXX TODO: m94 ? +# addop("fsave", [bs8(0x9b), bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX +# TODO: m94 ? +addop("fnsave", [bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX TODO: m94 ? + +addop("fscale", [bs8(0xd9), bs8(0xfd)]) +addop("fsin", [bs8(0xd9), bs8(0xfe)]) +addop("fsincos", [bs8(0xd9), bs8(0xfb)]) +addop("fsqrt", [bs8(0xd9), bs8(0xfa)]) + +addop("fst", [bs("11011"), sd, bs("01")] + rmmod(d2, rm_arg_sd)) +addop("fst", [bs8(0xdd), bs("11010"), fltreg]) +addop("fstp", [bs("11011"), sd, bs("01")] + rmmod(d3, rm_arg_sd)) +addop("fstp", [bs8(0xdb)] + rmmod(d7, rm_arg_m80)) +addop("fstp", [bs8(0xdd), bs("11011"), fltreg]) + +# addop("fstcw", [bs8(0x9b), bs8(0xd9)] + rmmod(d7, rm_arg_m16)) +addop("fnstcw", [bs8(0xd9)] + rmmod(d7, rm_arg_m16)) +# addop("fstenv", [bs8(0x9b), bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX +# TODO: m14? +addop("fnstenv", [bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX TODO: m14? 
+# addop("fstsw", [bs8(0x9b), bs8(0xdd)] + rmmod(d7, rm_arg_m16)) +addop("fnstsw", [bs8(0xdd)] + rmmod(d7, rm_arg_m16)) +# addop("fstsw", [bs8(0x9b), bs8(0xdf), bs8(0xe0), d_ax]) +addop("fnstsw", [bs8(0xdf), bs8(0xe0), d_ax]) + +addop("fsub", [bs("11011"), sd, bs("00")] + rmmod(d4, rm_arg_sd)) +addop("fsub", [bs8(0xd8), bs("11100"), d_st, fltreg]) +addop("fsub", [bs8(0xdc), bs("11101"), fltreg, d_st]) +addop("fsubp", [bs8(0xde), bs("11101"), fltreg, d_st]) +addop("fisub", [bs("11011"), wd, bs("10")] + rmmod(d4, rm_arg_wd)) + +addop("fsubr", [bs("11011"), sd, bs("00")] + rmmod(d5, rm_arg_sd)) +addop("fsubr", [bs8(0xd8), bs("11101"), d_st, fltreg]) +addop("fsubr", [bs8(0xdc), bs("11100"), fltreg, d_st]) +addop("fsubrp", [bs8(0xde), bs("11100"), fltreg, d_st]) +addop("fisubr", [bs("11011"), wd, bs("10")] + rmmod(d5, rm_arg_wd)) +addop("ftst", [bs8(0xd9), bs8(0xe4)]) + + +addop("fucom", [bs8(0xdd), bs("11100"), fltreg]) +addop("fucomp", [bs8(0xdd), bs("11101"), fltreg]) +addop("fucompp", [bs8(0xda), bs8(0xe9)]) + +addop("fxam", [bs8(0xd9), bs8(0xe5)]) +addop("fxch", [bs8(0xd9), bs("11001"), fltreg]) +addop("fxrstor", [bs8(0x0f), bs8(0xae)] + + rmmod(d1, rm_arg_m80)) # XXX TODO m512 +addop("fxsave", [bs8(0x0f), bs8(0xae)] + + rmmod(d0, rm_arg_m80)) # XXX TODO m512 + +addop("fxtract", [bs8(0xd9), bs8(0xf4)]) +addop("fyl2x", [bs8(0xd9), bs8(0xf1)]) +addop("fyl2xp1", [bs8(0xd9), bs8(0xf9)]) + +addop("hlt", [bs8(0xf4)]) +addop("icebp", [bs8(0xf1)]) + +addop("idiv", [bs('1111011'), w8] + rmmod(d7, rm_arg_w8)) + +addop("imul", [bs('1111011'), w8] + rmmod(d5, rm_arg_w8)) +addop("imul", [bs8(0x0f), bs8(0xaf)] + rmmod(rmreg)) + +addop("imul", [bs("011010"), se, bs('1')] + rmmod(rmreg) + [d_imm]) + +addop("in", [bs("1110010"), w8, d_eax, u08]) +addop("in", [bs("1110110"), w8, d_eax, d_edx]) + +addop("inc", [bs('1111111'), w8] + rmmod(d0, rm_arg_w8)) +addop("inc", [bs('01000'), reg]) + +addop("insb", [bs8(0x6c)]) +# ins_name = {16:'INSW', 32:'INSD', 64:'INSD'} +# bs_ins_name = 
bs_modname_size(l=0, name=ins_name) +# addop("ins", [bs8(0x6d), bs_ins_name]) +addop("insw", [bs8(0x6d), bs_opmode16]) +addop("insd", [bs8(0x6d), bs_opmode32]) +addop("insd", [bs8(0x6d), bs_opmode64]) + +addop("int", [bs8(0xcc), u08_3]) +addop("int", [bs8(0xcd), u08]) +addop("into", [bs8(0xce)]) +addop("invd", [bs8(0x0f), bs8(0x08)]) +addop("invlpg", [bs8(0x0f), bs8(0x01)] + rmmod(d7)) + +# iret_name = {16:'IRET', 32:'IRETD', 64:'IRETQ'} +# bs_iret_name = bs_modname_size(l=0, name=iret_name) +# addop("iret", [bs8(0xcf), stk, bs_iret_name]) +addop("iret", [bs8(0xcf), stk, bs_opmode16]) +addop("iretd", [bs8(0xcf), stk, bs_opmode32]) +addop("iretq", [bs8(0xcf), stk, bs_opmode64]) + +addop("j", [bs('0111'), cond, rel_off08]) +# bs_jecxz_name = bs_modname_jecx(l=0) +# addop("jecxz", [bs8(0xe3), rel_off08, bs_jecxz_name]) + +addop("jcxz", [bs8(0xe3), rel_off08, bs_admode16]) +addop("jecxz", [bs8(0xe3), rel_off08, bs_admode32]) +addop("jrcxz", [bs8(0xe3), rel_off08, bs_admode64]) + +addop("j", [bs8(0x0f), bs('1000'), cond, rel_off]) +addop("jmp", [bs8(0xeb), rel_off08]) +addop("jmp", [bs8(0xe9), rel_off]) +# TODO XXX replace stk force64? +addop("jmp", [bs8(0xff), stk] + rmmod(d4)) +addop("jmp", [bs8(0xea), moff, msegoff]) + +addop("jmpf", [bs8(0xff), stk] + rmmod(d5)) + +addop("lahf", [bs8(0x9f)]) +addop("lar", [bs8(0x0f), bs8(0x02)] + rmmod(rmreg)) + +# XXX TODO LDS LES ... 
+addop("lea", [bs8(0x8d)] + rmmod(rmreg)) +addop("leave", [bs8(0xc9)]) + +addop("lodsb", [bs8(0xac)]) +# lods_name = {16:'LODSW', 32:'LODSD', 64:'LODSQ'} +# bs_lods_name = bs_modname_size(l=0, name=lods_name) +# addop("lods", [bs8(0xad), bs_lods_name]) +addop("lodsw", [bs8(0xad), bs_opmode16]) +addop("lodsd", [bs8(0xad), bs_opmode32]) +addop("lodsq", [bs8(0xad), bs_opmode64]) + +addop("loop", [bs8(0xe2), rel_off08]) +addop("loope", [bs8(0xe1), rel_off08]) +addop("loopne", [bs8(0xe0), rel_off08]) +addop("lsl", [bs8(0x0f), bs8(0x03)] + rmmod(rmreg)) +addop("monitor", [bs8(0x0f), bs8(0x01), bs8(0xc8)]) + +addop("mov", [bs("100010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("mov", [bs("100011"), swapargs, bs('0')] + rmmod(segm), [rm_arg, segm]) +addop("mov", [bs("101000"), swapargs, w8, d_eax, movoff], [d_eax, movoff]) +addop("mov", [bs("1011"), w8, reg, d_imm64]) +addop("mov", [bs("1100011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) +addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('0')] + + rmmod(crreg), [rm_arg, crreg]) +addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('1')] + + rmmod(drreg), [rm_arg, drreg]) +addop("movsb", [bs8(0xa4)]) +# movs_name = {16:'MOVSW', 32:'MOVSD', 64:'MOVSQ'} +# bs_movs_name = bs_modname_size(l=0, name=movs_name) +# addop("movs", [bs8(0xa5), bs_movs_name]) +addop("movsw", [bs8(0xa5), bs_opmode16]) +addop("movsd", [bs8(0xa5), bs_opmode32]) +addop("movsq", [bs8(0xa5), bs_opmode64]) + +addop("movsx", [bs8(0x0f), bs("1011111"), w8, sx] + rmmod(rmreg, rm_arg_sx)) +# addop("movsxd", [bs8(0x63), sxd] + rmmod(rmreg, rm_arg_sxd)) +type("movsxd", (mn_x86,), { + "fields": [bs8(0x63), sxd] + rmmod(rmreg, rm_arg_sxd), + "modes": [64], 'alias': False}) + +addop("movups", + [bs8(0x0f), bs8(0x10), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("movsd", [bs8(0x0f), bs("0001000"), swapargs, xmm, pref_f2] + + rmmod(rmreg, rm_arg), [xmm, rm_arg]) +addop("movss", [bs8(0x0f), bs8(0x10), xmm, pref_f3] + rmmod(rmreg, rm_arg)) 
+addop("movupd", [bs8(0x0f), bs8(0x10), xmm, pref_66] + rmmod(rmreg, rm_arg)) + + +addop("addss", [bs8(0x0f), bs8(0x58), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("addsd", [bs8(0x0f), bs8(0x58), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("subss", [bs8(0x0f), bs8(0x5c), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("subsd", [bs8(0x0f), bs8(0x5c), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("mulss", [bs8(0x0f), bs8(0x59), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("mulsd", [bs8(0x0f), bs8(0x59), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("divss", [bs8(0x0f), bs8(0x5e), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("divsd", [bs8(0x0f), bs8(0x5e), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + + +addop("pminsw", [bs8(0x0f), bs8(0xea), mm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("pminsw", [bs8(0x0f), bs8(0xea), xmm, pref_66] + rmmod(rmreg, rm_arg)) + + +addop("pxor", [bs8(0x0f), bs8(0xef), xmm] + rmmod(rmreg, rm_arg)) + +addop("ucomiss", + [bs8(0x0f), bs8(0x2e), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("ucomisd", [bs8(0x0f), bs8(0x2e), xmm, pref_66] + rmmod(rmreg, rm_arg)) + +addop("andps", [bs8(0x0f), bs8(0x54), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("andpd", [bs8(0x0f), bs8(0x54), xmm, pref_66] + rmmod(rmreg, rm_arg)) + + +addop("maxsd", [bs8(0x0f), bs8(0x5f), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("cvtsi2sd", + [bs8(0x0f), bs8(0x2a), xmmreg, pref_f2] + rmmod(rmreg, rm_arg)) +addop("cvtsi2ss", + [bs8(0x0f), bs8(0x2a), xmmreg, pref_f3] + rmmod(rmreg, rm_arg)) + + +addop("cvttsd2ss", + [bs8(0x0f), bs8(0x2c), xmmreg, pref_f2] + rmmod(rmreg, rm_arg)) +addop("cvttss2si", + [bs8(0x0f), bs8(0x2c), xmmreg, pref_f3] + rmmod(rmreg, rm_arg)) + + +# type("movupd", (mn_x86,), {"fields":[bs8(0x0f), bs8(0x10), xmm, pref_f2] +# + rmmod(rmreg, rm_arg_sxd), 'prefixed':'\xf2'}) + +addop("movzx", [bs8(0x0f), bs("1011011"), w8, sx] + rmmod(rmreg, rm_arg_sx)) +addop("mul", [bs('1111011'), w8] + rmmod(d4, rm_arg_w8)) + +addop("neg", [bs('1111011'), w8] + 
rmmod(d3, rm_arg_w8)) +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d0, rm_arg)) # XXX TODO m512 +addop("not", [bs('1111011'), w8] + rmmod(d2, rm_arg_w8)) +addop("or", [bs("0000110"), w8, d_eax, d_imm]) +addop("or", [bs("100000"), se, w8] + rmmod(d1, rm_arg_w8) + [d_imm]) +addop("or", [bs("000010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("out", [bs("1110011"), w8, u08, d_eax]) +addop("out", [bs("1110111"), w8, d_edx, d_eax]) + +addop("outsb", [bs8(0x6e)]) +# outs_name = {16:'OUTSW', 32:'OUTSD', 64:'OUTSD'} +# bs_outs_name = bs_modname_size(l=0, name=outs_name) +# addop("outs", [bs8(0x6f), bs_outs_name]) +addop("outsw", [bs8(0x6f), bs_opmode16]) +addop("outsd", [bs8(0x6f), bs_opmode32]) +addop("outsd", [bs8(0x6f), bs_opmode64]) + + +# addop("pause", [bs8(0xf3), bs8(0x90)]) + +addop("pop", [bs8(0x8f), stk] + rmmod(d0)) +addop("pop", [bs("01011"), stk, reg]) +addop("pop", [bs8(0x1f), d_ds]) +addop("pop", [bs8(0x07), d_es]) +addop("pop", [bs8(0x17), d_ss]) +addop("pop", [bs8(0x0f), bs8(0xa1), d_fs]) +addop("pop", [bs8(0x0f), bs8(0xa9), d_gs]) + +# popa_name = {16:'POPA', 32:'POPAD'} +# bs_popa_name = bs_modname_size(l=0, name=popa_name) +# addop("popa", [bs8(0x61), bs_popa_name]) +addop("popa", [bs8(0x61), bs_opmode16]) +addop("popad", [bs8(0x61), bs_opmode32]) + +# popf_name = {16:'POPF', 32:'POPFD', 64:'POPFQ'} +# bs_popf_name = bs_modname_size(l=0, name=popf_name) +# addop("popf", [bs8(0x9d), bs_popf_name]) +addop("popf", [bs8(0x9d), bs_opmode16]) +addop("popfd", [bs8(0x9d), bs_opmode32]) +addop("popfq", [bs8(0x9d), bs_opmode64]) + +addop("prefetch0", [bs8(0x0f), bs8(0x18)] + rmmod(d1, rm_arg_m08)) +addop("prefetch1", [bs8(0x0f), bs8(0x18)] + rmmod(d2, rm_arg_m08)) +addop("prefetch2", [bs8(0x0f), bs8(0x18)] + rmmod(d3, rm_arg_m08)) +addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08)) + +addop("push", [bs8(0xff), stk] + rmmod(d6)) +addop("push", [bs("01010"), stk, reg]) +addop("push", [bs8(0x6a), rel_off08, stk]) 
+addop("push", [bs8(0x68), d_imm, stk]) +addop("push", [bs8(0x0e), d_cs]) +addop("push", [bs8(0x16), d_ss]) +addop("push", [bs8(0x1e), d_ds]) +addop("push", [bs8(0x06), d_es]) +addop("push", [bs8(0x0f), bs8(0xa0), d_fs]) +addop("push", [bs8(0x0f), bs8(0xa8), d_gs]) + +# pusha_name = {16:'PUSHA', 32:'PUSHAD'} +# bs_pusha_name = bs_modname_size(l=0, name=pusha_name) +# addop("pusha", [bs8(0x60), bs_pusha_name]) +addop("pusha", [bs8(0x60), bs_opmode16_no64]) +addop("pushad", [bs8(0x60), bs_opmode32_no64]) + + +# pushf_name = {16:'PUSHF', 32:'PUSHFD', 64:'PUSHFQ'} +# bs_pushf_name = bs_modname_size(l=0, name=pushf_name) +# addop("pushf", [bs8(0x9c), bs_pushf_name]) +addop("pushf", [bs8(0x9c), bs_opmode16]) +addop("pushfd", [bs8(0x9c), bs_opmode32]) +addop("pushfq", [bs8(0x9c), bs_opmode64]) + +addop("rcl", [bs('110100'), d_cl1, w8] + + rmmod(d2, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rcl", [bs('1100000'), w8] + rmmod(d2, rm_arg_w8) + [u08]) +addop("rcr", [bs('110100'), d_cl1, w8] + + rmmod(d3, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rcr", [bs('1100000'), w8] + rmmod(d3, rm_arg_w8) + [u08]) +addop("rol", [bs('110100'), d_cl1, w8] + + rmmod(d0, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rol", [bs('1100000'), w8] + rmmod(d0, rm_arg_w8) + [u08]) +addop("ror", [bs('110100'), d_cl1, w8] + + rmmod(d1, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("ror", [bs('1100000'), w8] + rmmod(d1, rm_arg_w8) + [u08]) + +addop("rdmsr", [bs8(0x0f), bs8(0x32)]) +addop("rdpmc", [bs8(0x0f), bs8(0x33)]) +addop("rdtsc", [bs8(0x0f), bs8(0x31)]) +addop("ret", [bs8(0xc3), stk]) +addop("ret", [bs8(0xc2), stk, u16]) +addop("retf", [bs8(0xcb), stk]) +addop("retf", [bs8(0xca), stk, u16]) + +addop("rsm", [bs8(0x0f), bs8(0xaa)]) +addop("sahf", [bs8(0x9e)]) + +# XXX tipo in doc: /4 instead of /6 +addop("sal", [bs('110100'), d_cl1, w8] + + rmmod(d6, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("sal", [bs('1100000'), w8] + rmmod(d6, rm_arg_w8) + [u08]) +addop("sar", [bs('110100'), d_cl1, w8] + + rmmod(d7, rm_arg_w8), 
[rm_arg_w8, d_cl1]) +addop("sar", [bs('1100000'), w8] + rmmod(d7, rm_arg_w8) + [u08]) + +addop("scasb", [bs8(0xae)]) +# scas_name = {16:'SCASW', 32:'SCASD', 64:'SCASQ'} +# bs_scas_name = bs_modname_size(l=0, name=scas_name) +# addop("scas", [bs8(0xaf), bs_scas_name]) +addop("scasw", [bs8(0xaf), bs_opmode16]) +addop("scasd", [bs8(0xaf), bs_opmode32]) +addop("scasq", [bs8(0xaf), bs_opmode64]) + +addop("shl", [bs('110100'), d_cl1, w8] + + rmmod(d4, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("shl", [bs('1100000'), w8] + rmmod(d4, rm_arg_w8) + [u08]) +addop("shr", [bs('110100'), d_cl1, w8] + + rmmod(d5, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("shr", [bs('1100000'), w8] + rmmod(d5, rm_arg_w8) + [u08]) + +addop("sbb", [bs("0001110"), w8, d_eax, d_imm]) +addop("sbb", [bs("100000"), se, w8] + rmmod(d3, rm_arg_w8) + [d_imm]) +addop("sbb", [bs("000110"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_m08)) +addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0)) +addop("shld", [bs8(0x0f), bs8(0xa4)] + + rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) +addop("shld", [bs8(0x0f), bs8(0xa5)] + + rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) +addop("shrd", [bs8(0x0f), bs8(0xac)] + + rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) +addop("shrd", [bs8(0x0f), bs8(0xad)] + + rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) +addop("sidt", [bs8(0x0f), bs8(0x01)] + rmmod(d1)) +addop("sldt", [bs8(0x0f), bs8(0x00)] + rmmod(d0)) +addop("smsw", [bs8(0x0f), bs8(0x01)] + rmmod(d4)) +addop("stc", [bs8(0xf9)]) +addop("std", [bs8(0xfd)]) +addop("sti", [bs8(0xfb)]) +addop("stosb", [bs8(0xaa)]) +# stos_name = {16:'STOSW', 32:'STOSD', 64:'STOSQ'} +# bs_stos_name = bs_modname_size(l=0, name=stos_name) +# addop("stos", [bs8(0xab), bs_stos_name]) +addop("stosw", [bs8(0xab), bs_opmode16]) +addop("stosd", [bs8(0xab), bs_opmode32]) +addop("stosq", [bs8(0xab), bs_opmode64]) + +addop("str", [bs8(0x0f), bs8(0x00)] + rmmod(d1)) + +addop("sub", 
[bs("0010110"), w8, d_eax, d_imm]) +addop("sub", [bs("100000"), se, w8] + rmmod(d5, rm_arg_w8) + [d_imm]) +addop("sub", [bs("001010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("syscall", [bs8(0x0f), bs8(0x05)]) +addop("sysenter", [bs8(0x0f), bs8(0x34)]) +addop("sysexit", [bs8(0x0f), bs8(0x35)]) +addop("sysret", [bs8(0x0f), bs8(0x07)]) +addop("test", [bs("1010100"), w8, d_eax, d_imm]) +addop("test", [bs("1111011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) +addop("test", [bs("1000010"), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("ud2", [bs8(0x0f), bs8(0x0b)]) +addop("verr", [bs8(0x0f), bs8(0x00)] + rmmod(d4)) +addop("verw", [bs8(0x0f), bs8(0x00)] + rmmod(d5)) +addop("wbind", [bs8(0x0f), bs8(0x09)]) +addop("wrmsr", [bs8(0x0f), bs8(0x30)]) +addop("xadd", [bs8(0x0f), bs("1100000"), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("nop", [bs8(0x90)], alias=True) + +addop("xchg", [bs('10010'), d_eax, reg]) +addop("xchg", [bs('1000011'), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("xlat", [bs8(0xd7)]) + + +addop("xor", [bs("0011010"), w8, d_eax, d_imm]) +addop("xor", [bs("100000"), se, w8] + rmmod(d6, rm_arg_w8) + [d_imm]) +addop("xor", [bs("001100"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + + +# xorps_name = {16:'XORPD', 32:'XORPS', 64:'XORPS'} +# bs_xorps_name = bs_modname_size(l=0, name=xorps_name) +# addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [ +# bs_xorps_name] ) +addop("xorpd", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode16]) +addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode32]) +addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode64]) + +# movaps_name = {16:'MOVAPD', 32:'MOVAPS', 64:'MOVAPS'} +# bs_movaps_name = bs_modname_size(l=0, name=movaps_name) +# addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + rmmod(rmreg, +# rm_arg) + [ bs_movaps_name], [rmreg, rm_arg]) +addop("movapd", [bs8(0x0f), 
bs("0010100"), swapargs, xmm] + + rmmod(rmreg, rm_arg) + [bs_opmode16], [rmreg, rm_arg]) +addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + + rmmod(rmreg, rm_arg) + [bs_opmode32], [rmreg, rm_arg]) +addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + + rmmod(rmreg, rm_arg) + [bs_opmode64], [rmreg, rm_arg]) + +mn_x86.bintree = factor_one_bit(mn_x86.bintree) +# mn_x86.bintree = factor_fields_all(mn_x86.bintree) +""" +mod reg r/m + XX XXX XXX + +""" + + +def print_size(e): + print e, e.size + return e diff --git a/miasm2/arch/x86/disasm.py b/miasm2/arch/x86/disasm.py new file mode 100644 index 00000000..7185a973 --- /dev/null +++ b/miasm2/arch/x86/disasm.py @@ -0,0 +1,51 @@ +from miasm2.core.asmbloc import asm_constraint, asm_label, disasmEngine +from miasm2.expression.expression import ExprId +from arch import mn_x86 + + +def cb_x86_callpop(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + """ + 1000: call 1005 + 1005: pop + """ + if len(cur_bloc.lines) < 1: + return + l = cur_bloc.lines[-1] + if l.name != 'CALL': + return + dst = l.args[0] + if not (isinstance(dst, ExprId) and isinstance(dst.name, asm_label)): + return + if dst.name.offset != l.offset + l.l: + return + l.name = 'PUSH' + # cur_bloc.bto.pop() + cur_bloc.bto[0].c_bto = asm_constraint.c_next + + +cb_x86_funcs = [cb_x86_callpop] + + +def cb_x86_disasm(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + for func in cb_x86_funcs: + func(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + + +class dis_x86(disasmEngine): + attrib = None + + def __init__(self, bs=None, **kwargs): + super(dis_x86, self).__init__(mn_x86, self.attrib, bs, **kwargs) + self.dis_bloc_callback = cb_x86_disasm + + +class dis_x86_16(dis_x86): + attrib = 16 + + +class dis_x86_32(dis_x86): + attrib = 32 + + +class dis_x86_64(dis_x86): + attrib = 64 diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py new file mode 100644 index 00000000..04cb4cca --- /dev/null +++ 
b/miasm2/arch/x86/ira.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.core.graph import DiGraph +from miasm2.ir.ir import ir, irbloc +from miasm2.ir.analysis import ira +from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 + + +class ir_a_x86_16(ir_x86_16, ira): + + def __init__(self, symbol_pool=None): + ir_x86_16.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.AX + + # for test XXX TODO + def set_dead_regs(self, b): + b.rw[-1][1].add(self.arch.regs.zf) + b.rw[-1][1].add(self.arch.regs.of) + b.rw[-1][1].add(self.arch.regs.pf) + b.rw[-1][1].add(self.arch.regs.cf) + b.rw[-1][1].add(self.arch.regs.nf) + b.rw[-1][1].add(self.arch.regs.af) + + def get_out_regs(self, b): + return set([self.ret_reg, self.sp]) + + def add_unused_regs(self): + leaves = [self.blocs[n] for n in self.g.leafs()] + for b in leaves: + self.set_dead_regs(b) + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), + ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ]] + return irs + + def post_add_bloc(self, bloc, ir_blocs): + ir.post_add_bloc(self, bloc, ir_blocs) + if not bloc.lines: + return + l = bloc.lines[-1] + sub_call_dst = None + if not l.is_subcall(): + return + sub_call_dst = l.args[0] + if self.ExprIsLabel(sub_call_dst): + sub_call_dst = sub_call_dst.name + for b in ir_blocs: + l = b.lines[-1] + sub_call_dst = None + if not l.is_subcall(): + continue + sub_call_dst = l.args[0] + if self.ExprIsLabel(sub_call_dst): + sub_call_dst = sub_call_dst.name + lbl = bloc.get_next() + new_lbl = self.gen_label() + irs = self.call_effects(l.args[0]) + nbloc = irbloc(new_lbl, ExprId(lbl, size=self.pc.size), irs) + nbloc.lines = [l] + self.blocs[new_lbl] = nbloc + b.dst = ExprId(new_lbl, size=self.pc.size) + return + + +class ir_a_x86_32(ir_x86_32, ir_a_x86_16): + + def __init__(self, symbol_pool=None): + ir_x86_32.__init__(self, symbol_pool) + 
self.ret_reg = self.arch.regs.EAX + + +class ir_a_x86_64(ir_x86_64, ir_a_x86_16): + + def __init__(self, symbol_pool=None): + ir_x86_64.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.RAX + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp, + self.arch.regs.RCX, + self.arch.regs.RDX, + self.arch.regs.R8, + self.arch.regs.R9, + )), + ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ]] + return irs diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py new file mode 100644 index 00000000..532b4f0c --- /dev/null +++ b/miasm2/arch/x86/regs.py @@ -0,0 +1,405 @@ +from miasm2.expression.expression import * +from miasm2.core.cpu import reg_info + + +IP = ExprId('IP', 16) +EIP = ExprId('EIP', 32) +RIP = ExprId('RIP', 64) +exception_flags = ExprId('exception_flags', 32) + +# GP + + +regs08_str = ["AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"] + \ + ["R%dB" % (i + 8) for i in xrange(8)] +regs08_expr = [ExprId(x, 8) for x in regs08_str] + +regs08_64_str = ["AL", "CL", "DL", "BL", "SPL", "BPL", "SIL", "DIL"] + \ + ["R%dB" % (i + 8) for i in xrange(8)] +regs08_64_expr = [ExprId(x, 8) for x in regs08_64_str] + + +regs16_str = ["AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI"] + \ + ["R%dW" % (i + 8) for i in xrange(8)] +regs16_expr = [ExprId(x, 16) for x in regs16_str] + +regs32_str = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"] + \ + ["R%dD" % (i + 8) for i in xrange(8)] +regs32_expr = [ExprId(x, 32) for x in regs32_str] + +regs64_str = ["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "RIP"] +regs64_expr = [ExprId(x, 64) for x in regs64_str] + + +regs_xmm_str = ["XMM%d" % i for i in xrange(16)] +regs_xmm_expr = [ExprId(x, 128) for x in regs_xmm_str] + +regs_mm_str = ["MM%d" % i for i in xrange(16)] +regs_mm_expr = [ExprId(x, 64) for x in regs_mm_str] + + +gpregs08 = reg_info(regs08_str, regs08_expr) +gpregs08_64 = 
reg_info(regs08_64_str, regs08_64_expr) +gpregs16 = reg_info(regs16_str, regs16_expr) +gpregs32 = reg_info(regs32_str, regs32_expr) +gpregs64 = reg_info(regs64_str, regs64_expr) + +gpregs_xmm = reg_info(regs_xmm_str, regs_xmm_expr) +gpregs_mm = reg_info(regs_mm_str, regs_mm_expr) + +r08_eax = reg_info([regs08_str[0]], [regs08_expr[0]]) +r16_eax = reg_info([regs16_str[0]], [regs16_expr[0]]) +r32_eax = reg_info([regs32_str[0]], [regs32_expr[0]]) +r64_eax = reg_info([regs64_str[0]], [regs64_expr[0]]) + +r08_ecx = reg_info([regs08_str[1]], [regs08_expr[1]]) + +r_eax_all = reg_info( + [regs08_str[0], regs16_str[0], regs32_str[0], regs64_str[0]], + [regs08_expr[0], regs16_expr[0], regs32_expr[0], regs64_expr[0]]) +r_edx_all = reg_info( + [regs08_str[2], regs16_str[2], regs32_str[2], regs64_str[2]], + [regs08_expr[2], regs16_expr[2], regs32_expr[2], regs64_expr[2]]) + +r16_edx = reg_info([regs16_str[2]], [regs16_expr[2]]) + + +selectr_str = ["ES", "CS", "SS", "DS", "FS", "GS"] +selectr_expr = [ExprId(x, 16) for x in selectr_str] +segmreg = reg_info(selectr_str, selectr_expr) + +crregs32_str = ["CR%d" % i for i in xrange(8)] +crregs32_expr = [ExprId(x, 32) for x in crregs32_str] +crregs = reg_info(crregs32_str, crregs32_expr) + + +drregs32_str = ["DR%d" % i for i in xrange(8)] +drregs32_expr = [ExprId(x, 32) for x in drregs32_str] +drregs = reg_info(drregs32_str, drregs32_expr) + + +fltregs32_str = ["ST(%d)" % i for i in xrange(8)] +fltregs32_expr = [ExprId(x, 64) for x in fltregs32_str] +fltregs = reg_info(fltregs32_str, fltregs32_expr) + +r_st_all = reg_info(['ST'], + [ExprId('ST', 64)]) + +r_cs_all = reg_info(['CS'], + [ExprId('CS', 16)]) +r_ds_all = reg_info(['DS'], + [ExprId('DS', 16)]) +r_es_all = reg_info(['ES'], + [ExprId('ES', 16)]) +r_ss_all = reg_info(['SS'], + [ExprId('SS', 16)]) +r_fs_all = reg_info(['FS'], + [ExprId('FS', 16)]) +r_gs_all = reg_info(['GS'], + [ExprId('GS', 16)]) + + +AL = regs08_expr[0] +CL = regs08_expr[1] +DL = regs08_expr[2] +BL = 
regs08_expr[3] +AH = regs08_expr[4] +CH = regs08_expr[5] +DH = regs08_expr[6] +BH = regs08_expr[7] +R8B = regs08_expr[8] +R9B = regs08_expr[9] +R10B = regs08_expr[10] +R11B = regs08_expr[11] +R12B = regs08_expr[12] +R13B = regs08_expr[13] +R14B = regs08_expr[14] +R15B = regs08_expr[15] +R15B = regs08_expr[15] + +SPL = regs08_64_expr[4] +BPL = regs08_64_expr[5] +SIL = regs08_64_expr[6] +DIL = regs08_64_expr[7] + + +AX = regs16_expr[0] +CX = regs16_expr[1] +DX = regs16_expr[2] +BX = regs16_expr[3] +SP = regs16_expr[4] +BP = regs16_expr[5] +SI = regs16_expr[6] +DI = regs16_expr[7] +R8W = regs16_expr[8] +R9W = regs16_expr[9] +R10W = regs16_expr[10] +R11W = regs16_expr[11] +R12W = regs16_expr[12] +R13W = regs16_expr[13] +R14W = regs16_expr[14] +R15W = regs16_expr[15] + + +EAX = regs32_expr[0] +ECX = regs32_expr[1] +EDX = regs32_expr[2] +EBX = regs32_expr[3] +ESP = regs32_expr[4] +EBP = regs32_expr[5] +ESI = regs32_expr[6] +EDI = regs32_expr[7] +R8D = regs32_expr[8] +R9D = regs32_expr[9] +R10D = regs32_expr[10] +R11D = regs32_expr[11] +R12D = regs32_expr[12] +R13D = regs32_expr[13] +R14D = regs32_expr[14] +R15D = regs32_expr[15] + + +RAX = regs64_expr[0] +RCX = regs64_expr[1] +RDX = regs64_expr[2] +RBX = regs64_expr[3] +RSP = regs64_expr[4] +RBP = regs64_expr[5] +RSI = regs64_expr[6] +RDI = regs64_expr[7] +R8 = regs64_expr[8] +R9 = regs64_expr[9] +R10 = regs64_expr[10] +R11 = regs64_expr[11] +R12 = regs64_expr[12] +R13 = regs64_expr[13] +R14 = regs64_expr[14] +R15 = regs64_expr[15] + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_pf = 'pf' +reg_of = 'of' +reg_cf = 'cf' +reg_tf = 'tf' +reg_if = 'i_f' +reg_df = 'df' +reg_af = 'af' +reg_iopl = 'iopl_f' +reg_nt = 'nt' +reg_rf = 'rf' +reg_vm = 'vm' +reg_ac = 'ac' +reg_vif = 'vif' +reg_vip = 'vip' +reg_id = 'i_d' + + +reg_es = "ES" +reg_cs = "CS" +reg_ss = "SS" +reg_ds = "DS" +reg_fs = "FS" +reg_gs = "GS" + +reg_dr0 = 'DR0' +reg_dr1 = 'DR1' +reg_dr2 = 'DR2' +reg_dr3 = 'DR3' +reg_dr4 = 'DR4' +reg_dr5 = 'DR5' +reg_dr6 = 'DR6' +reg_dr7 = 
'DR7' + +reg_cr0 = 'CR0' +reg_cr1 = 'CR1' +reg_cr2 = 'CR2' +reg_cr3 = 'CR3' +reg_cr4 = 'CR4' +reg_cr5 = 'CR5' +reg_cr6 = 'CR6' +reg_cr7 = 'CR7' + +reg_mm0 = 'MM0' +reg_mm1 = 'MM1' +reg_mm2 = 'MM2' +reg_mm3 = 'MM3' +reg_mm4 = 'MM4' +reg_mm5 = 'MM5' +reg_mm6 = 'MM6' +reg_mm7 = 'MM7' + + +reg_tsc1 = "tsc1" +reg_tsc2 = "tsc2" + +reg_float_c0 = 'float_c0' +reg_float_c1 = 'float_c1' +reg_float_c2 = 'float_c2' +reg_float_c3 = 'float_c3' +reg_float_stack_ptr = "float_stack_ptr" +reg_float_control = 'reg_float_control' +reg_float_eip = 'reg_float_eip' +reg_float_cs = 'reg_float_cs' +reg_float_address = 'reg_float_address' +reg_float_ds = 'reg_float_ds' + + +reg_float_st0 = 'float_st0' +reg_float_st1 = 'float_st1' +reg_float_st2 = 'float_st2' +reg_float_st3 = 'float_st3' +reg_float_st4 = 'float_st4' +reg_float_st5 = 'float_st5' +reg_float_st6 = 'float_st6' +reg_float_st7 = 'float_st7' + + +dr0 = ExprId(reg_dr0) +dr1 = ExprId(reg_dr1) +dr2 = ExprId(reg_dr2) +dr3 = ExprId(reg_dr3) +dr4 = ExprId(reg_dr4) +dr5 = ExprId(reg_dr5) +dr6 = ExprId(reg_dr6) +dr7 = ExprId(reg_dr7) + +cr0 = ExprId(reg_cr0) +cr1 = ExprId(reg_cr1) +cr2 = ExprId(reg_cr2) +cr3 = ExprId(reg_cr3) +cr4 = ExprId(reg_cr4) +cr5 = ExprId(reg_cr5) +cr6 = ExprId(reg_cr6) +cr7 = ExprId(reg_cr7) + +mm0 = ExprId(reg_mm0, 64) +mm1 = ExprId(reg_mm1, 64) +mm2 = ExprId(reg_mm2, 64) +mm3 = ExprId(reg_mm3, 64) +mm4 = ExprId(reg_mm4, 64) +mm5 = ExprId(reg_mm5, 64) +mm6 = ExprId(reg_mm6, 64) +mm7 = ExprId(reg_mm7, 64) + + +# tmp1= ExprId(reg_tmp1) +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +pf = ExprId(reg_pf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) +tf = ExprId(reg_tf, size=1) +i_f = ExprId(reg_if, size=1) +df = ExprId(reg_df, size=1) +af = ExprId(reg_af, size=1) +iopl = ExprId(reg_iopl, size=2) +nt = ExprId(reg_nt, size=1) +rf = ExprId(reg_rf, size=1) +vm = ExprId(reg_vm, size=1) +ac = ExprId(reg_ac, size=1) +vif = ExprId(reg_vif, size=1) +vip = ExprId(reg_vip, size=1) +i_d = 
ExprId(reg_id, size=1) + +ES = ExprId(reg_es, size=16) +CS = ExprId(reg_cs, size=16) +SS = ExprId(reg_ss, size=16) +DS = ExprId(reg_ds, size=16) +FS = ExprId(reg_fs, size=16) +GS = ExprId(reg_gs, size=16) + +tsc1 = ExprId(reg_tsc1, size=32) +tsc2 = ExprId(reg_tsc2, size=32) + +float_c0 = ExprId(reg_float_c0, size=1) +float_c1 = ExprId(reg_float_c1, size=1) +float_c2 = ExprId(reg_float_c2, size=1) +float_c3 = ExprId(reg_float_c3, size=1) +float_stack_ptr = ExprId(reg_float_stack_ptr, size=3) +float_control = ExprId(reg_float_control, 16) +float_eip = ExprId(reg_float_eip) +float_cs = ExprId(reg_float_cs, size=16) +float_address = ExprId(reg_float_address) +float_ds = ExprId(reg_float_ds, size=16) + +float_st0 = ExprId(reg_float_st0, 64) +float_st1 = ExprId(reg_float_st1, 64) +float_st2 = ExprId(reg_float_st2, 64) +float_st3 = ExprId(reg_float_st3, 64) +float_st4 = ExprId(reg_float_st4, 64) +float_st5 = ExprId(reg_float_st5, 64) +float_st6 = ExprId(reg_float_st6, 64) +float_st7 = ExprId(reg_float_st7, 64) + +EAX_init = ExprId('EAX_init') +EBX_init = ExprId('EBX_init') +ECX_init = ExprId('ECX_init') +EDX_init = ExprId('EDX_init') +ESI_init = ExprId('ESI_init') +EDI_init = ExprId('EDI_init') +ESP_init = ExprId('ESP_init') +EBP_init = ExprId('EBP_init') + + +RAX_init = ExprId('RAX_init', 64) +RBX_init = ExprId('RBX_init', 64) +RCX_init = ExprId('RCX_init', 64) +RDX_init = ExprId('RDX_init', 64) +RSI_init = ExprId('RSI_init', 64) +RDI_init = ExprId('RDI_init', 64) +RSP_init = ExprId('RSP_init', 64) +RBP_init = ExprId('RBP_init', 64) + + +all_regs_ids = [ + AL, CL, DL, BL, AH, CH, DH, BH, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B, R15B, + SPL, BPL, SIL, DIL, + AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, + IP, + EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, + EIP, + + RAX, RBX, RCX, RDX, RSP, RBP, RIP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15, R15, + zf, nf, pf, of, cf, af, df, + tf, 
i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, + float_control, float_eip, float_cs, float_address, float_ds, + tsc1, tsc2, + ES, CS, SS, DS, FS, GS, + float_st0, float_st1, float_st2, float_st3, + float_st4, float_st5, float_st6, float_st7, + float_c0, float_c1, float_c2, float_c3, + cr0, cr3, + dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, + float_stack_ptr, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + + exception_flags, +] + fltregs32_expr + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + all_regs_ids_init[i].is_term = True + regs_init[r] = all_regs_ids_init[i] + + +mRAX = {16: AX, 32: EAX, 64: RAX} +mRBX = {16: BX, 32: EBX, 64: RBX} +mRCX = {16: CX, 32: ECX, 64: RCX} +mRDX = {16: DX, 32: EDX, 64: RDX} +mRSI = {16: SI, 32: ESI, 64: RSI} +mRDI = {16: DI, 32: EDI, 64: RDI} +mRBP = {16: BP, 32: EBP, 64: RBP} +mRSP = {16: SP, 32: ESP, 64: RSP} +mRIP = {16: IP, 32: EIP, 64: RIP} diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py new file mode 100644 index 00000000..4b8a357b --- /dev/null +++ b/miasm2/arch/x86/sem.py @@ -0,0 +1,3029 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

from miasm2.expression.expression import *
from miasm2.expression.simplifications import expr_simp
from miasm2.arch.x86.regs import *
from miasm2.arch.x86.arch import mn_x86, repeat_mn, replace_regs
from miasm2.core.asmbloc import expr_is_int_or_label
from miasm2.ir.ir import ir, irbloc
import math
import struct
from regs import *

# Exception bit masks.
# interrupt with eip update after instr
EXCEPT_SOFT_BP = 1 << 1
EXCEPT_INT_XX = 1 << 2

EXCEPT_BREAKPOINT_INTERN = 1 << 10

EXCEPT_NUM_UPDT_EIP = 1 << 11
# interrupt with eip at instr
EXCEPT_UNK_MEM_AD = 1 << 12
EXCEPT_THROW_SEH = 1 << 13
EXCEPT_UNK_EIP = 1 << 14
# NOTE(review): same bit as EXCEPT_UNK_EIP -- confirm this aliasing is intended.
EXCEPT_ACCESS_VIOL = 1 << 14
EXCEPT_INT_DIV_BY_ZERO = 1 << 16
EXCEPT_PRIV_INSN = 1 << 17
EXCEPT_ILLEGAL_INSN = 1 << 18
EXCEPT_UNK_MNEMO = 1 << 19


"""
http://www.emulators.com/docs/nx11_flags.htm

CF(A+B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND NOT (A XOR B)) < 0)
CF(A-B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND (A XOR B)) < 0)

OF(A+B) = ((A XOR D) AND NOT (A XOR B)) < 0
OF(A-B) = ((A XOR D) AND (A XOR B)) < 0
"""


# The eight float_st* register expressions, in index order.
float_list = [
    float_st0, float_st1, float_st2, float_st3,
    float_st4, float_st5, float_st6, float_st7,
]


# XXX TODO make default check against 0 or not 0 (same eq as in C)


def update_flag_zf(a):
    """Return a one-element list assigning zf from the result `a`.

    zf receives ExprCond(a, 0, 1); presumably this yields 1 only when `a`
    is zero -- confirm against ExprCond's definition.
    """
    zf_clear = ExprInt_from(zf, 0)
    zf_set = ExprInt_from(zf, 1)
    return [ExprAff(zf, ExprCond(a, zf_clear, zf_set))]


def update_flag_nf(a):
    """Return a one-element list assigning nf the most significant bit of `a`."""
    sign_bit = a.msb()
    return [ExprAff(nf, sign_bit)]


def update_flag_pf(a):
    """Return a one-element list assigning pf the 'parity' op of `a` & 0xFF."""
    low_byte = a & ExprInt_from(a, 0xFF)
    return [ExprAff(pf, ExprOp('parity', low_byte))]


def update_flag_af(a):
    """Return a one-element list assigning af from `a` masked with 0x10.

    Only the result `a` is inspected here, not the operands that produced it.
    """
    masked = a & ExprInt_from(a, 0x10)
    af_value = ExprCond(masked, ExprInt_from(af, 1), ExprInt_from(af, 0))
    return [ExprAff(af, af_value)]


def update_flag_znp(a):
    """Return the zf, nf and pf assignments (in that order) for result `a`."""
    return update_flag_zf(a) + update_flag_nf(a) + update_flag_pf(a)


def update_flag_logic(a):
    """Return the zf/nf/pf assignments for `a`, followed by of and cf set to 0."""
    flags = update_flag_znp(a)
    for cleared in (of, cf):
        flags.append(ExprAff(cleared, ExprInt_from(cleared, 0)))
    return flags


def update_flag_arith(a):
    """Return the zf/nf/pf assignments for the arithmetic result `a`.

    The carry/overflow assignments are produced separately by the
    update_flag_add*/update_flag_sub* helpers.
    """
    return list(update_flag_znp(a))


def check_ops_msb(a, b, c):
    """Raise ValueError unless the three sizes are all truthy and all equal."""
    sizes_ok = a and b and c and a == b and a == c
    if sizes_ok:
        return
    raise ValueError('bad ops size %s %s %s' % (a, b, c))


def arith_flag(a, b, c):
    """Return the msb() of each of `a`, `b` and `c` as a 3-tuple.

    The three operand sizes are validated with check_ops_msb first, so a
    size mismatch raises ValueError.
    """
    check_ops_msb(a.size, b.size, c.size)
    return a.msb(), b.msb(), c.msb()

# checked: ok for adc add because b & c before +cf


def update_flag_add_cf(a, b, c):
    """Return the cf assignment for an addition `c = a + b`.

    Implements msb(((a ^ b) ^ c) ^ ((a ^ c) & ~(a ^ b))).
    """
    a_xor_b = a ^ b
    a_xor_c = a ^ c
    carry = (a_xor_b ^ c) ^ (a_xor_c & (~a_xor_b))
    return ExprAff(cf, carry.msb())


def update_flag_add_of(a, b, c):
    """Return the of assignment for an addition `c = a + b`.

    Implements msb((a ^ c) & ~(a ^ b)).
    """
    overflow = (a ^ c) & (~(a ^ b))
    return ExprAff(of, overflow.msb())


# checked: ok for sbb add because b & c before +cf
def update_flag_sub_cf(a, b, c):
    """Return the cf assignment for a subtraction `c = a - b`.

    Implements msb(((a ^ b) ^ c) ^ ((a ^ c) & (a ^ b))).
    """
    a_xor_b = a ^ b
    a_xor_c = a ^ c
    borrow = (a_xor_b ^ c) ^ (a_xor_c & a_xor_b)
    return ExprAff(cf, borrow.msb())


def update_flag_sub_of(a, b, c):
    """Return the of assignment for a subtraction `c = a - b`.

    Implements msb((a ^ c) & (a ^ b)).
    """
    overflow = (a ^ c) & (a ^ b)
    return ExprAff(of, overflow.msb())

# z = x+y (+cf?)


def update_flag_add(x, y, z):
    """Return [cf assignment, of assignment] for the addition `z = x + y`."""
    return [update_flag_add_cf(x, y, z), update_flag_add_of(x, y, z)]

# z = x-y (+cf?)
+ + +def update_flag_sub(x, y, z): + e = [] + e.append(update_flag_sub_cf(x, y, z)) + e.append(update_flag_sub_of(x, y, z)) + return e + + +def set_float_cs_eip(instr): + e = [] + # XXX TODO check float updt + e.append(ExprAff(float_eip, ExprInt_from(float_eip, instr.offset))) + e.append(ExprAff(float_cs, CS)) + return e + + +def mov(ir, instr, a, b): + if a in [ES, CS, SS, DS, FS, GS]: + b = b[:a.size] + if b in [ES, CS, SS, DS, FS, GS]: + b = b.zeroExtend(a.size) + e = [ExprAff(a, b)] + return None, e, [] + + +def xchg(ir, instr, a, b): + e = [] + e.append(ExprAff(a, b)) + e.append(ExprAff(b, a)) + return None, e, [] + + +def movzx(ir, instr, a, b): + e = [ExprAff(a, b.zeroExtend(a.size))] + return None, e, [] + + +def movsx(ir, instr, a, b): + e = [ExprAff(a, b.signExtend(a.size))] + return None, e, [] + + +def lea(ir, instr, a, b): + src = b.arg + if src.size > a.size: + src = src[:a.size] + e = [ExprAff(a, src)] + return None, e, [] + + +def add(ir, instr, a, b): + e = [] + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_add(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def xadd(ir, instr, a, b): + e = [] + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_add(b, a, c) + e.append(ExprAff(b, a)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def adc(ir, instr, a, b): + e = [] + c = a + (b + ExprCompose([(ExprInt_fromsize(a.size - 1, 0), 1, a.size), + (cf, 0, 1)])) + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_add(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def sub(ir, instr, a, b): + e = [] + c = a - b + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_sub(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + +# a-(b+cf) + + +def sbb(ir, instr, a, b): + e = [] + c = a - (b + ExprCompose([(ExprInt_fromsize(a.size - 1, 0), 1, a.size), + (cf, 0, 1)])) + e += update_flag_arith(c) + e += update_flag_af(c) + e += 
update_flag_sub(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def neg(ir, instr, b): + e = [] + a = ExprInt_from(b, 0) + + c = a - b + e += update_flag_arith(c) + e += update_flag_sub(a, b, c) + e += update_flag_af(c) + e.append(ExprAff(b, c)) + return None, e, [] + + +def l_not(ir, instr, b): + e = [] + c = ~b + e.append(ExprAff(b, c)) + return None, e, [] + + +def l_cmp(ir, instr, a, b): + e = [] + c = a - b + e += update_flag_arith(c) + e += update_flag_sub(a, b, c) + e += update_flag_af(c) + return None, e, [] + + +def xor(ir, instr, a, b): + e = [] + c = a ^ b + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_or(ir, instr, a, b): + e = [] + c = a | b + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_and(ir, instr, a, b): + e = [] + c = a & b + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_test(ir, instr, a, b): + e = [] + c = a & b + e += update_flag_logic(c) + return None, e, [] + + +def l_rol(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('<<<', a, b) + + new_cf = c[:1] + e.append(ExprAff(cf, new_cf)) + # hack (only valid if b=1) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_ror(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('>>>', a, b) + + e.append(ExprAff(cf, c.msb())) + # hack (only valid if b=1): when count == 1: a = msb-1(dest) + e.append(ExprAff(of, (c ^ a).msb())) + e.append(ExprAff(a, c)) + return None, e, [] + + +def rcl(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('<<<c_rez', a, b, cf.zeroExtend(a.size)) + new_cf = ExprOp('<<<c_cf', a, b, cf.zeroExtend(a.size))[:1] + + e.append(ExprAff(cf, new_cf)) + # hack (only valid if b=1) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def rcr(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('>>>c_rez', a, b, 
cf.zeroExtend(a.size)) + new_cf = ExprOp('>>>c_cf', a, b, cf.zeroExtend(a.size))[:1] + + e.append(ExprAff(cf, new_cf)) + # hack (only valid if b=1) + e.append(ExprAff(of, (a ^ c).msb())) + e.append(ExprAff(a, c)) + + return None, e, [] + + +def get_shift(a, b): + # b.size must match a + b = b.zeroExtend(a.size) + if a.size == 64: + shift = b & ExprInt_from(b, 0x3f) + else: + shift = b & ExprInt_from(b, 0x1f) + shift = expr_simp(shift) + return shift + + +def sar(ir, instr, a, b): + + shifter = get_shift(a, b) + c = ExprOp('a>>', a, shifter) + + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + + new_cf = ExprOp('a>>', a, (shifter - ExprInt_from(a, 1)))[:1] + + e_do = [ + ExprAff(cf, new_cf), + ExprAff(of, ExprInt_from(of, 0)), + ExprAff(a, c) + ] + + e_do += update_flag_znp(c) + + # dont generate conditional shifter on constant + if isinstance(shifter, ExprInt): + if int(shifter.arg) != 0: + return None, e_do, [] + else: + raise NotImplementedError("TODO check me") + + return ExprCond(shifter, lbl_do, lbl_skip), + [], [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def shr(ir, instr, a, b): + e = [] + # TODO FIX AS SAR! 
+ shifter = get_shift(a, b) + c = ExprOp('>>', a, shifter) + + new_cf = ExprOp('>>', a, (shifter - ExprInt_from(a, 1)))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + e.append(ExprAff(of, a.msb())) + e += update_flag_znp(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def shrd_cl(ir, instr, a, b): + e = [] + opmode, admode = s, instr.v_admode() + shifter = mRCX[instr.mode][:8].zeroExtend(a.size) & ExprInt_from(a, 0x1f) + c = (a >> shifter) | (b << (ExprInt_from(a, a.size) - shifter)) + new_cf = (a >> (shifter - ExprInt_from(a, 1)))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + e.append(ExprAff(of, a.msb())) + e += update_flag_znp(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def shrd(ir, instr, a, b, c): + e = [] + shifter = get_shift(a, c) + + d = (a >> shifter) | (b << (ExprInt_from(a, a.size) - shifter)) + new_cf = (a >> (shifter - ExprInt_from(a, 1)))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + e.append(ExprAff(of, a.msb())) + e += update_flag_znp(d) + e.append(ExprAff(a, d)) + return None, e, [] + + +def sal(ir, instr, a, b): + e = [] + shifter = get_shift(a, b) + c = ExprOp('a<<', a, shifter) + new_cf = (a >> (ExprInt_from(a, a.size) - shifter))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + e += update_flag_znp(c) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def shl(ir, instr, a, b): + e = [] + shifter = get_shift(a, b) + c = a << shifter + new_cf = (a >> (ExprInt_from(a, a.size) - shifter))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + e += update_flag_znp(c) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def shld_cl(ir, instr, a, b): + return shld(ir, instr, a, b, ecx) + + +def shld(ir, instr, a, b, c): + e = [] + shifter = c.zeroExtend(a.size) & ExprInt_from(a, 0x1f) + c = ExprOp('|', + a << shifter, + b >> 
(ExprInt_from(a, a.size) - shifter) + ) + + new_cf = (a >> (ExprInt_from(a, a.size) - shifter))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + # XXX todo: don't update flag if shifter is 0 + e += update_flag_znp(c) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, ExprCond(shifter, + c, + a))) + return None, e, [] + + +# XXX todo ### +def cmc(ir, instr): + e = [ExprAff(cf, ExprCond(cf, ExprInt_from(cf, 0), ExprInt_from(cf, 1)))] + return None, e, [] + + +def clc(ir, instr): + e = [ExprAff(cf, ExprInt_from(cf, 0))] + return None, e, [] + + +def stc(ir, instr): + e = [ExprAff(cf, ExprInt_from(cf, 1))] + return None, e, [] + + +def cld(ir, instr): + e = [ExprAff(df, ExprInt_from(df, 0))] + return None, e, [] + + +def std(ir, instr): + e = [ExprAff(df, ExprInt_from(df, 1))] + return None, e, [] + + +def cli(ir, instr): + e = [ExprAff(i_f, ExprInt_from(i_f, 0))] + return None, e, [] + + +def sti(ir, instr): + e = [ExprAff(exception_flags, ExprInt32(EXCEPT_PRIV_INSN))] + e = [] # XXX TODO HACK + return None, e, [] + + +def inc(ir, instr, a): + e = [] + b = ExprInt_from(a, 1) + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + + e.append(update_flag_add_of(a, b, c)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def dec(ir, instr, a): + e = [] + b = ExprInt_from(a, -1) + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + + e.append(update_flag_add_of(a, b, c)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def push(ir, instr, a): + e = [] + s = instr.mode + size = instr.v_opmode() + opmode, admode = s, instr.v_admode() + # special case segment regs + if a in [ES, CS, SS, DS, FS, GS]: + pass + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + if isinstance(a, ExprInt): + a = ExprInt_fromsize(s, a.arg) + + c = mRSP[instr.mode][:s] - ExprInt_fromsize(s, s / 8) + e.append(ExprAff(mRSP[instr.mode][:s], c)) + # we sub vopmode to stack, but mem access is arg size wide + if 
ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(ExprMem(c, a.size), a)) + return None, e, [] + + +def pop(ir, instr, a): + e = [] + s = instr.mode + size = instr.v_opmode() + opmode, admode = s, instr.v_admode() + # special case segment regs + if a in [ES, CS, SS, DS, FS, GS]: + s = admode + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + new_esp = mRSP[instr.mode][:s] + ExprInt_fromsize(s, s / 8) + e.append(ExprAff(mRSP[instr.mode][:s], new_esp)) + # XXX FIX XXX for pop [esp] + if isinstance(a, ExprMem): + a = a.replace_expr({mRSP[instr.mode]: new_esp}) + c = mRSP[instr.mode][:s] + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(a, ExprMem(c, a.size))) + return None, e, [] + + +def sete(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(zf, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setnz(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(zf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setl(ir, instr, a): + e = [] + e.append( + ExprAff(a, ExprCond(nf - of, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setg(ir, instr, a): + e = [] + a0 = ExprInt_from(a, 0) + a1 = ExprInt_from(a, 1) + e.append(ExprAff(a, ExprCond(zf, a0, a1) & ExprCond(nf - of, a0, a1))) + return None, e, [] + + +def setge(ir, instr, a): + e = [] + e.append( + ExprAff(a, ExprCond(nf - of, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def seta(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf | zf, + ExprInt_from(a, 0), + ExprInt_from(a, 1)))) + + return None, e, [] + + +def setae(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setb(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setbe(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf | zf, + ExprInt_from(a, 
1), + ExprInt_from(a, 0))) + ) + return None, e, [] + + +def setns(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(nf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def sets(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(nf, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def seto(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(of, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setp(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(pf, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setnp(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(pf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setle(ir, instr, a): + e = [] + a0 = ExprInt_from(a, 0) + a1 = ExprInt_from(a, 1) + e.append(ExprAff(a, ExprCond(zf, a1, a0) | ExprCond(nf ^ of, a1, a0))) + return None, e, [] + + +def setna(ir, instr, a): + e = [] + a0 = ExprInt_from(a, 0) + a1 = ExprInt_from(a, 1) + e.append(ExprAff(a, ExprCond(cf, a1, a0) & ExprCond(zf, a1, a0))) + return None, e, [] + + +def setnbe(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf | zf, + ExprInt_from(a, 0), + ExprInt_from(a, 1))) + ) + return None, e, [] + + +def setno(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(of, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setnb(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setalc(ir, instr): + a = mRAX[instr.mode][0:8] + e = [] + e.append( + ExprAff(a, ExprCond(cf, ExprInt_from(a, 0xff), ExprInt_from(a, 0)))) + return None, e, [] + + +def bswap(ir, instr, a): + e = [] + if a.size == 16: + c = ExprCompose([(a[:8], 8, 16), + (a[8:16], 0, 8), + ]) + elif a.size == 32: + c = ExprCompose([(a[:8], 24, 32), + (a[8:16], 16, 24), + (a[16:24], 8, 16), + (a[24:32], 0, 8), + ]) + elif a.size == 64: + c = ExprCompose([(a[:8], 56, 64), + 
(a[8:16], 48, 56), + (a[16:24], 40, 48), + (a[24:32], 32, 40), + (a[32:40], 24, 32), + (a[40:48], 16, 24), + (a[48:56], 8, 16), + (a[56:64], 0, 8), + ]) + else: + raise ValueError('the size DOES matter') + e.append(ExprAff(a, c)) + return None, e, [] + + +def cmps(ir, instr, size): + lbl_cmp = ExprId(ir.gen_label(), instr.mode) + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + + s = instr.v_admode() + a = ExprMem(mRDI[instr.mode][:s], size) + b = ExprMem(mRSI[instr.mode][:s], size) + + dummy, e, extra = l_cmp(ir, instr, a, b) + + e0 = [] + e0.append(ExprAff(a.arg, a.arg + ExprInt_from(a.arg, size / 8))) + e0.append(ExprAff(b.arg, b.arg + ExprInt_from(b.arg, size / 8))) + e0 = irbloc(lbl_df_0.name, lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(a.arg, a.arg - ExprInt_from(a.arg, size / 8))) + e1.append(ExprAff(b.arg, b.arg - ExprInt_from(b.arg, size / 8))) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def scas(ir, instr, size): + lbl_cmp = ExprId(ir.gen_label(), instr.mode) + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + + s = instr.v_admode() + a = ExprMem(mRDI[instr.mode][:s], size) + + dummy, e, extra = l_cmp(ir, instr, mRAX[instr.mode][:size], a) + + e0 = [] + e0.append(ExprAff(a.arg, a.arg + ExprInt_from(a.arg, size / 8))) + e0 = irbloc(lbl_df_0.name, lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(a.arg, a.arg - ExprInt_from(a.arg, size / 8))) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def compose_eflag(s=32): + args = [] + + regs = [cf, ExprInt1(1), pf, ExprInt1( + 0), af, ExprInt1(0), zf, nf, tf, i_f, df, of] + for i in xrange(len(regs)): + args.append((regs[i], i, i + 1)) + + args.append((iopl, 12, 14)) + + if s == 32: + 
regs = [nt, ExprInt1(0), rf, vm, ac, vif, vip, i_d] + elif s == 16: + regs = [nt, ExprInt1(0)] + else: + raise ValueError('unk size') + for i in xrange(len(regs)): + args.append((regs[i], i + 14, i + 15)) + if s == 32: + args.append((ExprInt_fromsize(10, 0), 22, 32)) + return ExprCompose(args) + + +def pushfd(ir, instr): + return push(ir, instr, compose_eflag()) + + +def pushfw(ir, instr): + return push(ir, instr, compose_eflag(16)) + + +def popfd(ir, instr): + tmp = ExprMem(mRSP[instr.mode]) + e = [] + e.append(ExprAff(cf, ExprSlice(tmp, 0, 1))) + e.append(ExprAff(pf, ExprSlice(tmp, 2, 3))) + e.append(ExprAff(af, ExprSlice(tmp, 4, 5))) + e.append(ExprAff(zf, ExprSlice(tmp, 6, 7))) + e.append(ExprAff(nf, ExprSlice(tmp, 7, 8))) + e.append(ExprAff(tf, ExprSlice(tmp, 8, 9))) + e.append(ExprAff(i_f, ExprSlice(tmp, 9, 10))) + e.append(ExprAff(df, ExprSlice(tmp, 10, 11))) + e.append(ExprAff(of, ExprSlice(tmp, 11, 12))) + e.append(ExprAff(iopl, ExprSlice(tmp, 12, 14))) + e.append(ExprAff(nt, ExprSlice(tmp, 14, 15))) + e.append(ExprAff(rf, ExprSlice(tmp, 16, 17))) + e.append(ExprAff(vm, ExprSlice(tmp, 17, 18))) + e.append(ExprAff(ac, ExprSlice(tmp, 18, 19))) + e.append(ExprAff(vif, ExprSlice(tmp, 19, 20))) + e.append(ExprAff(vip, ExprSlice(tmp, 20, 21))) + e.append(ExprAff(i_d, ExprSlice(tmp, 21, 22))) + e.append(ExprAff(mRSP[instr.mode], mRSP[instr.mode] + ExprInt32(4))) + e.append(ExprAff(exception_flags, + ExprCond(ExprSlice(tmp, 8, 9), + ExprInt32(EXCEPT_SOFT_BP), + exception_flags + ) + ) + ) + return None, e, [] + + +def popfw(ir, instr): + tmp = ExprMem(esp) + e = [] + e.append(ExprAff(cf, ExprSlice(tmp, 0, 1))) + e.append(ExprAff(pf, ExprSlice(tmp, 2, 3))) + e.append(ExprAff(af, ExprSlice(tmp, 4, 5))) + e.append(ExprAff(zf, ExprSlice(tmp, 6, 7))) + e.append(ExprAff(nf, ExprSlice(tmp, 7, 8))) + e.append(ExprAff(tf, ExprSlice(tmp, 8, 9))) + e.append(ExprAff(i_f, ExprSlice(tmp, 9, 10))) + e.append(ExprAff(df, ExprSlice(tmp, 10, 11))) + e.append(ExprAff(of, 
ExprSlice(tmp, 11, 12))) + e.append(ExprAff(iopl, ExprSlice(tmp, 12, 14))) + e.append(ExprAff(nt, ExprSlice(tmp, 14, 15))) + e.append(ExprAff(esp, esp + ExprInt32(2))) + return None, e, [] + + +def pushad(ir, instr): + e = [] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + + regs = [ + mRAX[instr.mode][:s], mRCX[instr.mode][ + :s], mRDX[instr.mode][:s], mRBX[instr.mode][:s], + mRSP[instr.mode][:s], mRBP[instr.mode][:s], + mRSI[instr.mode][:s], mRDI[instr.mode][:s]] + + for i in xrange(len(regs)): + c = mRSP[instr.mode][:s] + ExprInt_fromsize(s, -(s / 8) * (i + 1)) + e.append(ExprAff(ExprMem(c, s), regs[i])) + e.append(ExprAff(mRSP[instr.mode][:s], c)) + return None, e, [] + + +def popad(ir, instr): + e = [] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + regs = [ + mRAX[instr.mode][:s], mRCX[instr.mode][ + :s], mRDX[instr.mode][:s], mRBX[instr.mode][:s], + mRSP[instr.mode][:s], mRBP[instr.mode][:s], + mRSI[instr.mode][:s], mRDI[instr.mode][:s]] + myesp = mRSP[instr.mode][:s] + regs.reverse() + for i in xrange(len(regs)): + if regs[i] == myesp: + continue + c = myesp + ExprInt_from(myesp, ((s / 8) * i)) + e.append(ExprAff(regs[i], ExprMem(c, s))) + + c = myesp + ExprInt_from(myesp, ((s / 8) * (i + 1))) + e.append(ExprAff(myesp, c)) + + return None, e, [] + + +def call(ir, instr, dst): + e = [] + # opmode, admode = instr.opmode, instr.admode + s = dst.size + meip = mRIP[instr.mode] + opmode, admode = s, instr.v_admode() + myesp = mRSP[instr.mode][:opmode] + n = ExprId(ir.get_next_label(instr), instr.mode) + + c = myesp + ExprInt_fromsize(s, (-s / 8)) + e.append(ExprAff(myesp, c)) + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(ExprMem(c, size=s), n)) + e.append(ExprAff(meip, dst.zeroExtend(instr.mode))) + if not expr_is_int_or_label(dst): + dst = meip + return dst, e, [] + + +def 
ret(ir, instr, a=None): + e = [] + s = instr.mode + meip = mRIP[instr.mode] + opmode, admode = instr.v_opmode(), instr.v_admode() + s = opmode + myesp = mRSP[instr.mode][:s] + + if a is None: + a = ExprInt_fromsize(s, 0) + e.append(ExprAff(myesp, (myesp + (ExprInt_fromsize(s, (s / 8)))))) + else: + a = a.zeroExtend(s) + e.append(ExprAff(myesp, (myesp + (ExprInt_fromsize(s, (s / 8)) + a)))) + c = myesp + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(meip, ExprMem(c, size=s).zeroExtend(s))) + return meip, e, [] + + +def retf(ir, instr, a=None): + e = [] + s = instr.mode + meip = mRIP[instr.mode] + opmode, admode = instr.v_opmode(), instr.v_admode() + if a is None: + a = ExprInt_fromsize(s, 0) + s = opmode + myesp = mRSP[instr.mode][:s] + + a = a.zeroExtend(s) + + e.append(ExprAff(myesp, (myesp + (ExprInt_fromsize(s, (s / 8)) + a)))) + + c = myesp + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(meip, ExprMem(c, size=s).zeroExtend(s))) + # e.append(ExprAff(meip, ExprMem(c, size = s))) + c = myesp + ExprInt_fromsize(s, (s / 8)) + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(CS, ExprMem(c, size=16))) + + return meip, e, [] + + +def leave(ir, instr): + opmode, admode = instr.v_opmode(), instr.v_admode() + s = opmode + myesp = mRSP[instr.mode] + + e = [] + e.append(ExprAff(mRBP[s], ExprMem(mRBP[instr.mode], size=s))) + e.append(ExprAff(myesp, + ExprInt_fromsize(instr.mode, instr.mode / 8) + mRBP[instr.mode])) + return None, e, [] + + +def enter(ir, instr, a, b): + opmode, admode = instr.v_opmode(), instr.v_admode() + s = opmode + myesp = mRSP[instr.mode][:s] + myebp = mRBP[instr.mode][:s] + + a = a.zeroExtend(s) + + e = [] + esp_tmp = myesp - ExprInt_fromsize(s, s / 8) + e.append(ExprAff(ExprMem(esp_tmp, + size=s), + myebp)) + e.append(ExprAff(myebp, esp_tmp)) + e.append(ExprAff(myesp, myesp - (a + ExprInt_fromsize(s, s / 8)))) + return None, e, [] + + +def jmp(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] 
+ e.append(ExprAff(meip, dst)) # dst.zeroExtend(instr.mode))) + if isinstance(dst, ExprMem): + dst = meip + return dst, e, [] + + +def jmpf(ir, instr, a): + e = [] + meip = mRIP[instr.mode] + assert(isinstance(a, ExprMem) and + isinstance(a.arg, ExprOp) and + a.arg.op == "segm") + segm = a.arg.args[0] + base = a.arg.args[1] + s = instr.mode + print segm, base + m1 = ExprMem(ExprOp('segm', segm, base), 16) + m2 = ExprMem(ExprOp('segm', segm, base + ExprInt_from(base, 2)), s) + + e.append(ExprAff(meip, m1)) + e.append(ExprAff(CS, m2)) + return meip, e, [] + + +def jz(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(zf, dst, n).zeroExtend(instr.mode) + e = [ExprAff(meip, dst_o)] + return dst_o, e, [] + + +def jcxz(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(mRCX[instr.mode][:16], n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jecxz(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(mRCX[instr.mode][:32], n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jrcxz(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(mRCX[instr.mode], n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jnz(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(zf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jp(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(pf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jnp(ir, instr, dst): + e = [] 
+ meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(pf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def ja(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf | zf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jae(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jb(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jbe(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf | zf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jge(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf - of, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jg(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(zf | (nf - of), n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jl(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf - of, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jle(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(zf | (nf - of), dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, 
dst_o)) + return dst_o, e, [] + + +def js(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jns(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jo(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(of, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jno(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(of, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def loop(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = ExprId(ir.get_next_label(instr), instr.mode) + c = myecx - ExprInt_from(myecx, 1) + e.append(ExprAff(myecx, c)) + e.append(ExprAff(meip, ExprCond(c, dst, n).zeroExtend(instr.mode))) + dst_o = ExprCond(myecx, dst, n).zeroExtend(instr.mode) + return dst_o, e, [] + + +def loopne(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = ExprId(ir.get_next_label(instr), instr.mode) + + c = ExprOp('==', + mRCX[instr.mode][:s] - ExprInt_fromsize(s, 1), + ExprInt_fromsize(s, 0)) ^ ExprInt1(1) + c &= zf ^ ExprInt1(1) + + e.append(ExprAff(myecx, myecx - ExprInt_from(myecx, 1))) + e.append(ExprAff(meip, ExprCond(c, dst, n).zeroExtend(instr.mode))) + + # for dst, ecx has been modified! 
+ c = ExprOp('==', + mRCX[instr.mode][:s], + ExprInt_fromsize(s, 0)) ^ ExprInt1(1) + c &= zf ^ ExprInt1(1) + dst_o = ExprCond(c, dst, n).zeroExtend(instr.mode) + return dst_o, e, [] + + +def loope(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = ExprId(ir.get_next_label(instr), instr.mode) + c = ExprOp('==', + mRCX[instr.mode][:s] - ExprInt_fromsize(s, 1), + ExprInt_fromsize(s, 0)) ^ ExprInt1(1) + c &= zf + e.append(ExprAff(myecx, myecx - ExprInt_from(myecx, 1))) + dst_o = ExprCond(c, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + + # for dst, ecx has been modified! + c = ExprOp('==', + mRCX[instr.mode][:s], + ExprInt_fromsize(s, 0)) ^ ExprInt1(1) + c &= zf + dst_o = ExprCond(c, dst, n).zeroExtend(instr.mode) + return dst_o, e, [] + + +# XXX size to do; eflag +def div(ir, instr, a): + e = [] + s = a.size + if s == 8: + b = mRAX[instr.mode][:16] + elif s == 16: + s1, s2 = mRDX[instr.mode][:16], mRAX[instr.mode][:16] + b = ExprCompose([(s1, 0, 16), + (s1, 16, 32)]) + elif s == 32: + s1, s2 = mRDX[instr.mode][:32], mRAX[instr.mode][:32] + b = ExprCompose([(s2, 0, 32), + (s1, 32, 64)]) + elif s == 64: + s1, s2 = mRDX[instr.mode], mRAX[instr.mode] + b = ExprCompose([(s2, 0, 64), + (s1, 64, 128)]) + else: + raise ValueError('div arg not impl', a) + + c_d = ExprOp('udiv', b, a.zeroExtend(b.size)) + c_r = ExprOp('umod', b, a.zeroExtend(b.size)) + + # if 8 bit div, only ax is affected + if s == 8: + e.append(ExprAff(b, ExprCompose([(c_d[:8], 0, 8), + (c_r[:8], 8, 16)]))) + else: + e.append(ExprAff(s1, c_r[:s])) + e.append(ExprAff(s2, c_d[:s])) + return None, e, [] + +# XXX size to do; eflag + + +def idiv(ir, instr, a): + e = [] + s = a.size + + if s == 8: + b = mRAX[instr.mode][:16] + elif s == 16: + s1, s2 = mRDX[instr.mode][:16], mRAX[instr.mode][:16] + b = ExprCompose([(s1, 0, 16), + (s1, 16, 32)]) + elif s == 32: + s1, s2 = 
mRDX[instr.mode][:32], mRAX[instr.mode][:32] + b = ExprCompose([(s2, 0, 32), + (s1, 32, 64)]) + else: + raise ValueError('div arg not impl', a) + + c_d = ExprOp('idiv', b, a.signExtend(b.size)) + c_r = ExprOp('imod', b, a.signExtend(b.size)) + + # if 8 bit div, only ax is affected + if s == 8: + e.append(ExprAff(b, ExprCompose([(c_d[:8], 0, 8), + (c_r[:8], 8, 16)]))) + else: + e.append(ExprAff(s1, c_r[:s])) + e.append(ExprAff(s2, c_d[:s])) + return None, e, [] + +# XXX size to do; eflag + + +def mul(ir, instr, a): + e = [] + size = a.size + if a.size in [16, 32, 64]: + result = ExprOp('*', + mRAX[instr.mode][:size].zeroExtend(size * 2), + a.zeroExtend(size * 2)) + e.append(ExprAff(mRAX[instr.mode][:size], result[:size])) + e.append(ExprAff(mRDX[instr.mode][:size], result[size:size * 2])) + + elif a.size == 8: + result = ExprOp('*', + mRAX[instr.mode][:8].zeroExtend(16), + a.zeroExtend(16)) + e.append(ExprAff(mRAX[instr.mode][:16], result)) + else: + raise ValueError('unknow size') + + e.append(ExprAff(of, ExprCond(result[size:size * 2], + ExprInt1(1), + ExprInt1(0)))) + e.append(ExprAff(cf, ExprCond(result[size:size * 2], + ExprInt1(1), + ExprInt1(0)))) + + return None, e, [] + + +def imul(ir, instr, a, b=None, c=None): + e = [] + size = a.size + if b is None: + if size in [16, 32, 64]: + result = ExprOp('*', + mRAX[instr.mode][:size].signExtend(size * 2), + a.signExtend(size * 2)) + e.append(ExprAff(mRAX[instr.mode][:size], result[:size])) + e.append(ExprAff(mRDX[instr.mode][:size], result[size:size * 2])) + elif size == 8: + dst = mRAX[instr.mode][:16] + result = ExprOp('*', + mRAX[instr.mode][:8].signExtend(16), + a.signExtend(16)) + + e.append(ExprAff(dst, result)) + e.append( + ExprAff(cf, ExprCond(result - result[:size].signExtend(size * 2), + ExprInt1(1), + ExprInt1(0)))) + e.append( + ExprAff(of, ExprCond(result - result[:size].signExtend(size * 2), + ExprInt1(1), + ExprInt1(0)))) + + else: + if c is None: + c = b + b = a + result = ExprOp('*', + 
b.signExtend(size * 2), + c.signExtend(size * 2)) + e.append(ExprAff(a, result[:size])) + + e.append( + ExprAff(cf, ExprCond(result - result[:size].signExtend(size * 2), + ExprInt1(1), + ExprInt1(0)))) + e.append( + ExprAff(of, ExprCond(result - result[:size].signExtend(size * 2), + ExprInt1(1), + ExprInt1(0)))) + return None, e, [] + + +def cbw(ir, instr): + e = [] + tempAL = mRAX[instr.mode][:8] + tempAX = mRAX[instr.mode][:16] + e.append(ExprAff(tempAX, tempAL.signExtend(16))) + return None, e, [] + + +def cwde(ir, instr): + e = [] + tempAX = mRAX[instr.mode][:16] + tempEAX = mRAX[instr.mode][:32] + e.append(ExprAff(tempEAX, tempAX.signExtend(32))) + return None, e, [] + + +def cdqe(ir, instr): + e = [] + tempEAX = mRAX[instr.mode][:32] + tempRAX = mRAX[instr.mode][:64] + e.append(ExprAff(tempRAX, tempEAX.signExtend(64))) + return None, e, [] + + +def cwd(ir, instr): + e = [] + tempAX = mRAX[instr.mode][:16] + tempDX = mRDX[instr.mode][:16] + c = tempAX.signExtend(32) + e.append(ExprAff(tempAX, c[:16])) + e.append(ExprAff(tempDX, c[16:32])) + return None, e, [] + + +def cdq(ir, instr): + e = [] + tempEAX = mRAX[instr.mode][:32] + tempEDX = mRDX[instr.mode][:32] + c = tempEAX.signExtend(64) + e.append(ExprAff(tempEAX, c[:32])) + e.append(ExprAff(tempEDX, c[32:64])) + return None, e, [] + + +def cqo(ir, instr): + e = [] + tempRAX = mRAX[instr.mode][:64] + tempRDX = mRDX[instr.mode][:64] + c = tempEAX.signExtend(128) + e.append(ExprAff(tempRAX, c[:64])) + e.append(ExprAff(tempRDX, c[64:127])) + return None, e, [] + + +def stos(ir, instr, size): + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + + s = instr.v_admode() + + addr_o = mRDI[instr.mode][:s] + addr = addr_o + addr_p = addr + ExprInt_from(addr, size / 8) + addr_m = addr - ExprInt_from(addr, size / 8) + if ir.do_str_segm: + mss = ES + if instr.additional_info.g2.value: + raise NotImplementedError("add 
segm support") + addr = ExprOp('segm', mss, addr) + + b = mRAX[instr.mode][:size] + + e0 = [] + e0.append(ExprAff(addr_o, addr_p)) + e0 = irbloc(lbl_df_0.name, lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(addr_o, addr_m)) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + e = [] + e.append(ExprAff(ExprMem(addr, size), b)) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def lods(ir, instr, size): + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + e = [] + s = instr.v_admode() + + addr_o = mRSI[instr.mode][:s] + addr = addr_o + addr_p = addr + ExprInt_from(addr, size / 8) + addr_m = addr - ExprInt_from(addr, size / 8) + if ir.do_str_segm: + mss = DS + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + addr = ExprOp('segm', mss, addr) + + b = mRAX[instr.mode][:size] + + e0 = [] + e0.append(ExprAff(addr_o, addr_p)) + e0 = irbloc(lbl_df_0.name, lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(addr_o, addr_m)) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + e = [] + e.append(ExprAff(b, ExprMem(addr, size))) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def movs(ir, instr, size): + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + + s = instr.v_admode() + # a = ExprMem(mRDI[instr.mode][:s], size) + # b = ExprMem(mRSI[instr.mode][:s], size) + + a = mRDI[instr.mode][:s] + b = mRSI[instr.mode][:s] + + e = [] + src = b + dst = a + if ir.do_str_segm: + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + src = ExprOp('segm', DS, src) + dst = ExprOp('segm', ES, dst) + e.append(ExprAff(ExprMem(dst, size), ExprMem(src, size))) + + e0 = [] + e0.append(ExprAff(a, a + ExprInt_from(a, size / 8))) + e0.append(ExprAff(b, b + ExprInt_from(b, size / 8))) + e0 = irbloc(lbl_df_0.name, 
lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(a, a - ExprInt_from(a, size / 8))) + e1.append(ExprAff(b, b - ExprInt_from(b, size / 8))) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def float_prev(flt): + if not flt in float_list: + return None + i = float_list.index(flt) + if i == 0: + raise ValueError('broken index') + flt = float_list[i - 1] + return flt + + +def float_pop(avoid_flt=None): + avoid_flt = float_prev(avoid_flt) + e = [] + if avoid_flt != float_st0: + e.append(ExprAff(float_st0, float_st1)) + if avoid_flt != float_st1: + e.append(ExprAff(float_st1, float_st2)) + if avoid_flt != float_st2: + e.append(ExprAff(float_st2, float_st3)) + if avoid_flt != float_st3: + e.append(ExprAff(float_st3, float_st4)) + if avoid_flt != float_st4: + e.append(ExprAff(float_st4, float_st5)) + if avoid_flt != float_st5: + e.append(ExprAff(float_st5, float_st6)) + if avoid_flt != float_st6: + e.append(ExprAff(float_st6, float_st7)) + if avoid_flt != float_st7: + e.append(ExprAff(float_st7, ExprInt_from(float_st7, 0))) + e.append( + ExprAff(float_stack_ptr, float_stack_ptr - ExprInt_fromsize(3, 1))) + return e + +# XXX TODO + + +def fcom(ir, instr, a, b): + e = [] + """ + if isinstance(a, ExprMem): + src = ExprOp('mem_%.2d_to_double'%a.size, a) + else: + src = a + """ + src = b + e.append(ExprAff(float_c0, ExprOp('fcom_c0', a, src.zeroExtend(a.size)))) + e.append(ExprAff(float_c1, ExprOp('fcom_c1', a, src.zeroExtend(a.size)))) + e.append(ExprAff(float_c2, ExprOp('fcom_c2', a, src.zeroExtend(a.size)))) + e.append(ExprAff(float_c3, ExprOp('fcom_c3', a, src.zeroExtend(a.size)))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def ficom(ir, instr, a): + e = [] + e += set_float_cs_eip(instr) + return None, e, [] + + +def fcomi(ir, instr, a): + # Invalid emulation + InvalidEmulation + + +def fcomip(ir, instr, a): + # Invalid emulation + InvalidEmulation + + +def fucomi(ir, instr, a): + # Invalid emulation + 
InvalidEmulation + + +def fucomip(ir, instr, a): + # Invalid emulation, only read/write analysis is valid + cond = ExprOp('fcomp', float_st0, a) + e = [] + e.append( + ExprAff(zf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + e.append( + ExprAff(pf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + e.append( + ExprAff(cf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + return None, e, [] + + +def fcomp(ir, instr, a, b): + dst, e, extra = fcom(ir, instr, a, b) + e += float_pop() + e += set_float_cs_eip(instr) + return dst, e, extra + + +def fld(ir, instr, a): + if isinstance(a, ExprMem): + src = ExprOp('mem_%.2d_to_double' % a.size, a) + else: + src = a + + e = [] + e.append(ExprAff(float_st7, float_st6)) + e.append(ExprAff(float_st6, float_st5)) + e.append(ExprAff(float_st5, float_st4)) + e.append(ExprAff(float_st4, float_st3)) + e.append(ExprAff(float_st3, float_st2)) + e.append(ExprAff(float_st2, float_st1)) + e.append(ExprAff(float_st1, float_st0)) + e.append(ExprAff(float_st0, src)) + e.append( + ExprAff(float_stack_ptr, float_stack_ptr + ExprInt_fromsize(3, 1))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def fst(ir, instr, a): + e = [] + if isinstance(a, ExprMem): + src = ExprOp('double_to_mem_%2d' % a.size, float_st0) + else: + src = float_st0 + e.append(ExprAff(a, src)) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def fstp(ir, instr, a): + dst, e, extra = fst(ir, instr, a) + e += float_pop(a) + return dst, e, extra + + +def fist(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprOp('double_to_int_%d' % a.size, float_st0))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def fistp(ir, instr, a): + dst, e, extra = fist(ir, instr, a) + e += float_pop(a) + return dst, e, extra + + +def fild(ir, instr, a): + # XXXXX + src = ExprOp('int_%.2d_to_double' % a.size, a) + e = [] + e += set_float_cs_eip(instr) + dst, e_fld, extra = fld(ir, instr, src) + e += e_fld + return dst, e, extra + + 
+def fldz(ir, instr): + return fld(ir, instr, ExprOp('int_32_to_double', ExprInt32(0))) + + +def fld1(ir, instr): + return fld(ir, instr, ExprOp('int_32_to_double', ExprInt32(1))) + + +def fldl2e(ir, instr): + x = struct.pack('d', 1 / math.log(2)) + x = struct.unpack('Q', x)[0] + return fld(ir, instr, ExprOp('mem_64_to_double', ExprInt64(x))) + + +def fldlg2(ir, instr): + x = struct.pack('d', math.log10(2)) + x = struct.unpack('Q', x)[0] + return fld(ir, instr, ExprOp('mem_64_to_double', ExprInt64(x))) + + +def fadd(ir, instr, a, b=None): + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(a, ExprOp('fadd', a, src))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def faddp(ir, instr, a, b=None): + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(float_prev(a), ExprOp('fadd', a, src))) + e += set_float_cs_eip(instr) + e += float_pop(a) + return None, e, [] + + +def fninit(ir, instr): + e = [] + e += set_float_cs_eip(instr) + return None, e, [] + + +def fnstenv(ir, instr, a): + e = [] + # XXX TODO tag word, ... 
+ status_word = ExprCompose([(ExprInt8(0), 0, 8), + (float_c0, 8, 9), + (float_c1, 9, 10), + (float_c2, 10, 11), + (float_stack_ptr, 11, 14), + (float_c3, 14, 15), + (ExprInt1(0), 15, 16), + ]) + + s = instr.mode + ad = ExprMem(a.arg, size=16) + e.append(ExprAff(ad, float_control)) + ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 1), size=16) + e.append(ExprAff(ad, status_word)) + ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 3), size=s) + e.append(ExprAff(ad, float_eip[:s])) + ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 4), size=16) + e.append(ExprAff(ad, float_cs)) + ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 5), size=s) + e.append(ExprAff(ad, float_address[:s])) + ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 6), size=16) + e.append(ExprAff(ad, float_ds)) + return None, e, [] + + +def fsub(ir, instr, a, b=None): + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(a, ExprOp('fsub', a, src))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fmul(ir, instr, a, b=None): + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(a, ExprOp('fmul', a, src))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fdiv(ir, instr, a, b=None): + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(a, ExprOp('fdiv', a, src))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fdivr(ir, instr, a, b=None): + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(a, ExprOp('fdiv', src, a))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fdivp(ir, instr, a, b=None): + # Invalid emulation + if 
b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(float_prev(a), ExprOp('fdiv', a, src))) + e += set_float_cs_eip(instr) + e += float_pop(a) + return None, e, [] + + +def fmulp(ir, instr, a, b=None): + # Invalid emulation + if b is None: + b = a + a = float_st0 + e = [] + if isinstance(b, ExprMem): + src = ExprOp('mem_%.2d_to_double' % b.size, b) + else: + src = b + e.append(ExprAff(float_prev(a), ExprOp('fmul', a, src))) + e += set_float_cs_eip(instr) + e += float_pop(a) + return None, e, [] + + +def ftan(ir, instr, a): + e = [] + if isinstance(a, ExprMem): + src = ExprOp('mem_%.2d_to_double' % a.size, a) + else: + src = a + e.append(ExprAff(float_st0, ExprOp('ftan', src))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fxch(ir, instr, a): + e = [] + if isinstance(a, ExprMem): + src = ExprOp('mem_%.2d_to_double' % a.size, a) + else: + src = a + e.append(ExprAff(float_st0, src)) + e.append(ExprAff(src, float_st0)) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fptan(ir, instr): + e = [] + e.append(ExprAff(float_st7, float_st6)) + e.append(ExprAff(float_st6, float_st5)) + e.append(ExprAff(float_st5, float_st4)) + e.append(ExprAff(float_st4, float_st3)) + e.append(ExprAff(float_st3, float_st2)) + e.append(ExprAff(float_st2, float_st1)) + e.append(ExprAff(float_st1, ExprOp('ftan', float_st0))) + e.append(ExprAff(float_st0, ExprOp('int_32_to_double', ExprInt32(1)))) + e.append( + ExprAff(float_stack_ptr, float_stack_ptr + ExprInt_fromsize(3, 1))) + return None, e, [] + + +def frndint(ir, instr): + e = [] + e.append(ExprAff(float_st0, ExprOp('frndint', float_st0))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fsin(ir, instr): + e = [] + e.append(ExprAff(float_st0, ExprOp('fsin', float_st0))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fcos(ir, instr): + e = [] + e.append(ExprAff(float_st0, 
ExprOp('fcos', float_st0))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fscale(ir, instr): + e = [] + e.append(ExprAff(float_st0, ExprOp('fscale', float_st0, float_st1))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def f2xm1(ir, instr): + e = [] + e.append(ExprAff(float_st0, ExprOp('f2xm1', float_st0))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fsqrt(ir, instr): + e = [] + e.append(ExprAff(float_st0, ExprOp('fsqrt', float_st0))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fabs(ir, instr): + e = [] + e.append(ExprAff(float_st0, ExprOp('fabs', float_st0))) + e += set_float_cs_eip(instr) + return None, e, [] + + +def fnstsw(ir, instr, dst): + args = [(ExprInt8(0), 0, 8), + (float_c0, 8, 9), + (float_c1, 9, 10), + (float_c2, 10, 11), + (float_stack_ptr, 11, 14), + (float_c3, 14, 15), + (ExprInt1(0), 15, 16)] + e = [ExprAff(dst, ExprCompose(args))] + return None, e, [] + + +def fnstcw(ir, instr, a): + e = [] + e.append(ExprAff(a, float_control)) + return None, e, [] + + +def fldcw(ir, instr, a): + e = [] + e.append(ExprAff(float_control, a)) + return None, e, [] + + +def fwait(ir, instr): + return None, [], None + + +def nop(ir, instr, a=None): + return None, [], [] + + +def hlt(ir, instr): + e = [] + except_int = EXCEPT_PRIV_INSN + e.append(ExprAff(exception_flags, ExprInt32(except_int))) + return None, e, [] + + +def rdtsc(ir, instr): + e = [] + myEAX = mRAX[instr.mode] + myEDX = mRDX[instr.mode] + e.append(ExprAff(tsc1, tsc1 + ExprInt32(1))) + e.append(ExprAff(myEAX, tsc1)) + e.append(ExprAff(myEDX, tsc2)) + return None, e, [] + + +# XXX TODO +def daa(ir, instr): + return None, [], None + + +def aam(ir, instr, a): + e = [] + tempAL = mRAX[instr.mode][0:8] + newEAX = ExprCompose([ + (tempAL % a, 0, 8), + (tempAL / a, 8, 16), + (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size), + ]) + e += [ExprAff(mRAX[instr.mode], newEAX)] + e += update_flag_arith(newEAX) + return None, e, [] + + +def aad(ir, instr, 
a): + e = [] + tempAL = mRAX[instr.mode][0:8] + tempAH = mRAX[instr.mode][8:16] + newEAX = ExprCompose([ + ((tempAL + (tempAH * a)) & ExprInt8(0xFF), 0, 8), + (ExprInt8(0), 8, 16), + (mRAX[instr.mode][16:], + 16, mRAX[instr.mode].size), + ]) + e += [ExprAff(mRAX[instr.mode], newEAX)] + e += update_flag_arith(newEAX) + return None, e, [] + + +def aaa(ir, instr, ): + e = [] + c = (mRAX[instr.mode][:8] & ExprInt8(0xf)) - ExprInt8(9) + + c = ExprCond(c.msb(), + ExprInt1(0), + ExprInt1(1)) & \ + ExprCond(c, + ExprInt1(1), + ExprInt1(0)) + + c |= af & ExprInt1(1) + # set AL + m_al = ExprCond(c, + (mRAX[instr.mode][:8] + ExprInt8(6)) & ExprInt8(0xF), + mRAX[instr.mode][:8] & ExprInt8(0xF)) + m_ah = ExprCond(c, + mRAX[instr.mode][8:16] + ExprInt8(1), + mRAX[instr.mode][8:16]) + + e.append(ExprAff(mRAX[instr.mode], ExprCompose([ + (m_al, 0, 8), (m_ah, 8, 16), + (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size)]))) + e.append(ExprAff(af, c)) + e.append(ExprAff(cf, c)) + return None, e, [] + + +def aas(ir, instr, ): + e = [] + c = (mRAX[instr.mode][:8] & ExprInt8(0xf)) - ExprInt8(9) + + c = ExprCond(c.msb(), + ExprInt1(0), + ExprInt1(1)) & \ + ExprCond(c, + ExprInt1(1), + ExprInt1(0)) + + c |= af & ExprInt1(1) + # set AL + m_al = ExprCond(c, + (mRAX[instr.mode][:8] - ExprInt8(6)) & ExprInt8(0xF), + mRAX[instr.mode][:8] & ExprInt8(0xF)) + m_ah = ExprCond(c, + mRAX[instr.mode][8:16] - ExprInt8(1), + mRAX[instr.mode][8:16]) + + e.append(ExprAff(mRAX[instr.mode], ExprCompose([ + (m_al, 0, 8), (m_ah, 8, 16), + (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size)]))) + e.append(ExprAff(af, c)) + e.append(ExprAff(cf, c)) + return None, e, [] + + +def bsf(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + + e = [ExprAff(zf, ExprCond(b, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))] + + e_do = [] + e_do.append(ExprAff(a, ExprOp('bsf', b))) + return ExprCond(b, + lbl_do, lbl_skip), e, [irbloc(lbl_do.name, lbl_skip, 
[e_do])] + + +def bsr(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + + e = [ExprAff(zf, ExprCond(b, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))] + + e_do = [] + e_do.append(ExprAff(a, ExprOp('bsr', b))) + return ExprCond(b, + lbl_do, lbl_skip), e, [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def arpl(ir, instr, a, b): + e = [] + e.append(ExprAff(exception_flags, ExprInt32(1 << 7))) + return None, e, [] + + +def ins(ir, instr, size): + e = [] + e.append(ExprAff(exception_flags, ExprInt32(1 << 7))) + return None, e, [] + + +def sidt(ir, instr, a): + e = [] + if not isinstance(a, ExprMem) or a.size != 32: + raise ValueError('not exprmem 32bit instance!!') + b = a.arg + print "DEFAULT SIDT ADDRESS %s!!" % str(a) + e.append(ExprAff(ExprMem(b, 32), ExprInt32(0xe40007ff))) + e.append( + ExprAff(ExprMem(ExprOp("+", b, + ExprInt_from(b, 4)), 16), ExprInt16(0x8245))) + return None, e, [] + + +def sldt(ir, instr, a): + # XXX TOOD + e = [ExprAff(exception_flags, ExprInt32(EXCEPT_PRIV_INSN))] + return None, e, [] + + +def cmovz(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + + dum, e_do, extra_irs = mov(ir, instr, a, b) + return ExprCond(zf, + lbl_do, lbl_skip), [], [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def cmovnz(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond(zf, a, b))) + return None, e, [] + + +def cmovge(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond(nf ^ of, a, b))) + return None, e, [] + + +def cmovg(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond(zf | (nf ^ of), a, b))) + return None, e, [] + + +def cmovl(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond(nf ^ of, b, a))) + return None, e, [] + + +def cmovle(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond((nf ^ of) | zf, b, a))) + return None, e, [] + + +def cmova(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), 
instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + dum, e_do, extra_irs = mov(ir, instr, a, b) + return ExprCond(cf | zf, + lbl_skip, lbl_do), [], [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def cmovae(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + dum, e_do, extra_irs = mov(ir, instr, a, b) + return ExprCond(cf, + lbl_skip, lbl_do), [], [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def cmovbe(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + dum, e_do, extra_irs = mov(ir, instr, a, b) + return ExprCond(cf | zf, + lbl_do, lbl_skip), [], [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def cmovb(ir, instr, a, b): + lbl_do = ExprId(ir.gen_label(), instr.mode) + lbl_skip = ExprId(ir.get_next_label(instr), instr.mode) + dum, e_do, extra_irs = mov(ir, instr, a, b) + return ExprCond(cf, + lbl_do, lbl_skip), [], [irbloc(lbl_do.name, lbl_skip, [e_do])] + + +def cmovo(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond(of, b, a))) + return None, e, [] + + +def cmovno(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprCond(of, a, b))) + return None, e, [] + + +def cmovs(ir, instr, a, b): + e = [] + # SF is called nf in miasm + e.append(ExprAff(a, ExprCond(nf, b, a))) + return None, e, [] + + +def cmovns(ir, instr, a, b): + e = [] + # SF is called nf in miasm + e.append(ExprAff(a, ExprCond(nf, a, b))) + return None, e, [] + + +def icebp(ir, instr): + e = [] + e.append(ExprAff(exception_flags, + ExprInt32(EXCEPT_PRIV_INSN))) + return None, e, [] +# XXX + + +def l_int(ir, instr, a): + e = [] + # XXX + if a.arg in [1, 3]: + except_int = EXCEPT_SOFT_BP + else: + except_int = EXCEPT_INT_XX + e.append(ExprAff(exception_flags, + ExprInt32(except_int))) + return None, e, [] + + +def l_sysenter(ir, instr): + e = [] + e.append(ExprAff(exception_flags, + ExprInt32(EXCEPT_PRIV_INSN))) + return None, e, [] + +# XXX 
+ + +def l_out(ir, instr, a, b): + e = [] + e.append(ExprAff(exception_flags, + ExprInt32(EXCEPT_PRIV_INSN))) + return None, e, [] + +# XXX + + +def l_outs(ir, instr, size): + e = [] + e.append(ExprAff(exception_flags, + ExprInt32(EXCEPT_PRIV_INSN))) + return None, e, [] + +# XXX actually, xlat performs al = (ds:[e]bx + ZeroExtend(al)) + + +def xlat(ir, instr): + e = [] + a = ExprCompose([(ExprInt_fromsize(24, 0), 8, 32), + (mRAX[instr.mode][0:8], 0, 8)]) + b = ExprMem(ExprOp('+', mRBX[instr.mode], a), 8) + e.append(ExprAff(mRAX[instr.mode][0:8], b)) + return None, e, [] + + +def cpuid(ir, instr): + e = [] + e.append( + ExprAff(mRAX[instr.mode], + ExprOp('cpuid', mRAX[instr.mode], ExprInt32(0)))) + e.append( + ExprAff(mRBX[instr.mode], + ExprOp('cpuid', mRAX[instr.mode], ExprInt32(1)))) + e.append( + ExprAff(mRCX[instr.mode], + ExprOp('cpuid', mRAX[instr.mode], ExprInt32(2)))) + e.append( + ExprAff(mRDX[instr.mode], + ExprOp('cpuid', mRAX[instr.mode], ExprInt32(3)))) + return None, e, [] + + +def bittest_get(a, b): + b = b.zeroExtend(a.size) + if isinstance(a, ExprMem): + off_bit = ExprOp('&', b, ExprInt_from(a, a.size - 1)) + off_byte = (b >> ExprInt_from(a, 3)) & \ + ExprOp('!', ExprInt_from(a, a.size / 8 - 1)) + + d = ExprMem(a.arg + off_byte, a.size) + # d = ExprOp('>>', mem, off_bit) + else: + off_bit = ExprOp('&', b, ExprInt_from(a, a.size - 1)) + d = a + # d = ExprOp('>>', a, off_bit) + return d, off_bit + + +def bt(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + d, off_bit = bittest_get(a, b) + d = d >> off_bit + e.append(ExprAff(cf, d[:1])) + return None, e, [] + + +def btc(ir, instr, a, b): + e = [] + d, off_bit = bittest_get(a, b) + e.append(ExprAff(cf, (d >> off_bit)[:1])) + + m = ExprInt_from(a, 1) << off_bit + e.append(ExprAff(d, d ^ m)) + + return None, e, [] + + +def bts(ir, instr, a, b): + e = [] + d, off_bit = bittest_get(a, b) + e.append(ExprAff(cf, (d >> off_bit)[:1])) + m = ExprInt_from(a, 1) << off_bit + e.append(ExprAff(d, d | m)) + + 
return None, e, [] + + +def btr(ir, instr, a, b): + e = [] + d, off_bit = bittest_get(a, b) + e.append(ExprAff(cf, (d >> off_bit)[:1])) + m = ~(ExprInt_from(a, 1) << off_bit) + e.append(ExprAff(d, d & m)) + + return None, e, [] + + +def into(ir, instr): + return None, [], None + + +def l_in(ir, instr, a, b): + e = [] + e.append(ExprAff(exception_flags, + ExprInt32(EXCEPT_PRIV_INSN))) + return None, e, [] + + +def cmpxchg(ir, instr, a, b): + e = [] + + c = mRAX[instr.mode][:a.size] + cond = c - a + e.append( + ExprAff(zf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + e.append(ExprAff(a, ExprCond(cond, + b, + a) + )) + e.append(ExprAff(c, ExprCond(cond, + a, + c) + )) + return None, e, [] + + +def lds(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprMem(b.arg, size=a.size))) + e.append(ExprAff(ds, ExprMem(b.arg + ExprInt_from(a, 2), + size=16))) + return None, e, [] + + +def les(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprMem(b.arg, size=a.size))) + e.append(ExprAff(es, ExprMem(b.arg + ExprInt_from(a, 2), + size=16))) + return None, e, [] + + +def lss(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprMem(b.arg, size=a.size))) + e.append(ExprAff(ss, ExprMem(b.arg + ExprInt_from(a, 2), + size=16))) + return None, e, [] + + +def lahf(ir, instr): + e = [] + args = [] + regs = [cf, ExprInt1(1), pf, ExprInt1(0), af, ExprInt1(0), zf, nf] + for i in xrange(len(regs)): + args.append((regs[i], i, i + 1)) + e.append(ExprAff(mRAX[instr.mode][8:16], ExprCompose(args))) + return None, e, [] + + +def sahf(ir, instr): + tmp = mRAX[instr.mode][8:16] + e = [] + e.append(ExprAff(cf, tmp[0:1])) + e.append(ExprAff(pf, tmp[2:3])) + e.append(ExprAff(af, tmp[4:5])) + e.append(ExprAff(zf, tmp[6:7])) + e.append(ExprAff(nf, tmp[7:8])) + return None, e, [] + + +def lar(ir, instr, a, b): + e = [] + e.append(ExprAff(a, ExprOp('access_segment', b))) + e.append(ExprAff(zf, ExprOp('access_segment_ok', b))) + return None, e, [] + + +def lsl(ir, instr, a, b): + e = [] + 
def ucomiss(ir, instr, a, b):
    """Build the IR of UCOMISS on the low 32 bits of `a` and `b`.

    zf, pf and cf are produced by dedicated 'ucomiss_*' operators; of, af
    and nf are cleared.
    """
    compared = ((zf, 'ucomiss_zf'), (pf, 'ucomiss_pf'), (cf, 'ucomiss_cf'))
    e = [ExprAff(flag, ExprOp(op_name, a[:32], b[:32]))
         for flag, op_name in compared]
    e.extend(ExprAff(flag, ExprInt1(0)) for flag in (of, af, nf))
    return None, e, []
'ror': l_ror, + 'rcl': rcl, + 'rcr': rcr, + 'sar': sar, + 'shr': shr, + 'shrd_cl': shrd_cl, + 'sal': sal, + 'shl': shl, + 'shld_cl': shld_cl, + 'shld': shld, + 'cmc': cmc, + 'clc': clc, + 'stc': stc, + 'cld': cld, + 'std': std, + 'cli': cli, + 'sti': sti, + 'bsf': bsf, + 'bsr': bsr, + 'inc': inc, + 'dec': dec, + 'push': push, + 'pop': pop, + 'sete': sete, + 'setnz': setnz, + 'setl': setl, + 'setg': setg, + 'setge': setge, + 'seta': seta, + 'setae': setae, + 'setb': setb, + 'setbe': setbe, + 'setns': setns, + 'sets': sets, + 'seto': seto, + 'setp': setp, + 'setpe': setp, + 'setnp': setnp, + 'setpo': setnp, + 'setle': setle, + 'setng': setle, + 'setna': setna, + 'setnbe': setnbe, + 'setno': setno, + 'setnc': setnb, + 'setz': sete, + 'setne': setnz, + 'setnb': setae, + 'setnae': setb, + 'setc': setb, + 'setnge': setl, + 'setnl': setge, + 'setnle': setg, + 'setalc': setalc, + 'bswap': bswap, + 'cmpsb': lambda ir, instr: cmps(ir, instr, 8), + 'cmpsw': lambda ir, instr: cmps(ir, instr, 16), + 'cmpsd': lambda ir, instr: cmps(ir, instr, 32), + 'scasb': lambda ir, instr: scas(ir, instr, 8), + 'scasw': lambda ir, instr: scas(ir, instr, 16), + 'scasd': lambda ir, instr: scas(ir, instr, 32), + 'pushfd': pushfd, + 'pushfw': pushfw, + 'popfd': popfd, + 'popfw': popfw, + 'pushad': pushad, + 'pusha': pushad, + 'popad': popad, + 'popa': popad, + 'call': call, + 'ret': ret, + 'retf': retf, + 'leave': leave, + 'enter': enter, + 'jmp': jmp, + 'jmpf': jmpf, + 'jz': jz, + 'je': jz, + 'jcxz': jcxz, + 'jecxz': jecxz, + 'jrcxz': jrcxz, + 'jnz': jnz, + 'jp': jp, + 'jpe': jp, + 'jnp': jnp, + 'ja': ja, + 'jae': jae, + 'jb': jb, + 'jbe': jbe, + 'jg': jg, + 'jge': jge, + 'jl': jl, + 'jle': jle, + 'js': js, + 'jns': jns, + 'jo': jo, + 'jno': jno, + 'jecxz': jecxz, + 'loop': loop, + 'loopne': loopne, + 'loope': loope, + 'div': div, + 'mul': mul, + 'imul': imul, + 'idiv': idiv, + + 'cbw': cbw, + 'cwde': cwde, + 'cdqe': cdqe, + + 'cwd': cwd, + 'cdq': cdq, + 'cqo': cqo, + + 'daa': daa, + 'aam': aam, 
+ 'aad': aad, + 'aaa': aaa, + 'aas': aas, + 'shrd': shrd, + 'stosb': lambda ir, instr: stos(ir, instr, 8), + 'stosw': lambda ir, instr: stos(ir, instr, 16), + 'stosd': lambda ir, instr: stos(ir, instr, 32), + 'stosq': lambda ir, instr: stos(ir, instr, 64), + + 'lodsb': lambda ir, instr: lods(ir, instr, 8), + 'lodsw': lambda ir, instr: lods(ir, instr, 16), + 'lodsd': lambda ir, instr: lods(ir, instr, 32), + + 'movsb': lambda ir, instr: movs(ir, instr, 8), + 'movsw': lambda ir, instr: movs(ir, instr, 16), + 'movsd': lambda ir, instr: movs(ir, instr, 32), + 'movsq': lambda ir, instr: movs(ir, instr, 64), + 'fcomp': fcomp, + 'nop': nop, + 'fnop': nop, # XXX + 'hlt': hlt, + 'rdtsc': rdtsc, + 'fst': fst, + 'fstp': fstp, + 'fist': fist, + 'fistp': fistp, + 'fld': fld, + 'fldz': fldz, + 'fld1': fld1, + 'fldl2e': fldl2e, + 'fldlg2': fldlg2, + 'fild': fild, + 'fadd': fadd, + 'fninit': fninit, + 'faddp': faddp, + 'fsub': fsub, + 'fmul': fmul, + 'fmulp': fmulp, + 'fdiv': fdiv, + 'fdivr': fdivr, + 'fdivp': fdivp, + 'fxch': fxch, + 'fptan': fptan, + 'frndint': frndint, + 'fsin': fsin, + 'fcos': fcos, + 'fscale': fscale, + 'f2xm1': f2xm1, + 'fsqrt': fsqrt, + 'fabs': fabs, + 'fnstsw': fnstsw, + 'fnstcw': fnstcw, + 'fldcw': fldcw, + 'fwait': fwait, + 'fnstenv': fnstenv, + 'sidt': sidt, + 'sldt': sldt, + 'arpl': arpl, + 'cmovz': cmovz, + 'cmove': cmovz, + 'cmovnz': cmovnz, + 'cmovge': cmovge, + 'cmovnl': cmovge, + 'cmovg': cmovg, + 'cmovl': cmovl, + 'cmova': cmova, + 'cmovae': cmovae, + 'cmovbe': cmovbe, + 'cmovb': cmovb, + 'cmovnge': cmovl, + 'cmovle': cmovle, + 'cmovng': cmovle, + 'cmovo': cmovo, + 'cmovno': cmovno, + 'cmovs': cmovs, + 'cmovns': cmovns, + 'icebp': icebp, + 'int': l_int, + 'xlat': xlat, + 'bt': bt, + 'cpuid': cpuid, + 'jo': jo, + 'fcom': fcom, + 'ficom': ficom, + 'fcomi': fcomi, + 'fcomip': fcomip, + 'fucomi': fucomi, + 'fucomip': fucomip, + 'insb': lambda ir, instr: ins(ir, instr, 8), + 'insw': lambda ir, instr: ins(ir, instr, 16), + 'insd': lambda ir, instr: 
class ir_x86_16(ir):
    """IR builder for x86 in 16-bit mode.

    ir_x86_32 and ir_x86_64 derive from this class and only change the
    mode, the pc/sp registers and (for 64 bit) mod_pc.
    """

    def __init__(self, symbol_pool=None):
        ir.__init__(self, mn_x86, 16, symbol_pool)
        self.do_stk_segm = False
        self.do_ds_segm = False
        self.do_str_segm = False
        self.do_all_segm = False
        self.pc = IP
        self.sp = SP

    def mod_pc(self, instr, instr_ir, extra_ir):
        """Hook to rewrite references to the program counter (no-op here)."""
        pass

    def get_ir(self, instr):
        """Translate `instr` into IR.

        Returns (dst, instr_ir, extra_ir). For a repeat-prefixed string
        instruction the semantic is wrapped in a loop made of two extra
        irblocs: one running the body, one decrementing the counter and
        testing the end condition.
        """
        args = instr.args[:]

        # Optional segmentation of memory operands.
        my_ss = None
        if self.do_ds_segm:
            my_ss = DS
        if self.do_all_segm and instr.additional_info.g2.value:
            my_ss = {1: CS, 2: SS, 3: DS, 4: ES, 5: FS, 6: GS}[
                instr.additional_info.g2.value]
        if my_ss is not None:
            for i, a in enumerate(args):
                if isinstance(a, ExprMem) and not a.is_op_segm():
                    args[i] = ExprMem(ExprOp('segm', my_ss, a.arg), a.size)

        dst, instr_ir, extra_ir = mnemo_func[
            instr.name.lower()](self, instr, *args)
        # Called once: the original invoked the hook twice back to back.
        self.mod_pc(instr, instr_ir, extra_ir)

        instr.additional_info.except_on_instr = False
        if instr.additional_info.g1.value & 6 == 0 or \
                instr.name not in repeat_mn:
            return dst, instr_ir, extra_ir
        instr.additional_info.except_on_instr = True

        # Element size from the mnemonic suffix. The value is not used
        # below; the lookup is kept so a mnemonic without a B/W/D/Q suffix
        # still raises KeyError here as it did before.
        s = {"B": 8, "W": 16, "D": 32, 'Q': 64}[instr.name[-1]]
        size = instr.v_opmode()
        c_reg = mRCX[instr.mode][:size]

        # zf_val is set when the instruction itself writes zf (cmps, scas).
        zf_val = None
        for e in instr_ir:
            if e.dst == zf:
                zf_val = e.src

        # End condition of the loop.
        if zf_val is None:
            c_cond = ExprCond(c_reg, ExprInt1(0), ExprInt1(1))
        elif instr.additional_info.g1.value & 2:  # REPNE
            c_cond = ExprCond(c_reg, ExprInt1(0), ExprInt1(1)) | (zf)
        elif instr.additional_info.g1.value & 4:  # REP
            c_cond = ExprCond(
                c_reg, ExprInt1(0), ExprInt1(1)) | (zf ^ ExprInt1(1))

        # Loop labels.
        lbl_do = ExprId(self.gen_label(), instr.mode)
        lbl_end = ExprId(self.gen_label(), instr.mode)
        lbl_skip = ExprId(self.get_next_label(instr), instr.mode)
        lbl_next = ExprId(self.get_next_label(instr), instr.mode)

        # Extra blocs that fell through to the next instruction must now
        # reach the counter/condition bloc instead.
        for b in extra_ir:
            b.dst = b.dst.replace_expr({lbl_next: lbl_end})

        cond_bloc = [ExprAff(c_reg, c_reg - ExprInt_from(c_reg, 1))]
        cond_bloc = irbloc(
            lbl_end.name, ExprCond(c_cond, lbl_skip, lbl_do), [cond_bloc])

        c = irbloc(lbl_do.name, dst, [instr_ir])
        c.except_automod = False
        return ExprCond(c_reg, lbl_do, lbl_skip), [], [cond_bloc, c] + extra_ir

    def expr_fix_regs_for_mode(self, e, mode=64):
        """Return `e` with registers replaced through replace_regs[mode]."""
        return e.replace_expr(replace_regs[mode])

    def expraff_fix_regs_for_mode(self, e, mode=64):
        """Return a new ExprAff whose dst and src are fixed for `mode`."""
        dst = self.expr_fix_regs_for_mode(e.dst, mode)
        src = self.expr_fix_regs_for_mode(e.src, mode)
        return ExprAff(dst, src)

    def irbloc_fix_regs_for_mode(self, irbloc, mode=64):
        """Fix, in place, every assignment and the dst of `irbloc`."""
        for irs in irbloc.irs:
            for i, e in enumerate(irs):
                # Special case for 64 bits: if the destination is a 32 bit
                # reg, zero extend into the 64 bit reg.
                if mode == 64:
                    if (isinstance(e.dst, ExprId) and e.dst.size == 32 and
                            e.dst in replace_regs[64]):
                        src = self.expr_fix_regs_for_mode(e.src, mode)
                        dst = replace_regs[64][e.dst].arg
                        e = ExprAff(dst, src.zeroExtend(64))
                irs[i] = self.expr_fix_regs_for_mode(e, mode)
        irbloc.dst = self.expr_fix_regs_for_mode(irbloc.dst, mode)
class asm_constraint(object):
    """Typed link from a bloc to a destination label.

    `c_t` is one of the class constants: c_to (jump/call target), c_next
    (fall-through successor) or c_bad (undecodable location).
    """
    c_to = "c_to"
    c_next = "c_next"
    c_bad = "c_bad"

    def __init__(self, label=None, c_t=c_to):
        self.label = label
        self.c_t = c_t
        # The hash is computed once, so the object stays usable as a set
        # member / dict key.
        self._hash = hash((self.label, self.c_t))

    def __str__(self):
        return "%s:%s" % (str(self.c_t), str(self.label))

    def __hash__(self):
        return self._hash

    def __eq__(self, a):
        if not isinstance(a, asm_constraint):
            return False
        # The hash comparison is a cheap first filter; the fields are then
        # compared so two different constraints whose hashes collide are
        # not reported equal.
        return (self._hash == a._hash and
                self.c_t == a.c_t and
                self.label == a.label)

    def __ne__(self, a):
        # Needed explicitly: Python 2 does not derive != from __eq__.
        return not self.__eq__(a)
super(asm_constraint_next, self).__init__( + label, c_t=asm_constraint.c_next) + + +class asm_constraint_to(asm_constraint): + + def __init__(self, label=None): + super(asm_constraint_to, self).__init__( + label, c_t=asm_constraint.c_to) + + +class asm_constraint_bad(asm_constraint): + + def __init__(self, label=None): + super(asm_constraint_bad, self).__init__( + label, c_t=asm_constraint.c_bad) + + +class asm_bloc: + + def __init__(self, label=None): + self.bto = set() + self.lines = [] + self.label = label + + def __str__(self): + out = [] + out.append(str(self.label)) + for l in self.lines: + out.append(str(l)) + if self.bto: + lbls = ["->"] + for l in self.bto: + if l is None: + lbls.append("Unknown? ") + else: + lbls.append(str(l) + " ") + lbls = '\t'.join(lbls) + out.append(lbls) + return '\n'.join(out) + + def addline(self, l): + self.lines.append(l) + + def addto(self, c): + assert(type(self.bto) is set) + self.bto.add(c) + + def split(self, offset, l): + log_asmbloc.debug('split at %x' % offset) + i = -1 + offsets = [x.offset for x in self.lines] + if not l.offset in offsets: + log_asmbloc.warning( + 'cannot split bloc at %X ' % offset + + 'middle instruction? 
class asm_symbol_pool:
    """Registry of asm_label objects, indexed by name and by offset.

    `labels` keeps every label created through the pool; `s` maps
    name -> label and `s_offset` maps offset -> label.
    """

    def __init__(self, no_collision=True):
        self.labels = []
        self.s = {}
        self.s_offset = {}
        self.no_collision = no_collision
        self.label_num = 0

    def add_label(self, name="", offset=None):
        """
        This should be the only method to create new asm_label objects

        Raises ValueError when `no_collision` is set and a different label
        already owns the same name or the same offset.
        """
        l = asm_label(name, offset)
        collision = None
        # `not (x == y)` instead of `x != y`: asm_label defines __eq__ but
        # no __ne__, so under Python 2 `!=` would not use __eq__.
        if l.offset in self.s_offset and not (l == self.s_offset[l.offset]):
            collision = 'offset'
        if l.name in self.s and not (l == self.s[l.name]):
            collision = 'name'
        if self.no_collision and collision == 'offset':
            raise ValueError('symbol %s has same offset as %s' %
                             (l, self.s_offset[l.offset]))
        if self.no_collision and collision == 'name':
            raise ValueError(
                'symbol %s has same name as %s' % (l, self.s[l.name]))
        self.labels.append(l)
        if l.offset is not None:
            self.s_offset[l.offset] = l
        if l.name != "":
            self.s[l.name] = l
        return l

    def remove(self, obj):
        """
        obj can be an asm_label or an offset

        Only the name/offset indexes are updated; an unknown offset is
        ignored.
        """
        if isinstance(obj, asm_label):
            if obj.name in self.s:
                del(self.s[obj.name])
            if obj.offset is not None and obj.offset in self.s_offset:
                del(self.s_offset[obj.offset])
        else:
            offset = int(obj)
            if offset in self.s_offset:
                obj = self.s_offset[offset]
                del(self.s_offset[offset])
                # Kept inside the lookup: without a label found, `obj` is
                # still the integer and has no `name`.
                if obj.name in self.s:
                    del(self.s[obj.name])

    def del_offset(self, l=None):
        """Forget the offset of `l`, or of every named label if l is None."""
        if l is not None:
            if l.offset in self.s_offset:
                del(self.s_offset[l.offset])
            l.offset = None
        else:
            self.s_offset = {}
            for l in self.s:
                self.s[l].offset = None

    def getby_offset(self, offset):
        return self.s_offset.get(offset, None)

    def getby_name(self, name):
        return self.s.get(name, None)

    def getby_name_create(self, name):
        """Return the label called `name`, creating it if needed."""
        l = self.getby_name(name)
        if l is None:
            l = self.add_label(name)
        return l

    def getby_offset_create(self, offset):
        """Return the label at `offset`, creating it if needed."""
        l = self.getby_offset(offset)
        if l is None:
            l = self.add_label(offset, offset)
        return l

    def rename(self, s, newname):
        """Rename label `s` and re-index it under `newname`."""
        if s.name not in self.s:
            log_asmbloc.warn('unk symb')
            return
        del(self.s[s.name])
        s.name = newname
        self.s[s.name] = s

    def set_offset(self, label, offset):
        """Set the offset of `label` and keep the offset index in sync.

        Raises ValueError if `label` was not created by this pool.
        """
        # Note that there is a special case when the offset is a list
        # it happens when offsets are recomputed in resolve_symbol*
        if label not in self.labels:
            raise ValueError('label %s not in symbol pool' % label)
        if not isinstance(label.offset, list) and label.offset in self.s_offset:
            del(self.s_offset[label.offset])
        label.offset = offset
        if not isinstance(label.offset, list):
            self.s_offset[label.offset] = label

    def items(self):
        """Return a copy of the list of labels."""
        return self.labels[:]

    def __str__(self):
        return ''.join([str(l) + '\n' for l in self.labels])

    def __getitem__(self, item):
        if item in self.s:
            return self.s[item]
        if item in self.s_offset:
            return self.s_offset[item]
        raise KeyError('unknown symbol %r' % item)

    def __contains__(self, item):
        return item in self.s or item in self.s_offset

    # `__in__` is not a Python protocol method (`in` uses __contains__);
    # the name is kept as an alias for any caller invoking it explicitly.
    __in__ = __contains__

    def merge(self, symbol_pool):
        """Import the labels and indexes of another pool into this one."""
        self.labels += symbol_pool.labels
        self.s.update(symbol_pool.s)
        self.s_offset.update(symbol_pool.s_offset)

    def gen_label(self):
        """Create a label with a fresh generated name."""
        l = self.add_label("lbl_gen_%.8X" % (self.label_num))
        self.label_num += 1
        return l
"lines watchdog reached at %X"%int(offset)) + break + + if offset in job_done: + cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool) + break + + off_i = offset + try: + # print repr(pool_bin.getbytes(offset, 4)) + instr = mnemo.dis(pool_bin, attrib, offset) + except (Disasm_Exception, IOError), e: + log_asmbloc.warning(e) + instr = None + + if instr is None: + log_asmbloc.warning("cannot disasm at %X" % int(off_i)) + cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool) + break + + # XXX TODO nul start block option + if dont_dis_nulstart_bloc and instr.b.count('\x00') == instr.l: + log_asmbloc.warning("reach nul instr at %X" % int(off_i)) + cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool) + break + + # special case: flow graph modificator in delayslot + if in_delayslot and instr and (instr.splitflow() or instr.breakflow()): + add_next_offset = True + break + + job_done.add(offset) + log_asmbloc.debug("dis at %X" % int(offset)) + + offset += instr.l + log_asmbloc.debug(instr) + log_asmbloc.debug(instr.args) + + cur_bloc.addline(instr) + if not instr.breakflow(): + continue + # test split + if instr.splitflow() and not (instr.is_subcall() and dontdis_retcall): + add_next_offset = True + # cur_bloc.add_cst(n, asm_constraint.c_next, symbol_pool) + pass + if instr.dstflow(): + instr.dstflow2label(symbol_pool) + dst = instr.getdstflow(symbol_pool) + dstn = [] + for d in dst: + if isinstance(d, m2_expr.ExprId) and isinstance(d.name, asm_label): + dstn.append(d.name) + dst = dstn + if (not instr.is_subcall()) or follow_call: + cur_bloc.bto.update( + [asm_constraint(x, asm_constraint.c_to) for x in dst]) + + # get in delayslot mode + in_delayslot = True + delayslot_count = instr.delayslot + + for c in cur_bloc.bto: + if c.c_t == asm_constraint.c_bad: + continue + if isinstance(c.label, asm_label): + offsets_to_dis.add(c.label.offset) + + if add_next_offset: + cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool) + offsets_to_dis.add(offset) + 
+ if dis_bloc_callback is not None: + dis_bloc_callback( + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + # print 'dst', [hex(x) for x in offsets_to_dis] + return offsets_to_dis + + +def split_bloc(mnemo, attrib, pool_bin, blocs, + symbol_pool, more_ref=None, dis_bloc_callback=None): + i = -1 + err = False + if not more_ref: + more_ref = [] + + # get all possible dst + bloc_dst = [symbol_pool.s_offset[x] for x in more_ref] + for b in blocs: + for c in b.bto: + if not isinstance(c.label, asm_label): + continue + if c.c_t == asm_constraint.c_bad: + continue + bloc_dst.append(c.label) + + bloc_dst = [x.offset for x in bloc_dst if x.offset is not None] + + j = -1 + while j < len(blocs) - 1: + j += 1 + cb = blocs[j] + a, b = cb.get_range() + + for off in bloc_dst: + if not (off > a and off <= b): + continue + l = symbol_pool.getby_offset_create(off) + new_b = cb.split(off, l) + log_asmbloc.debug("split bloc %x" % off) + if new_b is None: + log_asmbloc.error("cannot split %x!!" 
def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[],
                 split_dis=[], follow_call=False, patch_instr_symb=True,
                 dontdis_retcall=False,
                 blocs_wd=None, lines_wd=None, blocs=None,
                 dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
                 attrib={}):
    """Disassemble every bloc reachable from `offset`.

    Offsets are taken from a work list fed by dis_bloc(). An offset is
    skipped when it is None, already in `job_done`, listed in `dont_dis`,
    or inside one of the (start, stop) tuples of `dont_dis`.

    `blocs_wd` bounds the number of work-list iterations and `lines_wd`
    is forwarded to dis_bloc(). New blocs are appended to `blocs` (a new
    list when None) and the result of split_bloc() on it is returned.
    """
    log_asmbloc.info("dis bloc all")
    if blocs is None:
        blocs = []
    todo = [offset]

    bloc_cpt = 0
    while todo:
        bloc_cpt += 1
        if blocs_wd is not None and bloc_cpt > blocs_wd:
            log_asmbloc.debug("blocs watchdog reached at %X" % int(offset))
            break

        n = todo.pop(0)
        # Test for None before converting: dis_bloc() can hand back the
        # offset of a label that has none yet, and int(None) would raise.
        if n is None:
            continue
        n = int(n)
        if n in job_done:
            continue

        if n in dont_dis:
            continue
        # dont_dis may also hold (start, stop) ranges, stop excluded.
        in_range = False
        for dd in dont_dis:
            if not isinstance(dd, tuple):
                continue
            dd_a, dd_b = dd
            if dd_a <= n < dd_b:
                in_range = True
                break
        if in_range:
            continue

        l = symbol_pool.getby_offset_create(n)
        cur_bloc = asm_bloc(l)
        todo += dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool,
                         dont_dis, split_dis, follow_call, patch_instr_symb,
                         dontdis_retcall,
                         dis_bloc_callback=dis_bloc_callback,
                         lines_wd=lines_wd,
                         dont_dis_nulstart_bloc=dont_dis_nulstart_bloc,
                         attrib=attrib)
        blocs.append(cur_bloc)

    return split_bloc(mnemo, attrib, pool_bin, blocs,
                      symbol_pool, dis_bloc_callback=dis_bloc_callback)
def conservative_asm(mnemo, mode, instr, symbols, conservative):
    """
    Assemble `instr` and pick one encoding, favouring its original bytes.

    Returns (chosen, candidates). The choice is, in order: the bytes in
    `instr.b` if the assembler produced them again; when `conservative`
    is set, the first candidate with the same length as `instr.b`;
    otherwise the first candidate. Raises ValueError when the assembler
    produces nothing.
    """
    encodings = mnemo.asm(instr, symbols)
    if not encodings:
        raise ValueError('cannot asm:%s' % str(instr))

    chosen = encodings[0]
    if hasattr(instr, "b"):
        if instr.b in encodings:
            chosen = instr.b
        elif conservative:
            for enc in encodings:
                if len(enc) == len(instr.b):
                    chosen = enc
                    break
    return chosen, encodings
def gen_free_space_intervals(f, max_offset=0xFFFFFFFF):
    """Compute the free space that follows each fixed region.

    `f` maps a label (carrying an `offset_free` attribute) to the length
    reserved from that offset. The result maps each label to the number
    of bytes between the end of its region and the start of the next one
    (or `max_offset` for the last one). It is 0 when regions overlap or
    when an earlier region already extends past this one.
    """
    interval = {}
    offset_label = dict((x.offset_free, x) for x in f)
    # Region starts in ascending order, closed by the end sentinel.
    bounds = sorted(offset_label)
    bounds.append(max_offset)

    # Highest address known to be occupied so far.
    unfree_stop = 0
    for offset, next_offset in zip(bounds, bounds[1:]):
        label = offset_label[offset]
        offset_end = offset + f[label]
        next_len = 0
        if unfree_stop > offset_end:
            # An earlier region already covers past the end of this one.
            space = 0
        else:
            space = max(next_offset - offset_end, 0)
            if next_offset in offset_label:
                next_len = f[offset_label[next_offset]]
        interval[label] = space

        unfree_stop = max(unfree_stop, offset_end, next_offset + next_len)
    return interval
non_free_mapping = {} + # calculate free space for bloc placing + for g in group_bloc: + rest_len = 0 + g.fixedblocs = False + # if a label in the group is fixed + diff_offset = 0 + for b in group_bloc[g]: + if not is_int(b.label.offset): + diff_offset += b.blen_max + continue + g.fixedblocs = True + g.offset_free = b.label.offset - diff_offset + break + if g.fixedblocs: + non_free_mapping[g] = g.total_max_l + + log_asmbloc.debug("non free bloc:") + log_asmbloc.debug(non_free_mapping) + add_dont_erase(non_free_mapping, dont_erase) + log_asmbloc.debug("non free more:") + log_asmbloc.debug(non_free_mapping) + return non_free_mapping + + +def resolve_symbol( + group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF): + """ + place all asmblocs + """ + log_asmbloc.info('resolve_symbol') + log_asmbloc.info(str(dont_erase)) + bloc_list = [] + unr_bloc = reduce(lambda x, y: x + group_bloc[y], group_bloc, []) + ending_ad = [] + + non_free_mapping = gen_non_free_mapping(group_bloc, dont_erase) + free_interval = gen_free_space_intervals(non_free_mapping, max_offset) + log_asmbloc.debug(free_interval) + + # first big ones + g_tab = [(x.total_max_l, x) for x in group_bloc] + g_tab.sort() + g_tab.reverse() + g_tab = [x[1] for x in g_tab] + + # g_tab => label of grouped blov + # group_bloc => dict of grouped bloc labeled-key + + # first, near callee placing algo + for g in g_tab: + if g.fixedblocs: + continue + finish = False + for x in group_bloc: + if not x in free_interval.keys(): + continue + if free_interval[x] < g.total_max_l: + continue + + for b in group_bloc[x]: + for c in b.bto: + if c.label == g: + tmp = free_interval[x] - g.total_max_l + log_asmbloc.debug( + "consumed %d rest: %d" % (g.total_max_l, int(tmp))) + free_interval[g] = tmp + del(free_interval[x]) + symbol_pool.set_offset( + g, [group_bloc[x][-1].label, group_bloc[x][-1], 1]) + g.fixedblocs = True + finish = True + break + if finish: + break + if finish: + break + + # second, bigger in smaller algo + 
for g in g_tab: + if g.fixedblocs: + continue + # chose smaller free_interval first + k_tab = [(free_interval[x], x) for x in free_interval] + k_tab.sort() + k_tab = [x[1] for x in k_tab] + # choose free_interval + for k in k_tab: + if g.total_max_l > free_interval[k]: + continue + symbol_pool.set_offset( + g, [group_bloc[k][-1].label, group_bloc[k][-1], 1]) + tmp = free_interval[k] - g.total_max_l + log_asmbloc.debug( + "consumed %d rest: %d" % (g.total_max_l, int(tmp))) + free_interval[g] = tmp + del(free_interval[k]) + + g.fixedblocs = True + break + + while unr_bloc: + # propagate know offset + resolving = False + i = 0 + while i < len(unr_bloc): + if unr_bloc[i].label.offset is None: + i += 1 + continue + resolving = True + log_asmbloc.info("bloc %s resolved" % unr_bloc[i].label) + bloc_list.append((unr_bloc[i], 0)) + g_found = None + for g in g_tab: + if unr_bloc[i] in group_bloc[g]: + if g_found is not None: + raise ValueError('blocin multiple group!!!') + g_found = g + my_group = group_bloc[g_found] + + index = my_group.index(unr_bloc[i]) + if index > 0 and my_group[index - 1] in unr_bloc: + symbol_pool.set_offset( + my_group[index - 1].label, + [unr_bloc[i].label, unr_bloc[i - 1], -1]) + if index < len(my_group) - 1 and my_group[index + 1] in unr_bloc: + symbol_pool.set_offset( + my_group[index + 1].label, + [unr_bloc[i].label, unr_bloc[i], 1]) + del unr_bloc[i] + + if not resolving: + log_asmbloc.warn("cannot resolve symbol! (no symbol fix found)") + else: + continue + + for g in g_tab: + print g + if g.fixedblocs: + print "fixed" + else: + print "not fixed" + raise ValueError('enable to fix bloc') + return bloc_list + + +def calc_symbol_offset(symbol_pool): + s_to_use = set() + + s_dependent = {} + + for label in symbol_pool.items(): + if label.offset is None: + # raise ValueError("symbol missing?", label) + #print "symbol missing?? 
%s" % label + label.offset_g = None + continue + if not is_int(label.offset): + # construct dependant blocs tree + s_d = label.offset[0] + if not s_d in s_dependent: + s_dependent[s_d] = set() + s_dependent[s_d].add(label) + else: + s_to_use.add(label) + label.offset_g = label.offset + + while s_to_use: + label = s_to_use.pop() + if not label in s_dependent: + continue + for l in s_dependent[label]: + if label.offset_g is None: + raise ValueError("unknown symbol: %s" % str(label.name)) + l.offset_g = label.offset_g + l.offset_g[1].blen * l.offset_g[2] + s_to_use.add(l) + + +def asmbloc_final(mnemo, mode, blocs, symbol_pool, symb_reloc_off=None, conservative = False): + log_asmbloc.info("asmbloc_final") + if symb_reloc_off is None: + symb_reloc_off = {} + fini = False + # asm with minimal instr len + # check if dst label are ok to this encoded form + # recompute if not + # TODO XXXX: implement todo list to remove n^high complexity! + while fini is not True: + + fini = True + my_symb_reloc_off = {} + + calc_symbol_offset(symbol_pool) + + symbols = asm_symbol_pool() + for s, v in symbol_pool.s.items(): + symbols.add_label(s, v.offset_g) + # print symbols + # test if bad encoded relative + for b, t in blocs: + + offset_i = 0 + blen = 0 + my_symb_reloc_off[b.label] = [] + for instr in b.lines: + if isinstance(instr, asm_raw): + offset_i += instr.l # len(instr.data) + continue + # if not [True for a in instr.arg if mnemo.has_symb(a)]: + # offset_i+=len(instr.data) + # continue + sav_a = instr.args[:] # [a.expr for a in instr.args] + # print [str(x) for x in sav_a] + args_e = instr.resolve_args_with_symbols(symbols) + for i, e in enumerate(args_e): + # print 'ee', e.size, e + instr.args[i] = e + + instr.offset = b.label.offset_g + offset_i + if instr.dstflow(): + # instr.l = len(instr.data) + instr.fixDstOffset() + """ + lbls = {} + xxx = instr.getdstflow() + if len(xxx) !=1: + raise ValueError('multi dst ?!') + label = mnemo.get_label(xxx[0]) + is_mem = 
mnemo.is_mem(xxx[0]) + lbls[label.name] = label.offset_g + instr.fixdst(lbls, b.label.offset_g+b.blen, is_mem) + """ + # else: + # instr.arg = [mnemo.fix_symbol(a, symbol_pool) + # for a in instr.arg] + # pass + symbol_reloc_off = [] + old_l = instr.l + c, candidates = conservative_asm( + mnemo, mode, instr, symbol_reloc_off, conservative) + + # print "XXXX", instr + # print candidates + for i, e in enumerate(sav_a): + instr.args[i] = e + + if len(c) != instr.l: + # good len, bad offset...XXX + b.blen = b.blen - old_l + len(c) + instr.data = c + instr.l = len(c) + fini = False + continue + found = False + for cpos, c in enumerate(candidates): + # if len(c) == len(instr.data): + if len(c) == instr.l: + # print 'UPDD', repr(instr.data), repr(c) + # b.blen = b.blen-old_l+len(c) + instr.data = c + instr.l = len(c) + + found = True + break + if not found: + raise ValueError('something wrong in instr.data') + + if cpos < len(symbol_reloc_off): + my_s = symbol_reloc_off[cpos] + else: + my_s = None + + if my_s is not None: + my_symb_reloc_off[b.label].append(offset_i + my_s) + offset_i += instr.l + blen += instr.l + assert(len(instr.data) == instr.l) + # we have fixed all relative values + # recompute good offsets + for label in symbol_pool.items(): + # if label.offset_g is None: + # fdfd + symbol_pool.set_offset(label, label.offset_g) + + for a, b in my_symb_reloc_off.items(): + symb_reloc_off[a] = b + + +def asm_resolve_final(mnemo, mode, blocs, symbol_pool, dont_erase=[], + max_offset=0xFFFFFFFF, + symb_reloc_off=None, constrain_pos=False): + if symb_reloc_off is None: + symb_reloc_off = {} + # asmbloc(mnemo, mode, blocs, symbol_pool) + guess_blocs_size(mnemo, mode, blocs, symbol_pool) + bloc_g = group_blocs(blocs) + + resolved_b = resolve_symbol(bloc_g, symbol_pool, dont_erase=dont_erase, + max_offset=max_offset) + + asmbloc_final(mnemo, mode, resolved_b, symbol_pool, symb_reloc_off) + written_bytes = {} + patches = {} + for b, t in resolved_b: + offset = 
b.label.offset + for i in b.lines: + assert(i.data is not None) + patches[offset] = i.data + for c in range(i.l): + if offset + c in written_bytes: + raise ValueError( + "overlapping bytes in asssembly %X" % int(offset)) + written_bytes[offset + c] = 1 + i.offset = offset + i.l = i.l + offset += i.l + + return resolved_b, patches + + +def blist2graph(ab): + """ + ab: list of asmbloc + return: graph of asmbloc + """ + g = DiGraph() + g.lbl2bloc = {} + for b in ab: + g.lbl2bloc[b.label] = b + g.add_node(b.label) + for x in b.bto: + g.add_edge(b.label, x.label) + return g + + +class basicblocs: + + def __init__(self, ab=[]): + self.blocs = {} + self.g = DiGraph() + self.add_blocs(ab) + + def add(self, b): + self.blocs[b.label] = b + self.g.add_node(b.label) + for dst in b.bto: + if isinstance(dst.label, asm_label): + self.g.add_edge(b.label, dst.label) + + def add_blocs(self, ab): + for b in ab: + self.add(b) + + def get_bad_dst(self): + o = set() + for b in self.blocs.values(): + for c in b.bto: + if c.c_t == asm_constraint.c_bad: + o.add(b) + return o + + +def find_parents(blocs, l): + p = set() + for b in blocs: + if l in [x.label for x in b.bto if isinstance(x.label, asm_label)]: + p.add(b.label) + return p + + +def bloc_blink(blocs): + for b in blocs: + b.parents = find_parents(blocs, b.label) + + +def getbloc_around(blocs, a, level=3, done=None, blocby_label=None): + + if not blocby_label: + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + if done is None: + done = set() + + done.add(a) + if not level: + return done + for b in a.parents: + b = blocby_label[b] + if b in done: + continue + done.update(getbloc_around(blocs, b, level - 1, done, blocby_label)) + for b in a.bto: + b = blocby_label[b.label] + if b in done: + continue + done.update(getbloc_around(blocs, b, level - 1, done, blocby_label)) + return done + + +def getbloc_parents(blocs, a, level=3, done=None, blocby_label=None): + + if not blocby_label: + blocby_label = {} + for b in blocs: 
+ blocby_label[b.label] = b + if done is None: + done = set() + + done.add(a) + if not level: + return done + for b in a.parents: + b = blocby_label[b] + if b in done: + continue + done.update(getbloc_parents(blocs, b, level - 1, done, blocby_label)) + return done + +# get ONLY level_X parents + + +def getbloc_parents_strict( + blocs, a, level=3, rez=None, done=None, blocby_label=None): + + if not blocby_label: + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + if rez is None: + rez = set() + if done is None: + done = set() + + done.add(a) + if level == 0: + rez.add(a) + if not level: + return rez + for b in a.parents: + b = blocby_label[b] + if b in done: + continue + rez.update(getbloc_parents_strict( + blocs, b, level - 1, rez, done, blocby_label)) + return rez + + +def bloc_find_path_next(blocs, blocby_label, a, b, path=None): + if path == None: + path = [] + if a == b: + return [path] + + all_path = [] + for x in a.bto: + if x.c_t != asm_constraint.c_next: + continue + if not x.label in blocby_label: + print 'XXX unknown label' + continue + x = blocby_label[x.label] + all_path += bloc_find_path_next(blocs, blocby_label, x, b, path + [a]) + # stop if at least one path found + if all_path: + return all_path + return all_path + + +def bloc_merge(blocs, symbol_pool, dont_merge=[]): + i = -1 + """ + # TODO XXXX implement find all path for digraph + + g = blist2graph(blocs) + g.lbl2node = dict([(b.label, b) for b in blocs]) + + while i<len(blocs)-1: + i+=1 + b = blocs[i] + if b.label in dont_merge: + continue + + successors = [x for x in g.successors(b.label)] + predecessors = [x for x in g.predecessors(b.label)] + # if bloc doesn't self ref + if b.label in successors: + continue + # and bloc has only one parent + if len(predecessors) != 1: + continue + # may merge + bpl = predecessors[0] + + # and parent has only one son + p_s = [x for x in g.successors(bpl)] + if len(p_s)!=1: + continue + + bp = g.lbl2node[bpl] + # and parent has not a next 
constraint yet + found = False + for gpl in g.predecessors(bpl): + gp = g.lbl2node[gpl] + for x in gp.bto: + if x.c_t != asm_constraint.c_next: + continue + if x.label == bpl: + found = True + break + if found: + break + if found: + continue + if bp.lines: + l = bp.lines[-1] + #jmp opt; jcc opt + if l.is_subcall(): + continue + if l.breakflow() and l.dstflow(): + bp.lines.pop() + #merge + #sons = b.bto[:] + + # update parents + for s in b.bto: + if not isinstance(s.label, asm_label): continue + if s.label.name == None: + continue + if not s.label in g.lbl2node: + print "unknown parent XXX" + continue + bs = g.lbl2node[s.label] + for p in g.predecessors(bs.label): + if p == b.label: + bs.parents.discard(p) + bs.parents.add(bp.label) + bp.lines+=b.lines + bp.bto = b.bto + #symbol_pool.remove(b.label) + del(blocs[i]) + i = -1 + + return + """ + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + b.parents = find_parents(blocs, b.label) + + while i < len(blocs) - 1: + i += 1 + b = blocs[i] + if b.label in dont_merge: + continue + p = set(b.parents) + # if bloc dont self ref + if b.label in p: + continue + # and bloc has only one parent + if len(p) != 1: + continue + # may merge + bpl = p.pop() + # bp = getblocby_label(blocs, bpl) + bp = blocby_label[bpl] + # and parent has only one son + if len(bp.bto) != 1: + continue + """ + and will not create next loop composed of constraint_next from son to + parent + """ + path = bloc_find_path_next(blocs, blocby_label, b, bp) + if path: + continue + if bp.lines: + l = bp.lines[-1] + # jmp opt; jcc opt + if l.is_subcall(): + continue + if l.breakflow() and l.dstflow(): + bp.lines.pop() + # merge + # sons = b.bto[:] + + # update parents + for s in b.bto: + if not isinstance(s.label, asm_label): + continue + if s.label.name == None: + continue + if not s.label in blocby_label: + print "unknown parent XXX" + continue + bs = blocby_label[s.label] + for p in list(bs.parents): + if p == b.label: + bs.parents.discard(p) + 
bs.parents.add(bp.label) + bp.lines += b.lines + bp.bto = b.bto + # symbol_pool.remove(b.label) + del(blocs[i]) + i = -1 + + +class disasmEngine(object): + + def __init__(self, arch, attrib, bs=None, **kwargs): + self.arch = arch + self.attrib = attrib + self.bs = bs + self.symbol_pool = asm_symbol_pool() + self.dont_dis = [] + self.split_dis = [] + self.follow_call = False + self.patch_instr_symb = True + self.dontdis_retcall = False + self.lines_wd = None + self.blocs_wd = None + self.dis_bloc_callback = None + self.dont_dis_nulstart_bloc = False + self.job_done = set() + self.__dict__.update(kwargs) + + def dis_bloc(self, offset): + job_done = set() + l = self.symbol_pool.getby_offset_create(offset) + current_bloc = asm_bloc(l) + dis_bloc(self.arch, self.bs, current_bloc, offset, self.job_done, + self.symbol_pool, + dont_dis=self.dont_dis, split_dis=self.split_dis, + follow_call=self.follow_call, + patch_instr_symb=self.patch_instr_symb, + dontdis_retcall=self.dontdis_retcall, + lines_wd=self.lines_wd, + dis_bloc_callback=self.dis_bloc_callback, + dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc, + attrib=self.attrib) + return current_bloc + + def dis_multibloc(self, offset, blocs=None): + blocs = dis_bloc_all(self.arch, self.bs, offset, self.job_done, + self.symbol_pool, + dont_dis=self.dont_dis, split_dis=self.split_dis, + follow_call=self.follow_call, + patch_instr_symb=self.patch_instr_symb, + dontdis_retcall=self.dontdis_retcall, + blocs_wd=self.blocs_wd, + lines_wd=self.lines_wd, + blocs=blocs, + dis_bloc_callback=self.dis_bloc_callback, + dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc, + attrib=self.attrib) + return blocs + diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py new file mode 100644 index 00000000..7ae6d3fa --- /dev/null +++ b/miasm2/core/bin_stream.py @@ -0,0 +1,175 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or 
modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + + +class bin_stream(object): + + def __init__(self, *args, **kargs): + pass + + def __repr__(self): + return "<%s !!>" % self.__class__.__name__ + + def hexdump(self, offset, l): + return + + def getbytes(self, start, l=1): + return self.bin[start:start + l] + + def getbits(self, start, n): + if not n: + return 0 + o = 0 + if n > self.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(self.bin) * 8)) + while n: + # print 'xxx', n, start + i = start / 8 + c = self.getbytes(i) + if not c: + raise IOError + c = ord(c) + # print 'o', hex(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + # print 'm', hex(c) + l = min(r, n) + # print 'd', r-l + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + +class bin_stream_str(bin_stream): + + def __init__(self, bin="", offset=0L, shift=0): + bin_stream.__init__(self) + if offset > len(bin): + raise IOError + self.bin = bin + self.offset = offset + self.shift = shift + self.l = len(bin) + if "is_addr_in" in self.bin.__class__.__dict__: + self.is_addr_in = lambda ad: self.bin.is_addr_in(ad) + + def getbytes(self, start, l=1): + if start + l > self.l: + raise IOError + + return super(bin_stream_str, self).getbytes(start + self.shift, l) + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + self.offset += l + print hex(self.offset + 
self.shift) + return self.bin[self.offset - l + self.shift:self.offset + self.shift] + + def writebs(self, l=1): + raise ValueError('writebs unsupported') + + def __str__(self): + out = self.bin[self.offset + self.shift:] + return out + + def setoffset(self, val): + self.offset = val + + def __len__(self): + return len(self.bin) - self.offset + self.shift + + def getlen(self): + return len(self.bin) - self.offset + self.shift + + +class bin_stream_file(bin_stream): + + def __init__(self, bin, offset=0L): + bin_stream.__init__(self) + self.bin = bin + self.bin.seek(0, 2) + self.l = self.bin.tell() + self.offset = offset + + def getoffset(self): + return self.bin.tell() + + def setoffset(self, val): + self.bin.seek(val) + offset = property(getoffset, setoffset) + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + return self.bin.read(l) + + def writebs(self, l=1): + if self.offset + l > self.l: + raise IOError + return self.bin.write(l) + + def __str__(self): + return str(self.bin) + + +class bin_stream_pe(bin_stream): + + def __init__(self, bin="", offset=0L): + bin_stream.__init__(self) + # print 'ELF/PE' + self.mylen = len(bin) + if offset > bin.__len__(): + raise IOError + self.bin = bin + self.offset = offset + self.l = bin.__len__() + if "is_addr_in" in self.bin.__class__.__dict__: + self.is_addr_in = lambda ad: self.bin.is_addr_in(ad) + + def getlen(self): + return self.mylen + # s = self.bin.parent.SHList[-1] + # l = self.bin.parent.rva2virt(s.addr+s.size) + # return l + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + self.offset += l + return self.bin(self.offset - l, self.offset) + + def writebs(self, l=1): + raise ValueError('writebs unsupported') + + def getbytes(self, start, l=1): + return self.bin(start, start + l) + + def __str__(self): + out = self.bin[self.offset:] + return out + + def setoffset(self, val): + self.offset = val + + +class bin_stream_elf(bin_stream_pe): + pass diff --git 
a/miasm2/core/cpu.py b/miasm2/core/cpu.py new file mode 100644 index 00000000..7d672caa --- /dev/null +++ b/miasm2/core/cpu.py @@ -0,0 +1,1804 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import re +import struct +import logging +from pyparsing import * +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from collections import defaultdict +from bin_stream import bin_stream, bin_stream_str +from utils import Disasm_Exception +from miasm2.expression.simplifications import expr_simp + +log = logging.getLogger("cpuhelper") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# size2int = {8:ExprInt8, 16:ExprInt16, 32:ExprInt32,64:ExprInt64} + + +class bitobj: + + def __init__(self, s=""): + if not s: + bits = [] + else: + bits = list(bin(int(str(s).encode('hex'), 16))[2:]) + bits = [int(x) for x in bits] + if len(bits) % 8: + bits = [0 for x in xrange(8 - (len(bits) % 8))] + bits + bits = ['0' for x in xrange(len(s) * 8 - len(bits))] + bits + self.bits = bits + self.offset = 0 + + def __len__(self): + return len(self.bits) - self.offset + + def getbits(self, n): + if not n: + return 0 + o = 0 + if n > len(self.bits) - self.offset: + raise ValueError('not enought bits %r %r' % (n, len(self.bits))) + b = self.bits[self.offset:self.offset + n] + b = int("".join([str(x) for x in b]), 2) + self.offset += n + return b + + def putbits(self, b, n): + if not n: + return + bits = list(bin(b)[2:]) + bits = [int(x) for x in bits] + bits = [0 for x in xrange(n - len(bits))] + bits + self.bits += bits + + def tostring(self): + if len(self.bits) % 8: + raise ValueError( + 'num bits must be 8 bit aligned: %d' % len(self.bits)) + b = int("".join([str(x) for x in self.bits]), 2) + b = "%X" % b + b = '0' * (len(self.bits) / 4 - len(b)) + b + b = b.decode('hex') + return b + + def reset(self): + self.offset = 0 + + def 
copy_state(self): + b = self.__class__() + b.bits = self.bits + b.offset = self.offset + return b + + +def literal_list(l): + l = l[:] + l.sort() + l = l[::-1] + o = Literal(l[0]) + for x in l[1:]: + o |= Literal(x) + return o + + +class reg_info: + + def __init__(self, reg_str, reg_expr): + self.str = reg_str + self.expr = reg_expr + self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + + def reg2expr(self, s): + i = self.str.index(s[0]) + return self.expr[i] + + def expr2regi(self, e): + return self.expr.index(e) + + +def gen_reg(rname, env, sz=32): + """ + Gen reg expr and parser + Equivalent to: + PC = ExprId('PC') + reg_pc_str = ['PC'] + reg_pc_expr = [ExprId(x, sz) for x in reg_pc_str] + regpc = reg_info(reg_pc_str, reg_pc_expr) + + class bs_rname(m_reg): + reg = regi_rname + + bsrname = bs(l=0, cls=(bs_rname,)) + + """ + rnamel = rname.lower() + r = ExprId(rname, sz) + reg_str = [rname] + reg_expr = [r] + regi = reg_info(reg_str, reg_expr) + # define as global val + cname = "bs_" + rnamel + c = type(cname, (m_reg,), {'reg': regi}) + env[rname] = r + env["regi_" + rnamel] = regi + env[cname] = c + env["bs" + rnamel] = bs(l=0, cls=(c,)) + return r, regi + +LPARENTHESIS = Literal("(") +RPARENTHESIS = Literal(")") + + +# + + +def int2expr(t): + v = t[0] + return (ExprInt, v) + + +def parse_op(t): + v = t[0] + return (ExprOp, v) + + +def parse_id(t): + v = t[0] + return (ExprId, v) + + +def ast_parse_op(t): + if len(t) == 1: + return t[0] + if len(t) == 2: + if t[0] in ['-', '+', '!']: + return ExprOp(t[0], t[1]) + if len(t) == 3: + args = [t[0], t[2]] + return ExprOp(t[1], t[0], t[2]) + t = t[::-1] + while len(t) >= 3: + o1, op, o2 = t.pop(), t.pop(), t.pop() + e = ExprOp(op, o1, o2) + t.append(e) + if len(t) != 1: + raise NotImplementedError('strange op') + return t[0] + + +def ast_id2expr(a): + return ExprId(a, 32) + + +def ast_int2expr(a): + return ExprInt32(a) + + +def ast_raw2expr(a, my_id2expr, my_int2expr): + assert(isinstance(a, tuple)) + 
if a[0] is ExprId: + e = my_id2expr(a[1]) + elif a[0] is ExprInt: + e = my_int2expr(a[1]) + elif a[0] is ExprOp: + out = [] + for x in a[1]: + if isinstance(x, tuple): + x = ast_raw2expr(x, my_id2expr, my_int2expr) + out.append(x) + e = ast_parse_op(out) + else: + raise TypeError('unknown type') + return e + + +def ast_get_ids(a): + assert(isinstance(a, tuple)) + if a[0] is ExprId: + return set([a[1]]) + elif a[0] is ExprInt: + return set() + elif a[0] is ExprOp: + out = set() + for x in a[1]: + if isinstance(x, tuple): + out.update(ast_get_ids(x)) + return out + raise TypeError('unknown type') + + +def _extract_ast_core(a): + assert(isinstance(a, tuple)) + if a[0] in [ExprInt, ExprId]: + return a + elif a[0] is ExprOp: + out = [] + for x in a[1]: + if isinstance(x, tuple): + x = _extract_ast_core(x) + out.append(x) + return tuple([a[0]] + [out]) + else: + raise TypeError('unknown type') + + +def extract_ast_core(v, my_id2expr, my_int2expr): + ast_tokens = _extract_ast_core(v) + ids = ast_get_ids(ast_tokens) + # print 'IDS', ids + ids_expr = [my_id2expr(x) for x in ids] + # print 'IDS_expr', ids_expr + sizes = set([i.size for i in ids_expr]) + # print "SIZE", sizes + if len(sizes) == 0: + pass + elif len(sizes) == 1: + size = sizes.pop() + my_int2expr = lambda x: ExprInt_fromsize(size, x) + else: + raise ValueError('multiple sizes in ids') + e = ast_raw2expr(ast_tokens, my_id2expr, my_int2expr) + return e + + +class parse_ast: + + def __init__(self, id2expr, int2expr, extract_ast=extract_ast_core): + self.id2expr = id2expr + self.int2expr = int2expr + self.extract_ast_core = extract_ast + + def __call__(self, v): + v = v[0] + if isinstance(v, Expr): + return v + return self.extract_ast_core(v, self.id2expr, self.int2expr) + + +def neg_int(t): + x = -t[0] + return x + + +integer = Word(nums).setParseAction(lambda s, l, t: int(t[0])) +hex_int = Combine(Literal('0x') + Word(hexnums)).setParseAction( + lambda s, l, t: int(t[0], 16)) + +# str_int = (Optional('-') + 
(hex_int | integer)) +str_int_pos = (hex_int | integer) +str_int_neg = (Suppress('-') + (hex_int | integer)).setParseAction(neg_int) + +str_int = str_int_pos | str_int_neg +str_int.setParseAction(int2expr) + +logicop = oneOf('& | ^ >> << <<< >>>') +signop = oneOf('+ -') +multop = oneOf('* / %') +plusop = oneOf('+ -') + + +def gen_base_expr(): + variable = Word(alphas + "_$.", alphanums + "_") + variable.setParseAction(parse_id) + operand = str_int | variable + base_expr = operatorPrecedence(operand, + [("!", 1, opAssoc.RIGHT, parse_op), + (logicop, 2, opAssoc.RIGHT, parse_op), + (signop, 1, opAssoc.RIGHT, parse_op), + (multop, 2, opAssoc.LEFT, parse_op), + (plusop, 2, opAssoc.LEFT, parse_op), ] + ) + return variable, operand, base_expr + + +variable, operand, base_expr = gen_base_expr() + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + +# + + +default_prio = 0x1337 + + +def isbin(s): + return re.match('[0-1]+$', s) + + +def int2bin(i, l): + s = '0' * l + bin(i)[2:] + return s[-l:] + + +def myror32(v, r): + return ((v & 0xFFFFFFFFL) >> r) | ((v << (32 - r)) & 0xFFFFFFFFL) + + +def myrol32(v, r): + return ((v & 0xFFFFFFFFL) >> (32 - r)) | ((v << r) & 0xFFFFFFFFL) + + +class bs(object): + all_new_c = {} + prio = default_prio + + def __init__(self, strbits=None, l=None, cls=None, + fname=None, order=0, flen=None, **kargs): + if fname is None: + # fname = hex(id((strbits, l, cls, fname, order, flen, kargs))) + # fname = hex(id((strbits, l, fname, order, flen))) + # print str((strbits, l, cls, fname, order, flen, kargs)) + fname = hex(id(str((strbits, l, cls, fname, order, flen, kargs)))) + # print fname + if strbits is None: + strbits = "" # "X"*l + elif l is None: + l = len(strbits) + if strbits and isbin(strbits): + value = int(strbits, 2) + elif 'default_val' in kargs: + value = int(kargs['default_val'], 2) + else: + value = None + allbits = list(strbits) + allbits.reverse() + fbits = 0 + fmask = 0 + while allbits: + a 
= allbits.pop() + if a == " ": + continue + fbits <<= 1 + fmask <<= 1 + if a in '01': + a = int(a) + fbits |= a + fmask |= 1 + lmask = (1 << l) - 1 + # gen conditional field + # if flen is None: + # flen = lambda mode, v:l + if cls: + for b in cls: + if 'flen' in b.__dict__: + flen = getattr(b, 'flen') + + self.strbits = strbits + self.l = l + self.cls = cls + self.fname = fname + self.order = order + self.lmask = lmask + self.fbits = fbits + self.fmask = fmask + self.flen = flen + self.value = value + self.kargs = kargs + + def __getitem__(self, item): + return getattr(self, item) + + def __repr__(self): + o = self.__class__.__name__ + if self.fname: + o += "_%s" % self.fname + o += "_%(strbits)s" % self + if self.cls: + o += '_' + '_'.join([x.__name__ for x in self.cls]) + return o + + def gen(self, parent): + c_name = 'nbsi' + if self.cls: + c_name += '_' + '_'.join([x.__name__ for x in self.cls]) + bases = list(self.cls) + else: + bases = [] + # bsi added at end of list + # used to use first function of added class + bases += [bsi] + # new_c = type(c_name, tuple(bases), {}) + k = c_name, tuple(bases) + if k in self.all_new_c: + new_c = self.all_new_c[k] + else: + new_c = type(c_name, tuple(bases), {}) + self.all_new_c[k] = new_c + c = new_c(parent, + self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value, self.flen, **self.kargs) + return c + + def check_fbits(self, v): + return v & self.fmask == self.fbits + + @classmethod + def flen(cls, v): + raise NotImplementedError('not fully functional') + + +class dum_arg(object): + + def __init__(self, e=None): + self.expr = e + + @staticmethod + def arg2str(e): + return str(e) + + +class bsopt(bs): + + def ispresent(self): + return True + + +class bsi(object): + + def __init__(self, parent, strbits, l, cls, fname, order, + lmask, fbits, fmask, value, flen, **kargs): + self.parent = parent + self.strbits = strbits + self.l = l + self.cls = cls + self.fname = fname + 
self.order = order + self.lmask = lmask + self.fbits = fbits + self.fmask = fmask + self.flen = flen + self.value = value + self.kargs = kargs + self.__dict__.update(self.kargs) + + def decode(self, v): + self.value = v & self.lmask + return True + + def encode(self): + # self.value = v&self.lmask + return True + + def clone(self): + s = self.__class__(self.parent, + self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value, self.flen, **self.kargs) + s.__dict__.update(self.kargs) + if hasattr(self, 'expr'): + s.expr = self.expr + return s + + def __hash__(self): + kargs = [] + for k, v in self.kargs.items(): + if isinstance(v, list): + v = tuple(v) + kargs.append((k, v)) + l = [self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value] # + kargs + # l = [self.value] + return hash(tuple(l)) + + +class bs_divert(object): + prio = default_prio + + def __init__(self, **kargs): + self.args = kargs + + def __getattr__(self, item): + if item in self.__dict__: + return self.__dict__[item] + elif item in self.args: + return self.args.get(item) + else: + raise AttributeError + + +class bs_name(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + for new_name, value in self.args['name'].items(): + nfields = fields[:] + s = int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f = bs(**args) + nfields[i] = f + ndct = dict(dct) + ndct['name'] = new_name + out.append((cls, new_name, bases, ndct, nfields)) + return out + + +class bs_mod_name(bs_divert): + prio = 2 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + for value, new_name in enumerate(self.args['mn_mod']): + nfields = fields[:] + s = int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f 
= bs(**args) + nfields[i] = f + ndct = dict(dct) + # new_name = ndct['name'] + new_name + ndct['name'] = self.modname(ndct['name'], value) + # ndct['name'] = new_name + out.append((cls, new_name, bases, ndct, nfields)) + return out + + def modname(self, name, i): + return name + self.args['mn_mod'][i] + + +class bs_cond(bsi): + pass + + +class bs_swapargs(bs_divert): + + def divert(self, i, candidates): + # print candidates + out = [] + for cls, name, bases, dct, fields in candidates: + # args not permuted + ndct = dict(dct) + nfields = fields[:] + # gen fix field + f = gen_bsint(0, self.args['l'], self.args) + nfields[i] = f + out.append((cls, name, bases, ndct, nfields)) + + # args permuted + ndct = dict(dct) + nfields = fields[:] + ap = ndct['args_permut'][:] + a = ap.pop(0) + b = ap.pop(0) + ndct['args_permut'] = [b, a] + ap + # print ndct['args_permut'] + # gen fix field + f = gen_bsint(1, self.args['l'], self.args) + nfields[i] = f + + out.append((cls, name, bases, ndct, nfields)) + return out + + +class m_arg(object): + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + self.expr = e + return start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + self.expr = v[0] + return start, stop + + @staticmethod + def arg2str(e): + return str(e) + + +class m_reg(m_arg): + prio = default_prio + + @property + def parser(self): + return self.reg.parser + + def decode(self, v): + self.expr = self.reg.expr[0] + return True + + def encode(self): + return self.expr == self.reg.expr[0] + + @staticmethod + def arg2str(e): + return str(e) + + +class reg_noarg(object): + reg_info = None + parser = None + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + self.expr = e + return start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + 
self.expr = v[0] + return start, stop + + @staticmethod + def arg2str(e): + return str(e) + + def decode(self, v): + v = v & self.lmask + if v >= len(self.reg_info.expr): + return False + self.expr = self.reg_info.expr[v] + return True + + def encode(self): + if not self.expr in self.reg_info.expr: + log.debug("cannot encode reg %r" % self.expr) + return False + self.value = self.reg_info.expr.index(self.expr) + if self.value > self.lmask: + log.debug("cannot encode field value %x %x" % + (self.value, self.lmask)) + return False + return True + + def check_fbits(self, v): + return v & self.fmask == self.fbits + + +class mn_prefix: + + def __init__(self): + b = None + + +def swap16(v): + return struct.unpack('<H', struct.pack('>H', v))[0] + + +def swap32(v): + return struct.unpack('<I', struct.pack('>I', v))[0] + + +def perm_inv(p): + o = [None for x in xrange(len(p))] + for i, x in enumerate(p): + o[x] = i + return o + + +def gen_bsint(value, l, args): + s = int2bin(value, l) + args = dict(args) + args.update({'strbits': s}) + f = bs(**args) + return f + +total_scans = 0 + + +def branch2nodes(branch, nodes=None): + if nodes is None: + node = [] + for k, v in branch.items(): + if not isinstance(v, dict): + continue + for k2 in v.keys(): + nodes.append((k, k2)) + branch2nodes(v, nodes) + + +def factor_one_bit(tree): + if isinstance(tree, set): + return tree + new_keys = defaultdict(lambda: defaultdict(dict)) + if len(tree) == 1: + return tree + for k, v in tree.items(): + # print k, v + if k == "mn": + new_keys[k] = v + continue + l, fmask, fbits, fname, flen = k + if flen is not None or l <= 1: + new_keys[k] = v + continue + cfmask = fmask >> (l - 1) + nfmask = fmask & ((1 << (l - 1)) - 1) + cfbits = fbits >> (l - 1) + nfbits = fbits & ((1 << (l - 1)) - 1) + ck = 1, cfmask, cfbits, None, flen + nk = l - 1, nfmask, nfbits, fname, flen + # print ck + if nk in new_keys[ck]: + raise NotImplementedError('not fully functional') + new_keys[ck][nk] = v + for k, v in 
def factor_fields(tree):
    """Merge a chain of two anonymous fixed-length fields into one key.

    Keys are ``(l, fmask, fbits, fname, flen)`` tuples, or the string
    ``"mn"``.  The merge only happens when *tree* is a dict with a single
    key whose value is a dict with a single key, neither key is ``"mn"``,
    and both keys have ``fname`` and ``flen`` set to None.  In every other
    case *tree* is returned unchanged.

    :return: ``{merged_key: grandchild}`` on merge, else *tree* itself.
    """
    if not isinstance(tree, dict):
        return tree
    if len(tree) != 1:
        return tree
    # next(iter(...)) fetches the single item without building a list and
    # works on both Python 2 and 3 (``items()[0]`` is Python-2-only).
    k1, v1 = next(iter(tree.items()))
    if k1 == "mn":
        return tree
    l1, fmask1, fbits1, fname1, flen1 = k1
    if fname1 is not None:
        return tree
    if flen1 is not None:
        return tree

    if not isinstance(v1, dict):
        return tree
    if len(v1) != 1:
        return tree
    k2, v2 = next(iter(v1.items()))
    if k2 == "mn":
        return tree
    l2, fmask2, fbits2, fname2, flen2 = k2
    if fname2 is not None:
        return tree
    if flen2 is not None:
        return tree

    # The first field occupies the high bits, the second the low bits.
    l = l1 + l2
    fmask = (fmask1 << l2) | fmask2
    fbits = (fbits1 << l2) | fbits2
    k = l, fmask, fbits, fname2, flen2
    return {k: v2}
def getfieldby_name(fields, fname):
    """Return the unique element of *fields* whose ``fname`` equals *fname*.

    Elements without an ``fname`` attribute are ignored.

    :raises ValueError: when no field or more than one field matches.
    """
    matches = [f for f in fields
               if hasattr(f, 'fname') and f.fname == fname]
    if not matches:
        # Previously reported as "more than one field", which was misleading.
        raise ValueError('no field with name: %s' % fname)
    if len(matches) != 1:
        raise ValueError('more than one field with name: %s' % fname)
    return matches[0]
class instruction(object):
    """One instruction: mnemonic name, mode, argument expressions and the
    callables used to render each argument as text."""

    def __init__(self, name, mode, args, args_str=None, additional_info=None):
        """Store the fields; a missing *args_str* is not supported yet."""
        self.name, self.mode, self.args = name, mode, args
        if args_str is None:
            raise NotImplementedError('not fully functional')
        self.args_str = args_str
        self.additional_info = additional_info

    def gen_args(self, args):
        """Join the string form of every item of *args* with ``', '``."""
        return ', '.join([str(item) for item in args])

    def __str__(self):
        """Render as the name left-justified on 10 columns, then the args."""
        prefix = "%-10s " % self.name
        formatters = self.args_str
        if formatters is None:
            formatters = [str] * len(self.args)
        rendered = []
        for operand, to_text in zip(self.args, formatters):
            if not isinstance(operand, Expr):
                raise ValueError('zarb arg type')
            rendered.append(to_text(operand))
        return prefix + self.gen_args(rendered)

    def resolve_args_with_symbols(self, symbols=None):
        """Return the arguments with known identifiers replaced by integers.

        For every identifier found in an argument: ``'$'`` becomes
        ``self.offset``; a name present in *symbols* becomes its ``offset``
        (0 when that offset is None); other names are left as they are.
        An ``asm_label`` name that is absent from *symbols* raises
        ValueError.  Each result goes through ``expr_simp``.

        NOTE(review): ``self.offset`` and ``self.get_symbol_size`` are not
        defined in this class -- presumably supplied by callers/subclasses.
        """
        table = {} if symbols is None else symbols
        resolved = []
        for expr in self.args:
            substitutions = {}
            for ident in get_expr_ids(expr):
                if isinstance(ident.name, asmbloc.asm_label):
                    label = ident.name.name
                    if label not in table:
                        raise ValueError('unresolved symbol! %r' % ident)
                else:
                    label = ident.name
                if label == '$':
                    # special symbol: current instruction offset
                    substitutions[ident] = ExprInt_from(ident, self.offset)
                    continue
                if label not in table:
                    continue
                if table[label].offset is None:
                    width = self.get_symbol_size(ident, table)
                    replacement = ExprInt_fromsize(width, 0)  # default value
                else:
                    width = ident.size
                    if width is None:
                        width = self.get_symbol_size(ident, table)
                    replacement = ExprInt_fromsize(width, table[label].offset)
                substitutions[ident] = replacement
            resolved.append(expr_simp(expr.replace_expr(substitutions)))
        return resolved

    def get_info(self, c):
        """Hook called with the decoded mnemonic; does nothing here."""
        return None
def init_class(self):
    """Instantiate the field objects of this mnemonic.

    For every entry of ``self.fields`` this calls ``gen(self)``, records the
    running bit offset in ``f.offset``, and binds named fields as attributes
    of ``self``.  It then sets:

    * ``self.args``: the ``m_arg`` fields, reordered by ``args_permut``
      when that attribute exists;
    * ``self.fields_order``: all generated fields in declaration order;
    * ``self.to_decode``: indexes into ``fields_order`` sorted by
      ``(field.order, declaration index)``.
    """
    args = []
    fields_order = []
    to_decode = []
    off = 0
    for i, fc in enumerate(self.fields):
        f = fc.gen(self)
        f.offset = off
        off += f.l
        fields_order.append(f)
        to_decode.append((i, f))

        if isinstance(f, m_arg):
            args.append(f)
        if f.fname:
            setattr(self, f.fname, f)
    if hasattr(self, 'args_permut'):
        args = [args[p] for p in self.args_permut]
    to_decode.sort(key=lambda x: (x[1].order, x[0]))
    # Each tuple already carries the field's position in fields_order, so
    # no list.index() scan (quadratic, and equality-based) is needed.
    to_decode = [i for i, _ in to_decode]
    self.args = args
    self.fields_order = fields_order
    self.to_decode = to_decode
'disfunc', repr(bs_o) + offset_o = offset + # print 'DIS', hex(offset), mode_o#repr(bs_o.bin) + pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis( + bs_o, mode_o, offset) + candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset) + # print 'guess', repr(v), mode, prefix.rex_w + out = [] + out_c = [] + # print 'DIS CAND', len(candidates), mode + if hasattr(bs, 'getlen'): + bs_l = bs.getlen() + else: + bs_l = len(bs) + + alias = False + for c in candidates: + # print 'RRR' + if loggg: + print "*" * 40, mode, c.mode + print c.fields + # c.mode_o = mode_o + # off = c.parse_prefix(mode_o, v) + # bits = bin_stream(v)#[:c.mn_len/8]) + + # c = c() + # c.init_class() + c = cls.all_mn_inst[c][0] + # c.init_class() + c.reset_class() + c.mode = mode + # for f in c.fields_order: print f.is_present + + if not c.add_pre_dis_info(pre_dis_info): # = prefix#cls.mnprefix() + continue + # print "zz", c.rex_w.value + """ + if prefix.opmode != c.mp[1]: + continue + if prefix.admode != c.mp[2]: + continue + """ + + args = [] + todo = {} + getok = True + fname_values = dict(pre_dis_info) + offset_b = offset * 8 + # print pre_dis_info + total_l = 0 + for i, f in enumerate(c.fields_order): + # print 'XX', i, f, id(f) + # print 'ZZ', c.rex_x.value + if f.flen is not None: + l = f.flen(mode, fname_values) + else: + l = f.l + # print 'len', l + # print "zz", c.rex_w, c.rex_w.value + if l is not None: + total_l += l + f.l = l + f.is_present = True + if loggg: + print "FIELD", f.__class__, f.fname, offset_b, l + if bs_l * 8 - offset_b < l: + getok = False + break + bv = cls.getbits(bs, offset_b, l) + offset_b += l + if not f.fname in fname_values: + fname_values[f.fname] = bv + todo[i] = bv + else: + f.is_present = False + todo[i] = None + + # print "decode", id(f), f.fname, + # print "l", l, "off", offset_b, "v", todo[i] + # print "zzz", c.rex_w, c.rex_w.value + + if not getok: + continue + + # print 'PRIOdec', [(x[0], x[1].order) for x in c.to_decode] + for i in c.to_decode: + f = 
@classmethod
def fromstring(cls, s, mode):
    """Parse the assembly text *s* into a ``cls.instruction``.

    The first whitespace-delimited token is the mnemonic name; the rest is
    parsed argument by argument against every candidate registered under
    that name in ``cls.all_mn_name``.  Scan results are cached per
    ``(argument index, text position)`` so a parser runs at most once per
    position across candidates.

    :param s: instruction text, e.g. mnemonic followed by comma-separated
        operands.
    :param mode: passed to ``get_cls_instance`` and to the instruction.
    :raises ValueError: no mnemonic in *s*, unknown mnemonic, or no
        candidate parses the whole operand string.
    :raises NotImplementedError: an argument parsed to a None expression,
        or an argument has no ``arg2str``.
    :return: the instruction built from the first matching candidate.
    """
    global total_scans
    found = re.search(r'(\S+)', s)
    if found is None:
        # Blank input: the original hit AttributeError on None.groups().
        raise ValueError('cannot find name', s)
    name = found.group(1)
    if name not in cls.all_mn_name:
        raise ValueError('unknown name', name)
    # Slice at the end of the match so leading whitespace in s does not
    # shift the operand string.
    operands = s[found.end():].strip(' ')

    out = []
    out_args = []
    parsers = defaultdict(dict)

    for cc in cls.all_mn_name[name]:
        for c in cls.get_cls_instance(cc, mode):
            args_expr = []
            args_str = operands
            cannot_parse = False
            len_o = len(args_str)

            for i, f in enumerate(c.args):
                start_i = len_o - len(args_str)
                if isinstance(f.parser, tuple):
                    parser = f.parser
                else:
                    parser = (f.parser,)
                cache = parsers[(i, start_i)]
                for p in parser:
                    if p in cache:
                        continue
                    total_scans += 1
                    try:
                        v, start, stop = next(p.scanString(args_str))
                    except StopIteration:
                        v, start, stop = [None], None, None
                    if start != 0:
                        # only a match anchored at the current position counts
                        v, start, stop = [None], None, None
                    cache[p] = v[0], start, stop

                start, stop = f.fromstring(args_str, cache)
                if start != 0:
                    log.debug("cannot fromstring %r" % (args_str))
                    cannot_parse = True
                    break
                if f.expr is None:
                    raise NotImplementedError('not fully functional')
                f.expr = expr_simp(f.expr)
                args_expr.append(f.expr)
                args_str = args_str[stop:].strip(' ')
                if args_str.startswith(','):
                    args_str = args_str[1:]
                args_str = args_str.strip(' ')
            if args_str:
                # leftover text: this candidate takes fewer operands
                cannot_parse = True
            if cannot_parse:
                continue
            out.append(c)
            out_args.append(args_expr)
            break

    if len(out) == 0:
        raise ValueError('cannot fromstring %r' % s)
    if len(out) != 1:
        log.warning('fromstring multiple args ret default')
    c = out[0]
    c_args = out_args[0]

    c_args_str = []
    for a in c.args:
        if not hasattr(a, 'arg2str'):
            raise NotImplementedError('not fully functional')
        c_args_str.append(a.arg2str)

    instr = cls.instruction(c.name, mode, c_args, c_args_str,
                            additional_info=c.additional_info())
    return instr
We then + can modify args and get the hex of a modified instruction + """ + clist = cls.all_mn_name[instr.name] + clist = [x for x in clist] # if x.mode == instr.mode] + # print 'ASM CAN', len(clist) + vals = [] + candidates = [] + # print "resolve" + args = instr.resolve_args_with_symbols(symbols) + # print "ok", [str(x) for x in args] + """ + args = [] + for i, f in enumerate(cls.args): + e = f.expr + # try to resolve symbols using symbols (0 for default value) + if symbols: + #print 'origine', e + ids = get_expr_ids(e) + fixed_ids = {} + for x in ids: + if not x.name in symbols: + #print 'not IN', x + continue + if symbols[x.name].offset is None: + value = ExprInt32(0) # default value + else: + value = ExprInt_fromsize(x.size, symbols[x.name].offset) + fixed_ids[x] = value + e = e.replace_expr(fixed_ids) + #print 'replaced e', e, fixed_ids + e = expr_simp(e) + #print 'replaced e simp', e, fixed_ids + args.append(e) + """ + for cc in clist: + # if cc.mode != cls.mode: + # continue + """ + c = c() + c.init_class() + """ + for c in cls.get_cls_instance( + cc, instr.mode, instr.additional_info): + + # c = cls.all_mn_inst[cc][0] + # c = cc() + # c.init_class() + + cannot_parse = False + if len(c.args) != len(instr.args): + continue + # print c.mode, c.mp, c.fields[6:] + # print "eee", c.fields + # print [str(x.expr) for x in cls.args] + # only fix args expr + for i in xrange(len(c.args)): + c.args[i].expr = args[i] + # print 'ARGS', [str(x) for x in args] + # for a in c.args: + # print a.expr, + # print + # print instr.mode + v = c.value(instr.mode) + if not v: + log.debug("cannot encode %r" % (c)) + cannot_parse = True + if cannot_parse: + continue + vals += v + candidates.append((c, v)) + if len(vals) == 0: + raise ValueError('cannot asm %r %r' % + (instr.name, [str(x) for x in instr.args])) + if len(vals) != 1: + log.debug('asm multiple args ret default') + # raise ValueError("cannot parse %r (%d cand)"%(s, len(out))) + """ + for x in out: + print repr(x.value()) 
+ print [str(a.expr) for a in x.args] + """ + vals = cls.filter_asm_candidates(instr, candidates) + # vals = list(set(vals)) + # vals.sort(key=lambda x:len(x)) + # dt = time.time() - t + # print 'TIME', dt, str(cls) + + return vals + + @classmethod + def filter_asm_candidates(cls, instr, candidates): + o = [] + for c, v in candidates: + o += v + o.sort(key=lambda x: len(x)) + return o + + def value(self, mode): + # print 'PRIOenc', [(x, self.fields_order[x].order) for x in + # self.to_decode[::-1]] + todo = [(0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] + # print todo + result = [] + done = [] + cpt = 0 + + # print 'VALUE'#, self.fields[6:] + while todo: + index, to_decode = todo.pop() + # TEST XXX + for i, f in to_decode: + setattr(self, f.fname, f) + # print 'todo:', len(todo), index, to_decode + # print "OOOOOOO" + # if (index, hash(tuple(to_decode))) in done: + if (index, [x[1].value for x in to_decode]) in done: + # print 'skip', to_decode + continue + done.append((index, [x[1].value for x in to_decode])) + + # done.append((index, to_decode)) + cpt += 1 + can_encode = True + for i, f in to_decode[index:]: + # print 'before', f.value, repr(f) + ret = f.encode() + # print 'encode', len(todo), index, f.fname, f.value, f.l, ret + # print 'ret', ret + if not ret: + log.debug('cannot encode %r' % f) + can_encode = False + break + index += 1 + if ret is True: + continue + + # print ret, index + gcpt = 0 + for i in ret: + gcpt += 1 + o = [] + if ((index, [xx[1].value for xx in to_decode]) in todo or + (index, [xx[1].value for xx in to_decode]) in done): + raise NotImplementedError('not fully functional') + continue + for p, f in to_decode: + fnew = f.clone() + o.append((p, fnew)) + todo.append((index, o)) + can_encode = False + # print 'gcpt', gcpt + break + if not can_encode: + continue + result.append(to_decode) + # print 'CPT', cpt + # print "HEX", len(result), result + return self.decoded2bytes(result) + + def encodefields(self, decoded): + bits 
= bitobj() + for p, f in decoded: + setattr(self, f.fname, f) + + if f.value is None: + continue + bits.putbits(f.value, f.l) + # if f.l: + # print f.l, hex(f.value), len(bits.bits), bits.bits + xx = bits.tostring() + return bits.tostring() + + def decoded2bytes(self, result): + if not result: + return [] + + out = [] + for decoded in result: + decoded.sort() + # print [f.value for p, f in decoded] + o = self.encodefields(decoded) + if o is None: + continue + out.append(o) + out = list(set(out)) + return out + + def gen_args(self, args): + out = ', '.join([str(x) for x in args]) + return out + + def args2str(self): + args = [] + for arg in self.args: + # XXX todo test + if not (isinstance(arg, Expr) or isinstance(arg.expr, Expr)): + raise ValueError('zarb arg type') + x = str(arg) + args.append(x) + return args + + def __str__(self): + o = "%-10s " % self.name + args = [] + for arg in self.args: + # XXX todo test + if not (isinstance(arg, Expr) or isinstance(arg.expr, Expr)): + raise ValueError('zarb arg type') + x = str(arg) + args.append(x) + + o += self.gen_args(args) + return o + + def parse_prefix(self, v): + return 0 + + def set_dst_symbol(self, symbol_pool): + dst = self.getdstflow(symbol_pool) + args = [] + for d in dst: + if isinstance(d, ExprInt): + l = symbol_pool.getby_offset_create(int(d.arg)) + # print l + a = ExprId(l.name, d.size) + else: + a = d + args.append(a) + self.args_symb = args + + def getdstflow(self, symbol_pool): + return [self.args[0].expr] + + +class imm_noarg(object): + # parser = str_int + intsize = 32 + intmask = (1 << intsize) - 1 + # expr2int = lambda self,x:int(self.expr.arg&self.lmask) + + def int2expr(self, v): + if (v & ~self.intmask) != 0: + return None + return ExprInt_fromsize(self.intsize, v) + + def expr2int(self, e): + if not isinstance(e, ExprInt): + return None + v = int(e.arg) + # print "testimm2", hex(v), hex(self.intmask) + if v & ~self.intmask != 0: + return None + return v + + def fromstring(self, s, 
class int32_noarg(imm_noarg):
    """Immediate whose ``self.l``-bit field is sign-extended to 32 bits."""
    intsize = 32
    intmask = (1 << intsize) - 1

    def decode(self, v):
        """Sign-extend the raw field value *v* and store it in ``self.expr``.

        :return: True on success, False when ``int2expr`` rejects the value
            (it returns None), matching ``imm_noarg.decode``.
        """
        v = sign_ext(v, self.l, self.intsize)
        v = self.decodeval(v)
        e = self.int2expr(v)
        if e is None:
            return False
        self.expr = e
        return True

    def encode(self):
        """Compute ``self.value`` from ``self.expr``.

        :return: False when the expression is not an ``ExprInt``, when it
            does not survive a truncate/sign-extend round trip through
            ``self.l`` bits, or when ``encodeval`` reports failure.
        """
        if not isinstance(self.expr, ExprInt):
            return False
        v = int(self.expr.arg)
        if sign_ext(v & self.lmask, self.l, self.intsize) != v:
            return False
        v = self.encodeval(v & self.lmask)
        if v is False:
            # ``False & lmask`` would silently encode 0.
            return False
        self.value = v & self.lmask
        return True
class DiGraph:
    """Directed graph stored as a node set, an edge list and two per-node
    edge indexes (outgoing in ``_nodes_to``, incoming in ``_nodes_from``)."""

    def __init__(self):
        self._nodes = set()
        self._edges = []
        # node -> list of (node, dst) edges leaving it
        self._nodes_to = {}
        # node -> list of (src, node) edges entering it
        self._nodes_from = {}

    def __repr__(self):
        out = []
        for n in self._nodes:
            out.append(str(n))
        for a, b in self._edges:
            out.append("%s -> %s" % (a, b))
        return '\n'.join(out)

    def nodes(self):
        """Return the internal node set (not a copy)."""
        return self._nodes

    def edges(self):
        """Return the internal edge list (not a copy)."""
        return self._edges

    def add_node(self, n):
        """Add node *n*; a node already present is left untouched."""
        if n in self._nodes:
            return
        self._nodes.add(n)
        self._nodes_to[n] = []
        self._nodes_from[n] = []

    def add_edge(self, a, b):
        """Add edge a -> b, creating missing nodes; duplicates are kept."""
        if not a in self._nodes:
            self.add_node(a)
        if not b in self._nodes:
            self.add_node(b)
        self._edges.append((a, b))
        self._nodes_to[a].append((a, b))
        self._nodes_from[b].append((a, b))

    def add_uniq_edge(self, a, b):
        """Add edge a -> b unless it is already present."""
        if (a, b) in self._edges:
            return
        else:
            self.add_edge(a, b)

    def del_edge(self, a, b):
        """Remove one occurrence of edge a -> b (ValueError if absent)."""
        self._edges.remove((a, b))
        self._nodes_to[a].remove((a, b))
        self._nodes_from[b].remove((a, b))

    def predecessors_iter(self, n):
        """Yield the source of every edge entering *n* (nothing if unknown)."""
        if not n in self._nodes_from:
            # A plain return ends the generator; ``raise StopIteration``
            # inside a generator becomes RuntimeError under PEP 479.
            return
        for a, _ in self._nodes_from[n]:
            yield a

    def predecessors(self, n):
        return [x for x in self.predecessors_iter(n)]

    def successors_iter(self, n):
        """Yield the target of every edge leaving *n* (nothing if unknown)."""
        if not n in self._nodes_to:
            return
        for _, b in self._nodes_to[n]:
            yield b

    def successors(self, n):
        return [x for x in self.successors_iter(n)]

    def leaves_iter(self):
        """Yield the nodes without outgoing edge."""
        for n in self._nodes:
            if len(self._nodes_to[n]) == 0:
                yield n

    def leaves(self):
        return [x for x in self.leaves_iter()]

    def roots_iter(self):
        """Yield the nodes without incoming edge."""
        for n in self._nodes:
            if len(self._nodes_from[n]) == 0:
                yield n

    def roots(self):
        return [x for x in self.roots_iter()]

    def find_path(self, a, b, cycles_count=0, done=None):
        """Return the paths from *a* to *b* as lists of nodes.

        The search walks backwards from *b* through predecessors.  *done*
        counts how many times each node was expanded on the current branch;
        a node expanded more than *cycles_count* times stops the branch.
        """
        if done is None:
            done = {}
        if b in done and done[b] > cycles_count:
            return [[]]
        if a == b:
            return [[a]]
        out = []
        for n in self.predecessors(b):
            done_n = dict(done)
            done_n[b] = done_n.get(b, 0) + 1
            for path in self.find_path(a, n, cycles_count, done_n):
                # keep only the branches that actually reached a
                if path and path[0] == a:
                    out.append(path + [b])
        return out

    def node2str(self, n):
        """Label used for node *n* in ``dot``; override to customize."""
        return str(n)

    def edge2str(self, a, b):
        """Label used for edge a -> b in ``dot``; override to customize."""
        return ""

    def dot(self):
        """Return the graph in Graphviz dot syntax; nodes are identified by
        their hash masked to 64 bits."""
        out = """
digraph asm_graph {
graph [
splines=polyline,
];
node [
fontsize = "16",
shape = "box"
];
"""
        for n in self.nodes():
            out += '%s [label="%s"];\n' % (
                hash(n) & 0xFFFFFFFFFFFFFFFF, self.node2str(n))

        for a, b in self.edges():
            out += '%s -> %s [label="%s"]\n' % (hash(a) & 0xFFFFFFFFFFFFFFFF,
                                                  hash(b) & 0xFFFFFFFFFFFFFFFF,
                                                  self.edge2str(a, b))
        out += "}"
        return out
class interval:

    """Set of integers stored as a list of closed ranges.

    Each range is a (start, stop) tuple standing for [start, stop], both
    bounds included. The list is canonised on construction: ranges with
    start > stop are dropped, overlapping or adjacent ranges are merged, and
    the result is sorted in increasing order.
    """

    def __init__(self, a=None):
        """Build an interval set
        @a: None (empty set), a list of (start, stop) tuples, or another
            interval whose range list is reused
        """
        if a is None:
            a = []
        if isinstance(a, interval):
            a = a.intervals
        self.is_cannon = False
        self.intervals = a
        self.cannon()

    def __iter__(self):
        """Iterate over the (start, stop) ranges"""
        for x in self.intervals:
            yield x

    @classmethod
    def cannon_list(cls, tmp):
        """Return a canonised list of ranges
        @tmp: iterable of (start, stop) tuples, in any order
        """
        tmp = sorted([x for x in tmp if x[0] <= x[1]])
        out = []
        if not tmp:
            return out
        # Ranges are consumed from the highest one down, so `out` is built in
        # decreasing order and reversed before being returned
        out.append(tmp.pop())
        while tmp:
            x = tmp.pop()
            rez = cmp_interval(out[-1], x)
            if rez == INT_EQ:
                continue
            elif rez == INT_DISJOIN:
                out.append(x)
            elif rez == INT_B_IN_A:
                continue
            elif rez in [INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]:
                # Merge x with every already-kept range it touches
                u, v = x
                while out and cmp_interval(out[-1], (u, v)) in [
                        INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]:
                    u = min(u, out[-1][0])
                    v = max(v, out[-1][1])
                    out.pop()
                out.append((u, v))
            else:
                raise ValueError('unknown state', rez)
        return out[::-1]

    def cannon(self):
        """Canonise self.intervals in place (done once per instance)"""
        if self.is_cannon is True:
            return
        self.intervals = interval.cannon_list(self.intervals)
        self.is_cannon = True

    def __repr__(self):
        if self.intervals:
            o = " U ".join(["[0x%X 0x%X]" % (x[0], x[1])
                           for x in self.intervals])
        else:
            o = "[]"
        return o

    def __contains__(self, i):
        """Membership test
        @i: an integer (is it covered by one of the ranges?) or an interval
            (is every one of its ranges covered by self?)
        """
        if isinstance(i, interval):
            # Canonical ranges are separated by gaps, so a range of i is
            # covered iff it fits inside a single range of self
            for y in i.intervals:
                if not any(x[0] <= y[0] and y[1] <= x[1]
                           for x in self.intervals):
                    return False
            return True
        for x in self.intervals:
            if x[0] <= i <= x[1]:
                return True
        return False

    def __eq__(self, i):
        if not isinstance(i, interval):
            return NotImplemented
        return self.intervals == i.intervals

    def __ne__(self, i):
        # Python 2 does not derive != from ==
        result = self.__eq__(i)
        if result is NotImplemented:
            return result
        return not result

    def __add__(self, i):
        """Return the union of self and @i (interval or list of ranges)"""
        if isinstance(i, interval):
            i = i.intervals
        i = interval(self.intervals + i)
        return i

    def __sub__(self, v):
        """Return self with every integer of interval @v removed"""
        to_test = self.intervals[:]
        i = -1
        to_del = v.intervals[:]
        while i < len(to_test) - 1:
            i += 1
            x = to_test[i]
            if x[0] > x[1]:
                del to_test[i]
                i -= 1
                continue

            # Ranges to remove lying entirely below x can no longer matter
            while to_del and to_del[0][1] < x[0]:
                del to_del[0]

            for y in to_del:
                if y[0] > x[1]:
                    break
                rez = cmp_interval(x, y)
                if rez == INT_DISJOIN:
                    continue
                elif rez == INT_EQ:
                    del to_test[i]
                    i -= 1
                    break
                elif rez == INT_A_IN_B:
                    del to_test[i]
                    i -= 1
                    break
                elif rez == INT_B_IN_A:
                    # y punches a hole in x: keep both sides and re-examine
                    # them (an empty side is dropped by the a > b test above)
                    del to_test[i]
                    i1 = (x[0], y[0] - 1)
                    i2 = (y[1] + 1, x[1])
                    to_test[i:i] = [i1, i2]
                    i -= 1
                    break
                elif rez in [INT_JOIN_AB, INT_JOIN_BA]:
                    continue
                elif rez == INT_JOIN:
                    del to_test[i]
                    if x[0] < y[0]:
                        to_test[i:i] = [(x[0], y[0] - 1)]
                    else:
                        to_test[i:i] = [(y[1] + 1, x[1])]
                    i -= 1
                    break
                else:
                    raise ValueError('unknown state', rez)
        return interval(to_test)

    def __and__(self, v):
        """Return the intersection of self and interval @v"""
        out = []
        for x in self.intervals:
            if x[0] > x[1]:
                continue
            for y in v.intervals:
                rez = cmp_interval(x, y)
                if rez == INT_DISJOIN:
                    continue
                elif rez == INT_EQ:
                    out.append(x)
                    continue
                elif rez == INT_A_IN_B:
                    out.append(x)
                    continue
                elif rez == INT_B_IN_A:
                    out.append(y)
                    continue
                elif rez == INT_JOIN_AB:
                    continue
                elif rez == INT_JOIN_BA:
                    continue
                elif rez == INT_JOIN:
                    if x[0] < y[0]:
                        out.append((y[0], x[1]))
                    else:
                        out.append((x[0], y[1]))
                    continue
                else:
                    raise ValueError('unknown state', rez)
        return interval(out)

    def hull(self):
        """Return (lowest bound, highest bound), or (None, None) if empty"""
        if not self.intervals:
            return None, None
        return self.intervals[0][0], self.intervals[-1][1]

    def show(self, img_x=1350, img_y=20, dry_run=False):
        """Show an image representing the interval
        @img_x: image width in pixels
        @img_y: image height in pixels
        @dry_run: if True, build the image but do not display it
        """
        try:
            from PIL import Image, ImageDraw
        except ImportError:
            # Fall back on the historical top-level PIL modules
            try:
                import Image
                import ImageDraw
            except ImportError:
                print('cannot import python PIL imaging')
                return

        i_min, i_max = self.hull()
        if i_min is None:
            # Empty set: no hull to scale against, nothing to draw
            return

        img = Image.new('RGB', (img_x, img_y), (100, 100, 100))
        draw = ImageDraw.Draw(img)

        print("%s %s" % (hex(i_min), hex(i_max)))

        # A single-point hull would otherwise divide by zero
        span = max(i_max - i_min, 1)

        def addr2x(addr):
            return (addr - i_min) * img_x // span
        for a, b in self.intervals:
            draw.rectangle((addr2x(a), 0, addr2x(b), img_y), (200, 0, 0))

        if dry_run is False:
            img.show()
line.replace(r'\n', '\n').replace(r'\r', '\r') + raw = line[line.find(r'"') + 1:line.rfind(r"'")] + if directive == 'string': + raw += "\x00" + lines.append(asm_raw(raw)) + continue + if directive == 'ustring': + # XXX HACK + line = line.replace(r'\n', '\n').replace(r'\r', '\r') + raw = line[line.find(r'"') + 1:line.rfind(r"'")] + "\x00" + raw = "".join(map(lambda x: x + '\x00', raw)) + lines.append(asm_raw(raw)) + continue + if directive in declarator: + data_raw = line[r.end():].split() + try: + data_int = [] + for b in data_raw: + if re.search(r'0x', b): + data_int.append(int(b, 16)) + else: + data_int.append(int(b) % (1 << 32)) + raw = reduce(lambda x, y: x + struct.pack( + declarator[directive], y), data_int, "") + except ValueError: + raw = line + lines.append(asm_raw(raw)) + continue + if directive == 'comm': + # TODO + continue + if directive == 'split': # custom command + lines.append(asm_raw(line.strip())) + continue + if directive == 'dontsplit': # custom command + lines.append(asm_raw(line.strip())) + continue + if directive in ['file', 'intel_syntax', 'globl', 'local', + 'type', 'size', 'align', 'ident', 'section']: + continue + if directive[0:4] == 'cfi_': + continue + + raise ValueError("unknown directive %s" % str(directive)) + + # label + r = re.match(r'\s*(\S+)\s*:', line) + if r: + l = r.groups()[0] + l = symbol_pool.getby_name_create(l) + lines.append(l) + continue + + # code + if ';' in line: + line = line[:line.find(';')] + line = line.strip(' ').strip('\t') + instr = mnemo.fromstring(line, attrib) + if instr.dstflow(): + instr.dstflow2label(symbol_pool) + lines.append(instr) + + log_asmbloc.info("___pre asm oki___") + # make blocs + # gen_label_index = 0 + + blocs_sections = [] + bloc_num = 0 + for lines in [lines_text, lines_data, lines_bss]: + state = 0 + i = 0 + blocs = [] + blocs_sections.append(blocs) + bloc_to_nlink = None + block_may_link = False + while i < len(lines): + # print 'DEAL', lines[i], state + # no current bloc + if state 
== 0: + if not isinstance(lines[i], asm_label): + l = guess_next_new_label(symbol_pool) + lines[i:i] = [l] + else: + l = lines[i] + b = asm_bloc(l) + b.bloc_num = bloc_num + bloc_num += 1 + blocs.append(b) + state = 1 + i += 1 + if bloc_to_nlink: + # print 'nlink!' + bloc_to_nlink.addto( + asm_constraint(b.label, asm_constraint.c_next)) + bloc_to_nlink = None + + # in bloc + elif state == 1: + # asm_raw + if isinstance(lines[i], asm_raw): + if lines[i].raw.startswith('.split'): + state = 0 + block_may_link = False + i += 1 + elif lines[i].raw.startswith('.dontsplit'): + # print 'dontsplit' + state = 1 + block_may_link = True + i += 1 + else: + b.addline(lines[i]) + i += 1 + # asm_label + elif isinstance(lines[i], asm_label): + if block_may_link: + # print 'nlink!' + b.addto( + asm_constraint(lines[i], asm_constraint.c_next)) + block_may_link = False + state = 0 + # instruction + else: + b.addline(lines[i]) + if lines[i].dstflow(): + ''' + mydst = lines[i].args + if len(mydst)==1 and mnemo.get_symbols(mydst[0]): + arg = dict(mydst[0]) + symbs = mnemo.get_symbols(arg) + """ + TODO XXX redo this (as many miasm parts) + """ + l = symbs[0][0] + lines[i].setdstflow([l]) + b.addto(asm_constraint(l, asm_constraint.c_to)) + ''' + for x in lines[i].getdstflow(symbol_pool): + if not isinstance(x, m2_expr.ExprId): + continue + if x in mnemo.regs.all_regs_ids: + continue + b.addto(asm_constraint(x, asm_constraint.c_to)) + + # TODO XXX redo this really + + if not lines[i].breakflow() and i + 1 < len(lines): + if isinstance(lines[i + 1], asm_label): + l = lines[i + 1] + else: + l = guess_next_new_label(symbol_pool) + lines[i + 1:i + 1] = [l] + else: + state = 0 + + if lines[i].splitflow(): + bloc_to_nlink = b + if not lines[i].breakflow() or lines[i].splitflow(): + block_may_link = True + else: + block_may_link = False + + i += 1 + + for b in blocs_sections[0]: + log_asmbloc.info(b) + + return blocs_sections, symbol_pool diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py 
import collections
import struct

# Unpack helpers: decode one unsigned integer from a packed string. The format
# strings carry no byte-order prefix, so struct uses the native byte order.
upck8 = lambda x: struct.unpack('B', x)[0]
upck16 = lambda x: struct.unpack('H', x)[0]
upck32 = lambda x: struct.unpack('I', x)[0]
upck64 = lambda x: struct.unpack('Q', x)[0]

# Pack helpers: the inverse of the upck* functions above
pck8 = lambda x: struct.pack('B', x)
pck16 = lambda x: struct.pack('H', x)
pck32 = lambda x: struct.pack('I', x)
pck64 = lambda x: struct.pack('Q', x)


class Disasm_Exception(Exception):
    pass


def hexdump(src, length=16):
    """Print a hexadecimal dump of @src on stdout
    @src: string (or byte sequence) to dump
    @length: number of bytes shown per output line

    Each line holds the offset, the bytes in hexadecimal and their printable
    form, where non printable bytes are replaced by '.'.
    """
    # A byte is shown as itself only if its repr() is the bare character
    # between two quotes (3 characters); anything else is shown as '.'
    FILTER = ''.join(
        [chr(x) if len(repr(chr(x))) == 3 else '.' for x in range(256)])
    lines = []
    for c in range(0, len(src), length):
        chars = src[c:c + length]
        # Iterating a byte sequence may yield integers instead of characters
        codes = [x if isinstance(x, int) else ord(x) for x in chars]
        hex_part = ' '.join(["%02x" % code for code in codes])
        printable = ''.join(
            [FILTER[code] if code <= 127 else '.' for code in codes])
        lines.append("%04x %-*s %s\n" % (c, length * 3, hex_part, printable))
    print(''.join(lines))

# stackoverflow.com/questions/2912231


class keydefaultdict(collections.defaultdict):

    """defaultdict whose factory receives the missing key as argument"""

    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        value = self[key] = self.default_factory(key)
        return value
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py new file mode 100644 index 00000000..3d73ee10 --- /dev/null +++ b/miasm2/expression/expression.py @@ -0,0 +1,1253 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# This module implements the Miasm IR components and the related basic operations.
def visit_chk(visitor):
    """Function decorator launching callback on Expression visit
    @visitor: visit method taking (expression, callback, test_visit)

    The returned function first asks test_visit whether the expression must
    be visited; if not, the expression is returned untouched. Otherwise the
    wrapped visitor is run, and the callback is applied to its result unless
    that result is None.
    """
    import functools

    # Keep the name and docstring of the decorated visit method
    @functools.wraps(visitor)
    def wrapped(e, cb, test_visit=lambda x: True):
        if (test_visit is not None) and (not test_visit(e)):
            return e
        e_new = visitor(e, cb, test_visit)
        if e_new is None:
            return None
        return cb(e_new)
    return wrapped

# Hashing constants: one tag per expression class, mixed into each hash.
# Tags must be distinct since expression equality compares hashes only.
EXPRINT = 1
EXPRID = 2
EXPRAFF = 3
EXPRCOND = 4
EXPRMEM = 5
EXPROP = 6
EXPRSLICE = 7
EXPRCOMPOSE = 8
+ elif nfrom.src1 == nto: + return "True" + elif nfrom.src2 == nto: + return "False" + + return "" + + +# IR definitions + +class Expr(object): + + "Parent class for Miasm Expressions" + + is_term = False # Terminal expression + is_simp = False # Expression already simplified + is_canon = False # Expression already canonised + is_eval = False # Expression already evalued + + def set_size(self, value): + raise ValueError('size is not mutable') + size = property(lambda self: self._size) + + def __init__(self, arg): + self.arg = arg + + # Common operations + def __str__(self): + return str(self.arg) + + def __getitem__(self, i): + if not isinstance(i, slice): + raise TypeError("Expression: Bad slice: %s" % i) + start, stop, step = i.indices(self.size) + if step != 1: + raise ValueError("Expression: Bad slice: %s" % i) + return ExprSlice(self, start, stop) + + def get_size(self): + raise DeprecationWarning("use X.size instead of X.get_size()") + + def get_r(self, mem_read=False, cst_read=False): + return self.arg.get_r(mem_read, cst_read) + + def get_w(self): + return self.arg.get_w() + + def __repr__(self): + return "<%s_%d_0x%x>" % (self.__class__.__name__, self.size, id(self)) + + def __hash__(self): + return self._hash + + def __eq__(self, a): + if isinstance(a, Expr): + return self._hash == a._hash + else: + return False + + def __ne__(self, a): + return not self.__eq__(a) + + def __add__(self, a): + return ExprOp('+', self, a) + + def __sub__(self, a): + return ExprOp('+', self, ExprOp('-', a)) + + def __div__(self, a): + return ExprOp('/', self, a) + + def __mod__(self, a): + return ExprOp('%', self, a) + + def __mul__(self, a): + return ExprOp('*', self, a) + + def __lshift__(self, a): + return ExprOp('<<', self, a) + + def __rshift__(self, a): + return ExprOp('>>', self, a) + + def __xor__(self, a): + return ExprOp('^', self, a) + + def __or__(self, a): + return ExprOp('|', self, a) + + def __and__(self, a): + return ExprOp('&', self, a) + + def __neg__(self): 
+ return ExprOp('-', self) + + def __invert__(self): + s = self.size + return ExprOp('^', self, ExprInt(mod_size2uint[s](size2mask(s)))) + + def copy(self): + "Deep copy of the expression" + return self.visit(lambda x: x) + + def replace_expr(self, dct=None): + """Find and replace sub expression using dct + @dct: dictionnary of Expr -> * + """ + if dct is None: + dct = {} + + def my_replace(e, dct): + if e in dct: + return dct[e] + return e + return self.visit(lambda e: my_replace(e, dct)) + + def canonize(self): + "Canonize the Expression" + + def must_canon(e): + # print 'test VISIT', e + return not e.is_simp + + def my_canon(e): + if e.is_simp: + return e + if isinstance(e, ExprOp): + if e.is_associative(): + # ((a+b) + c) => (a + b + c) + args = [] + for a in e.args: + if isinstance(a, ExprOp) and e.op == a.op: + args += a.args + else: + args.append(a) + args = canonize_expr_list(args) + new_e = ExprOp(e.op, *args) + else: + new_e = e + elif isinstance(e, ExprCompose): + new_e = ExprCompose(canonize_expr_list_compose(e.args)) + else: + new_e = e + return new_e + return self.visit(my_canon, must_canon) + + def msb(self): + "Return the Most Significant Bit" + s = self.size + return self[s - 1:s] + + def zeroExtend(self, size): + """Zero extend to size + @size: int + """ + assert(self.size <= size) + if self.size == size: + return self + ad_size = size - self.size + n = ExprInt_fromsize(ad_size, 0) + return ExprCompose([(self, 0, self.size), + (n, self.size, size)]) + + def signExtend(self, size): + """Sign extend to size + @size: int + """ + assert(self.size <= size) + if self.size == size: + return self + ad_size = size - self.size + c = ExprCompose([(self, 0, self.size), + (ExprCond(self.msb(), + ExprInt_fromsize( + ad_size, size2mask(ad_size)), + ExprInt_fromsize(ad_size, 0)), + self.size, size) + ]) + return c + + def graph_recursive(self, graph): + """Recursive method used by graph + @graph: miasm2.core.graph.DiGraph instance + Update @graph instance to 
include sons + This is an Abstract method""" + + raise ValueError("Abstract method") + + def graph(self): + """Return a DiGraph instance standing for Expr tree + Instance's display functions have been override for better visibility + Wrapper on graph_recursive""" + + # Create recursively the graph + graph = DiGraphExpr() + self.graph_recursive(graph) + + return graph + + def set_mask(self, value): + raise ValueError('mask is not mutable') + mask = property(lambda self: ExprInt_fromsize(self.size, -1)) + + +class ExprInt(Expr): + + """An ExprInt represent a constant in Miasm IR. + + Some use cases: + - Constant 0x42 + - Constant -0x30 + - Constant 0x12345678 on 32bits + """ + + def __init__(self, arg): + """Create an ExprInt from a numpy int + @arg: numpy int""" + + if not is_modint(arg): + raise ValueError('arg must by numpy int! %s' % arg) + + self.arg = arg + self._size = self.arg.size + self._hash = self.myhash() + + def __get_int(self): + "Return self integer representation" + return int(self.arg & size2mask(self.size)) + + def __str__(self): + if self.arg < 0: + return str("-0x%X" % (- self.__get_int())) + else: + return str("0x%X" % self.__get_int()) + + def get_r(self, mem_read=False, cst_read=False): + if cst_read: + return set([self]) + else: + return set() + + def get_w(self): + return set() + + def __contains__(self, e): + return self == e + + def myhash(self): + return hash((EXPRINT, self.arg, self.size)) + + def __repr__(self): + return Expr.__repr__(self)[:-1] + " 0x%X>" % self.__get_int() + + @visit_chk + def visit(self, cb, tv=None): + return self + + def copy(self): + return ExprInt(self.arg) + + def depth(self): + return 1 + + def graph_recursive(self, graph): + graph.add_node(self) + + +class ExprId(Expr): + + """An ExprId represent an identifier in Miasm IR. 
+ + Some use cases: + - EAX register + - 'start' offset + - variable v1 + """ + + def __init__(self, name, size=32, is_term=False): + """Create an identifier + @name: str, identifier's name + @size: int, identifier's size + @is_term: boolean, is the identifier a terminal expression ? + """ + + self.name, self._size = name, size + self.is_term = is_term + self._hash = self.myhash() + + def __str__(self): + return str(self.name) + + def get_r(self, mem_read=False, cst_read=False): + return set([self]) + + def get_w(self): + return set([self]) + + def __contains__(self, e): + return self == e + + def myhash(self): + # TODO XXX: hash size ?? + return hash((EXPRID, self.name, self._size)) + + def __repr__(self): + return Expr.__repr__(self)[:-1] + " %s>" % self.name + + @visit_chk + def visit(self, cb, tv=None): + return self + + def copy(self): + return ExprId(self.name, self._size) + + def depth(self): + return 1 + + def graph_recursive(self, graph): + graph.add_node(self) + + +class ExprAff(Expr): + + """An ExprAff represent an affection from an Expression to another one. + + Some use cases: + - var1 <- 2 + """ + + def __init__(self, dst, src): + """Create an ExprAff for dst <- src + @dst: Expr, affectation destination + @src: Expr, affectation source + """ + + if dst.size != src.size: + raise ValueError( + "sanitycheck: ExprAff args must have same size! 
%s" % + ([(str(x), x.size) for x in [dst, src]])) + + if isinstance(dst, ExprSlice): + # Complete the source with missing slice parts + self.dst = dst.arg + rest = [(ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) + for r in dst.slice_rest()] + all_a = [(src, dst.start, dst.stop)] + rest + all_a.sort(key=lambda x: x[1]) + self.src = ExprCompose(all_a) + + else: + self.dst, self.src = dst, src + + self._hash = self.myhash() + self._size = self.dst.size + + def __str__(self): + return "%s = %s" % (str(self.dst), str(self.src)) + + def get_r(self, mem_read=False, cst_read=False): + r = self.src.get_r(mem_read, cst_read) + if isinstance(self.dst, ExprMem): + r.update(self.dst.arg.get_r(mem_read, cst_read)) + return r + + def get_w(self): + if isinstance(self.dst, ExprMem): + return set([self.dst]) # [memreg] + else: + return self.dst.get_w() + + def __contains__(self, e): + return self == e or self.src.__contains__(e) or self.dst.__contains__(e) + + def myhash(self): + return hash((EXPRAFF, self.dst._hash, self.src._hash)) + + # XXX /!\ for hackish expraff to slice + def get_modified_slice(self): + """Return an Expr list of extra expressions needed during the + object instanciation""" + + dst = self.dst + if not isinstance(self.src, ExprCompose): + raise ValueError("Get mod slice not on expraff slice", str(self)) + modified_s = [] + for x in self.src.args: + if (not isinstance(x[0], ExprSlice) or + x[0].arg != dst or + x[1] != x[0].start or + x[2] != x[0].stop): + # If x is not the initial expression + modified_s.append(x) + return modified_s + + @visit_chk + def visit(self, cb, tv=None): + dst, src = self.dst.visit(cb, tv), self.src.visit(cb, tv) + if dst == self.dst and src == self.src: + return self + else: + return ExprAff(dst, src) + + def copy(self): + return ExprAff(self.dst.copy(), self.src.copy()) + + def depth(self): + return max(self.src.depth(), self.dst.depth()) + 1 + + def graph_recursive(self, graph): + graph.add_node(self) + for a in [self.src, self.dst]: + 
a.graph_recursive(graph) + graph.add_uniq_edge(self, a) + + +class ExprCond(Expr): + + """An ExprCond stand for a condition on an Expr + + Use cases: + - var1 < var2 + - min(var1, var2) + - if (cond) then ... else ... + """ + + def __init__(self, cond, src1, src2): + """Create an ExprCond + @cond: Expr, condition + @src1: Expr, value if condition is evaled to not zero + @src2: Expr, value if condition is evaled zero + """ + + self.cond, self.src1, self.src2 = cond, src1, src2 + assert(src1.size == src2.size) + self._hash = self.myhash() + self._size = self.src1.size + + def __str__(self): + return "%s?(%s,%s)" % (str(self.cond), str(self.src1), str(self.src2)) + + def get_r(self, mem_read=False, cst_read=False): + out_src1 = self.src1.get_r(mem_read, cst_read) + out_src2 = self.src2.get_r(mem_read, cst_read) + return self.cond.get_r(mem_read, + cst_read).union(out_src1).union(out_src2) + + def get_w(self): + return set() + + def __contains__(self, e): + return (self == e or + self.cond.__contains__(e) or + self.src1.__contains__(e) or + self.src2.__contains__(e)) + + def myhash(self): + return hash((EXPRCOND, self.cond._hash, + self.src1._hash, self.src2._hash)) + + @visit_chk + def visit(self, cb, tv=None): + cond = self.cond.visit(cb, tv) + src1 = self.src1.visit(cb, tv) + src2 = self.src2.visit(cb, tv) + if cond == self.cond and \ + src1 == self.src1 and \ + src2 == self.src2: + return self + return ExprCond(cond, src1, src2) + + def copy(self): + return ExprCond(self.cond.copy(), + self.src1.copy(), + self.src2.copy()) + + def depth(self): + return max(self.cond.depth(), + self.src1.depth(), + self.src2.depth()) + 1 + + def graph_recursive(self, graph): + graph.add_node(self) + for a in [self.cond, self.src1, self.src2]: + a.graph_recursive(graph) + graph.add_uniq_edge(self, a) + + +class ExprMem(Expr): + + """An ExprMem stand for a memory access + + Use cases: + - Memory read + - Memory write + """ + + def __init__(self, arg, size=32): + """Create an ExprMem + 
@arg: Expr, memory access address + @size: int, memory access size + """ + if not isinstance(arg, Expr): + raise ValueError( + 'ExprMem: arg must be an Expr (not %s)' % type(arg)) + + self.arg, self._size = arg, size + self._hash = self.myhash() + + def __str__(self): + return "@%d[%s]" % (self._size, str(self.arg)) + + def get_r(self, mem_read=False, cst_read=False): + if mem_read: + return set(self.arg.get_r(mem_read, cst_read).union(set([self]))) + else: + return set([self]) + + def get_w(self): + return set([self]) # [memreg] + + def __contains__(self, e): + return self == e or self.arg.__contains__(e) + + def myhash(self): + return hash((EXPRMEM, self.arg._hash, self._size)) + + @visit_chk + def visit(self, cb, tv=None): + arg = self.arg.visit(cb, tv) + if arg == self.arg: + return self + return ExprMem(arg, self._size) + + def copy(self): + arg = self.arg.copy() + return ExprMem(arg, size=self._size) + + def is_op_segm(self): + return isinstance(self.arg, ExprOp) and self.arg.op == 'segm' + + def depth(self): + return self.arg.depth() + 1 + + def graph_recursive(self, graph): + graph.add_node(self) + self.arg.graph_recursive(graph) + graph.add_uniq_edge(self, self.arg) + + +class ExprOp(Expr): + + """An ExprOp stand for an operation between Expr + + Use cases: + - var1 XOR var2 + - var1 + var2 + var3 + - parity bit(var1) + """ + + def __init__(self, op, *args): + """Create an ExprOp + @op: str, operation + @*args: Expr, operand list + """ + + sizes = set([x.size for x in args]) + + if None not in sizes and len(sizes) != 1: + # Special cases : operande sizes can differ + if op not in ["segm"]: + raise ValueError( + "sanitycheck: ExprOp args must have same size! 
%s" % + ([(str(x), x.size) for x in args])) + + if not isinstance(op, str): + raise ValueError("ExprOp: 'op' argument must be a string") + + self.op, self.args = op, tuple(args) + self._hash = self.myhash() + + # Set size for special cases + if self.op in [ + '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', + "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", + "ucomiss_zf", "ucomiss_pf", "ucomiss_cf"]: + sz = 1 + elif self.op in ['mem_16_to_double', 'mem_32_to_double', + 'mem_64_to_double', 'mem_80_to_double', + 'int_16_to_double', 'int_32_to_double', + 'int_64_to_double', 'int_80_to_double']: + sz = 64 + elif self.op in ['double_to_mem_16', 'double_to_int_16']: + sz = 16 + elif self.op in ['double_to_mem_32', 'double_to_int_32']: + sz = 32 + elif self.op in ['double_to_mem_64', 'double_to_int_64']: + sz = 64 + elif self.op in ['double_to_mem_80', 'double_to_int_80']: + sz = 80 + elif self.op in ['segm']: + sz = self.args[1].size + else: + if None in sizes: + sz = None + else: + # All arguments have the same size + sz = list(sizes)[0] + + self._size = sz + + def __str__(self): + if self.is_associative(): + return '(' + self.op.join([str(x) for x in self.args]) + ')' + if len(self.args) == 2: + return '(' + str(self.args[0]) + \ + ' ' + self.op + ' ' + str(self.args[1]) + ')' + elif len(self.args) > 2: + return self.op + '(' + ', '.join([str(x) for x in self.args]) + ')' + else: + return reduce(lambda x, y: x + ' ' + str(y), + self.args, + '(' + str(self.op)) + ')' + + def get_r(self, mem_read=False, cst_read=False): + return reduce(lambda x, y: + x.union(y.get_r(mem_read, cst_read)), self.args, set()) + + def get_w(self): + raise ValueError('op cannot be written!', self) + + def __contains__(self, e): + if self == e: + return True + for a in self.args: + if a.__contains__(e): + return True + return False + + def myhash(self): + h_hargs = [x._hash for x in self.args] + return hash((EXPROP, self.op, tuple(h_hargs))) + + def is_associative(self): + 
"Return True iff current operation is associative" + return (self.op in ['+', '*', '^', '&', '|']) + + def is_commutative(self): + "Return True iff current operation is commutative" + return (self.op in ['+', '*', '^', '&', '|']) + + @visit_chk + def visit(self, cb, tv=None): + args = [a.visit(cb, tv) for a in self.args] + modified = any([x[0] != x[1] for x in zip(self.args, args)]) + if modified: + return ExprOp(self.op, *args) + return self + + def copy(self): + args = [a.copy() for a in self.args] + return ExprOp(self.op, *args) + + def depth(self): + depth = [a.depth() for a in self.args] + return max(depth) + 1 + + def graph_recursive(self, graph): + graph.add_node(self) + for a in self.args: + a.graph_recursive(graph) + graph.add_uniq_edge(self, a) + + +class ExprSlice(Expr): + + def __init__(self, arg, start, stop): + assert(start < stop) + self.arg, self.start, self.stop = arg, start, stop + self._hash = self.myhash() + self._size = self.stop - self.start + + def __str__(self): + return "%s[%d:%d]" % (str(self.arg), self.start, self.stop) + + def get_r(self, mem_read=False, cst_read=False): + return self.arg.get_r(mem_read, cst_read) + + def get_w(self): + return self.arg.get_w() + + def __contains__(self, e): + if self == e: + return True + return self.arg.__contains__(e) + + def myhash(self): + return hash((EXPRSLICE, self.arg._hash, self.start, self.stop)) + + @visit_chk + def visit(self, cb, tv=None): + arg = self.arg.visit(cb, tv) + if arg == self.arg: + return self + return ExprSlice(arg, self.start, self.stop) + + def copy(self): + return ExprSlice(self.arg.copy(), self.start, self.stop) + + def depth(self): + return self.arg.depth() + 1 + + def slice_rest(self): + "Return the completion of the current slice" + size = self.arg.size + if self.start >= size or self.stop > size: + raise ValueError('bad slice rest %s %s %s' % + (size, self.start, self.stop)) + + if self.start == self.stop: + return [(0, size)] + + rest = [] + if self.start != 0: + 
class ExprCompose(Expr):

    """
    Compose is like a hamburger.
    Its arguments are tuples of: (Expression, start, stop)
    start and stop are integers, giving the Expression position in the
    compose.

    Burger Example:
    ExprCompose([(salad, 0, 3), (cheese, 3, 10), (beacon, 10, 16)])
    In the example, salad.size == 3.
    """

    def __init__(self, args):
        """Create an ExprCompose
        @args: iterable of (Expr, int, int) tuples: expression, start bit,
               stop bit; each expression size must equal stop - start
        """

        for e, start, stop in args:
            if e.size != stop - start:
                raise ValueError(
                    "sanitycheck: ExprCompose args must have correct size!" +
                    " %r %r %r" % (e, e.size, stop - start))

        # Freeze args as a tuple of tuples
        o = []
        for e, a, b in args:
            assert(a >= 0 and b >= 0)
            o.append((e, a, b))
        self.args = tuple(o)

        self._hash = self.myhash()
        # Size spans from the lowest start bit to the highest stop bit
        self._size = max([x[2]
                         for x in self.args]) - min([x[1] for x in self.args])

    def __str__(self):
        return '{' + ', '.join(['%s,%d,%d' %
                                (str(x[0]), x[1], x[2]) for x in self.args]) + '}'

    def get_r(self, mem_read=False, cst_read=False):
        """Return the union of what each component reads"""
        out = set()
        for a in self.args:
            out = out.union(a[0].get_r(mem_read, cst_read))
        return out

    def get_w(self):
        """Return the union of what each component writes"""
        out = set()
        for a in self.args:
            out = out.union(a[0].get_w())
        return out

    def __contains__(self, e):
        if self == e:
            return True
        for a in self.args:
            # e may be a whole (Expr, start, stop) component...
            if a == e:
                return True
            # ...or an expression nested in one of them
            if a[0].__contains__(e):
                return True
        return False

    def myhash(self):
        h_args = [EXPRCOMPOSE] + [(x[0]._hash, x[1], x[2]) for x in self.args]
        return hash(tuple(h_args))

    @visit_chk
    def visit(self, cb, tv=None):
        args = [(a[0].visit(cb, tv), a[1], a[2]) for a in self.args]
        # Rebuild the compose only if a component was changed by the visit
        modified = any([x[0] != x[1] for x in zip(self.args, args)])
        if modified:
            return ExprCompose(args)
        return self

    def copy(self):
        args = [(a[0].copy(), a[1], a[2]) for a in self.args]
        return ExprCompose(args)

    def depth(self):
        depth = [a[0].depth() for a in self.args]
        return max(depth) + 1

    def graph_recursive(self, graph):
        graph.add_node(self)
        for a in self.args:
            a[0].graph_recursive(graph)
            graph.add_uniq_edge(self, a[0])
compare_exprs(e1.arg, e2.arg) + if x: + return x + return cmp(e1._size, e2._size) + elif c1 == ExprOp: + if e1.op != e2.op: + return cmp(e1.op, e2.op) + return compare_expr_list(e1.args, e2.args) + elif c1 == ExprSlice: + x = compare_exprs(e1.arg, e2.arg) + if x: + return x + x = cmp(e1.start, e2.start) + if x: + return x + x = cmp(e1.stop, e2.stop) + return x + elif c1 == ExprCompose: + return compare_expr_list_compose(e1.args, e2.args) + raise NotImplementedError( + "Comparaison between %r %r not implemented" % (e1, e2)) + + +def canonize_expr_list(l): + l = list(l) + l.sort(cmp=compare_exprs) + return l + + +def canonize_expr_list_compose(l): + l = list(l) + l.sort(cmp=compare_exprs_compose) + return l + +# Generate ExprInt with common size + + +def ExprInt1(i): + return ExprInt(uint1(i)) + + +def ExprInt8(i): + return ExprInt(uint8(i)) + + +def ExprInt16(i): + return ExprInt(uint16(i)) + + +def ExprInt32(i): + return ExprInt(uint32(i)) + + +def ExprInt64(i): + return ExprInt(uint64(i)) + + +def ExprInt_from(e, i): + "Generate ExprInt with size equal to expression" + return ExprInt(mod_size2uint[e.size](i)) + + +def ExprInt_fromsize(size, i): + "Generate ExprInt with a given size" + return ExprInt(mod_size2uint[size](i)) + + +def get_expr_ids_visit(e, ids): + if isinstance(e, ExprId): + ids.add(e) + return e + + +def get_expr_ids(e): + ids = set() + e.visit(lambda x: get_expr_ids_visit(x, ids)) + return ids + + +def test_set(e, v, tks, result): + """Test if v can correspond to e. If so, update the context in result. + Otherwise, return False + @e : Expr + @v : Expr + @tks : list of ExprId, available jokers + @result : dictionnary of ExprId -> Expr, current context + """ + + if not v in tks: + return e == v + if v in result and result[v] != e: + return False + result[v] = e + return result + + +def MatchExpr(e, m, tks, result=None): + """Try to match m expression with e expression with tks jokers. + Result is output dictionnary with matching joker values. 
+ @e : Expr to test + @m : Targetted Expr + @tks : list of ExprId, available jokers + @result : dictionnary of ExprId -> Expr, output matching context + """ + + if result is None: + result = {} + + if m in tks: + # m is a Joker + return test_set(e, m, tks, result) + + if isinstance(e, ExprInt): + return test_set(e, m, tks, result) + + elif isinstance(e, ExprId): + return test_set(e, m, tks, result) + + elif isinstance(e, ExprOp): + + # e need to be the same operation than m + if not isinstance(m, ExprOp): + return False + if e.op != m.op: + return False + if len(e.args) != len(m.args): + return False + + # Perform permutation only if the current operation is commutative + if e.is_commutative(): + permutations = itertools.permutations(e.args) + else: + permutations = [e.args] + + # For each permutations of arguments + for permut in permutations: + good = True + # We need to use a copy of result to not override it + myresult = dict(result) + for a1, a2 in zip(permut, m.args): + r = MatchExpr(a1, a2, tks, myresult) + # If the current permutation do not match EVERY terms + if r is False: + good = False + break + if good is True: + # We found a possibility + for k, v in myresult.items(): + # Updating result in place (to keep pointer in recursion) + result[k] = v + return result + return False + + # Recursive tests + + elif isinstance(e, ExprMem): + if not isinstance(m, ExprMem): + return False + if e._size != m._size: + return False + return MatchExpr(e.arg, m.arg, tks, result) + + elif isinstance(e, ExprSlice): + if not isinstance(m, ExprSlice): + return False + if e.start != m.start or e.stop != m.stop: + return False + return MatchExpr(e.arg, m.arg, tks, result) + + elif isinstance(e, ExprCond): + if not isinstance(m, ExprCond): + return False + r = MatchExpr(e.cond, m.cond, tks, result) + if r is False: + return False + r = MatchExpr(e.src1, m.src1, tks, result) + if r is False: + return False + r = MatchExpr(e.src2, m.src2, tks, result) + if r is False: + return 
def SearchExpr(e, m, tks, result=None):
    """Search every sub-expression of e matching the pattern m.
    @e : Expr to explore
    @m : Targetted Expr
    @tks : list of ExprId, available jokers
    @result : optional set, updated in place with the matches found
    Return the set of matches; each match is a tuple of
    (joker, matched Expr) pairs.
    """
    # TODO XXX: to test
    if result is None:
        result = set()

    def visit_search(sub_e):
        context = {}
        # MatchExpr can leave partial bindings in the context when the match
        # finally fails, so its return value is the only reliable verdict
        matched = MatchExpr(sub_e, m, tks, context)
        # Matches which bind no joker are not recorded
        if matched is not False and context:
            result.add(tuple(context.items()))
        return sub_e

    e.visit(visit_search)
    return result
def parity(a):
    """Compute the parity bit of the low byte of a
    @a: integer-like value, only its 8 least significant bits are used
    Return 1 if these bits hold an even number of 1, else 0
    """
    # A plain literal is enough here: the 'L' long suffix is rejected by
    # Python 3 and brings nothing on Python 2
    tmp = a & 0xFF
    cpt = 1
    # Toggle the result once for each bit set
    while tmp != 0:
        cpt ^= tmp & 1
        tmp >>= 1
    return cpt
+ sources_int[a[1]] = (m2_expr.ExprInt_fromsize(a[2] - a[1], + a[0].arg.__class__( + a[0].arg)), + a[1], + a[2]) + elif isinstance(a[0], m2_expr.ExprSlice): + if not a[0].arg in sources: + sources[a[0].arg] = [] + sources[a[0].arg].append(a) + else: + non_slice[a[1]] = a + # find max stop to determine size + max_size = None + for a in args: + if max_size is None or max_size < a[2]: + max_size = a[2] + + # first simplify all num slices + final_sources = [] + sorted_s = [] + for x in sources_int.values(): + # mask int + v = x[0].arg & ((1 << (x[2] - x[1])) - 1) + x[0].arg = v + sorted_s.append((x[1], x)) + sorted_s.sort() + while sorted_s: + start, v = sorted_s.pop() + out = [m2_expr.ExprInt(v[0].arg), v[1], v[2]] + size = v[2] - v[1] + while sorted_s: + if sorted_s[-1][1][2] != start: + break + s_start, s_stop = sorted_s[-1][1][1], sorted_s[-1][1][2] + size += s_stop - s_start + a = m2_expr.mod_size2uint[size]( + (int(out[0].arg) << (out[1] - s_start)) + + int(sorted_s[-1][1][0].arg)) + out[0].arg = a + sorted_s.pop() + out[1] = s_start + out[0] = m2_expr.ExprInt_fromsize(size, out[0].arg) + final_sources.append((start, out)) + + final_sources_int = final_sources + # check if same sources have corresponding start/stop + # is slice AND is sliceto + simp_sources = [] + for args in sources.values(): + final_sources = [] + sorted_s = [] + for x in args: + sorted_s.append((x[1], x)) + sorted_s.sort() + while sorted_s: + start, v = sorted_s.pop() + ee = v[0].arg[v[0].start:v[0].stop] + out = ee, v[1], v[2] + while sorted_s: + if sorted_s[-1][1][2] != start: + break + if sorted_s[-1][1][0].stop != out[0].start: + break + + start = sorted_s[-1][1][1] + # out[0].start = sorted_s[-1][1][0].start + o_e, _, o_stop = out + o1, o2 = sorted_s[-1][1][0].start, o_e.stop + o_e = o_e.arg[o1:o2] + out = o_e, start, o_stop + # update _size + # out[0]._size = out[0].stop-out[0].start + sorted_s.pop() + out = out[0], start, out[2] + + final_sources.append((start, out)) + + simp_sources += 
def get_missing_interval(all_intervals, i_min=0, i_max=32):
    """Return a list of missing interval in all_interval
    @all_interval: list of (int, int), may be unsorted and may overlap
    @i_min: int, minimal missing interval bound
    @i_max: int, maximal missing interval bound
    The input list is left untouched."""

    my_intervals = sorted(all_intervals)
    # Sentinel: closes the gap between the last interval and i_max
    my_intervals.append((i_max, i_max))

    missing_i = []
    last_pos = i_min
    for start, stop in my_intervals:
        # Only a strictly positive gap is a missing interval; with
        # overlapping intervals start can be lower than last_pos
        if last_pos < start:
            missing_i.append((last_pos, start))
        # Never move backward when an interval is nested in a previous one
        last_pos = max(last_pos, stop)
    return missing_i
class moduint(object):
    """Unsigned integer with arithmetic done modulo `limit`.

    Concrete subclasses are expected to define the class attributes
    `size` and `limit`; the reduced value is stored in `arg`.
    """

    def __init__(self, arg):
        value = arg.arg if isinstance(arg, moduint) else arg
        self.arg = value % self.__class__.limit
        assert(0 <= self.arg < self.__class__.limit)

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, hex(self.arg))

    def __hash__(self):
        return hash(self.arg)

    @classmethod
    def maxcast(cls, c2):
        "Return the widest class between cls and the class of c2"
        other = c2.__class__
        return cls if cls.size > other.size else other

    def _apply(self, y, func):
        "Compute func(self, y), self being the left operand"
        if isinstance(y, moduint):
            return self.maxcast(y)(func(self.arg, y.arg))
        return self.__class__(func(self.arg, y))

    def _rapply(self, y, func):
        "Compute func(y, self), self being the right operand"
        if isinstance(y, moduint):
            return self.maxcast(y)(func(y.arg, self.arg))
        return self.__class__(func(y, self.arg))

    def __cmp__(self, y):
        return cmp(self.arg, y.arg if isinstance(y, moduint) else y)

    # Conversions and unary operators

    def __int__(self):
        return int(self.arg)

    def __long__(self):
        return long(self.arg)

    def __hex__(self):
        return hex(self.arg)

    def __abs__(self):
        return abs(self.arg)

    def __invert__(self):
        return self.__class__(~self.arg)

    def __neg__(self):
        return self.__class__(-self.arg)

    # Binary operators, self on the left

    def __add__(self, y):
        return self._apply(y, lambda a, b: a + b)

    def __sub__(self, y):
        return self._apply(y, lambda a, b: a - b)

    def __mul__(self, y):
        return self._apply(y, lambda a, b: a * b)

    def __div__(self, y):
        return self._apply(y, lambda a, b: a / b)

    def __mod__(self, y):
        return self._apply(y, lambda a, b: a % b)

    def __and__(self, y):
        return self._apply(y, lambda a, b: a & b)

    def __or__(self, y):
        return self._apply(y, lambda a, b: a | b)

    def __xor__(self, y):
        return self._apply(y, lambda a, b: a ^ b)

    def __lshift__(self, y):
        return self._apply(y, lambda a, b: a << b)

    def __rshift__(self, y):
        return self._apply(y, lambda a, b: a >> b)

    def __pow__(self, v):
        return self.__class__(self.arg ** v)

    # Reflected operators: commutative ones reuse the direct version

    def __radd__(self, y):
        return self.__add__(y)

    def __rmul__(self, y):
        return self.__mul__(y)

    def __rand__(self, y):
        return self.__and__(y)

    def __ror__(self, y):
        return self.__or__(y)

    def __rxor__(self, y):
        return self.__xor__(y)

    def __rsub__(self, y):
        return self._rapply(y, lambda a, b: a - b)

    def __rdiv__(self, y):
        return self._rapply(y, lambda a, b: a / b)

    def __rmod__(self, y):
        return self._rapply(y, lambda a, b: a % b)

    def __rlshift__(self, y):
        return self._rapply(y, lambda a, b: a << b)

    def __rrshift__(self, y):
        return self._rapply(y, lambda a, b: a >> b)

    def __rpow__(self, v):
        # The result is a raw value, not a moduint
        return v ** self.arg
def simp_cst_propagation(e_s, e):
    """This pass includes:
     - Constant folding
     - Common logical identities
     - Common binary identities
    @e_s: ExpressionSimplifier instance (not used by this pass)
    @e: Expr, returned untouched if it is not an ExprOp
    Return the simplified Expr
    """

    # merge associative op
    if not isinstance(e, ExprOp):
        return e
    args = list(e.args)
    op = e.op
    # simpl integer manip
    # int OP int => int
    if op in op_propag_cst:
        # Fold the trailing integers two by two
        while (len(args) >= 2 and
               isinstance(args[-1], ExprInt) and
               isinstance(args[-2], ExprInt)):
            i2 = args.pop()
            i1 = args.pop()
            if op == '+':
                o = i1.arg + i2.arg
            elif op == '*':
                o = i1.arg * i2.arg
            elif op == '^':
                o = i1.arg ^ i2.arg
            elif op == '&':
                o = i1.arg & i2.arg
            elif op == '|':
                o = i1.arg | i2.arg
            elif op == '>>':
                o = i1.arg >> i2.arg
            elif op == '<<':
                o = i1.arg << i2.arg
            elif op == 'a>>':
                # Arithmetic shift: go through the signed representation
                x1 = mod_size2int[i1.arg.size](i1.arg)
                x2 = mod_size2int[i2.arg.size](i2.arg)
                o = mod_size2uint[i1.arg.size](x1 >> x2)
            elif op == '>>>':
                o = i1.arg >> i2.arg | i1.arg << (i1.size - i2.arg)
            elif op == '/':
                o = i1.arg / i2.arg
            elif op == '%':
                o = i1.arg % i2.arg
            elif op == 'idiv':
                assert(i2.arg)
                x1 = mod_size2int[i1.arg.size](i1.arg)
                x2 = mod_size2int[i2.arg.size](i2.arg)
                o = mod_size2uint[i1.arg.size](x1 / x2)
            elif op == 'irem':
                assert(i2.arg)
                x1 = mod_size2int[i1.arg.size](i1.arg)
                x2 = mod_size2int[i2.arg.size](i2.arg)
                o = mod_size2uint[i1.arg.size](x1 % x2)

            o = ExprInt_fromsize(i1.size, o)
            args.append(o)

    # bsf(int) => int
    if op == "bsf" and isinstance(args[0], ExprInt) and args[0].arg != 0:
        i = 0
        while args[0].arg & (1 << i) == 0:
            i += 1
        return ExprInt_from(args[0], i)

    # bsr(int) => int
    if op == "bsr" and isinstance(args[0], ExprInt) and args[0].arg != 0:
        i = args[0].size - 1
        while args[0].arg & (1 << i) == 0:
            i -= 1
        return ExprInt_from(args[0], i)

    # -(-(A)) => A
    if op == '-' and len(args) == 1 and isinstance(args[0], ExprOp) and \
            args[0].op == '-' and len(args[0].args) == 1:
        return args[0].args[0]

    # -(int) => -int
    if op == '-' and len(args) == 1 and isinstance(args[0], ExprInt):
        return ExprInt(-args[0].arg)
    # A op 0 => A
    if op in ['+', '|', "^", "<<", ">>", "<<<", ">>>"] and len(args) > 1:
        if isinstance(args[-1], ExprInt) and args[-1].arg == 0:
            args.pop()
    # A - 0 => A
    # Handled apart from the other operators: once the 0 is dropped, a
    # single remaining argument must be returned as is, otherwise the
    # rules below would read it as the unary minus -A
    if op == '-' and len(args) > 1:
        if isinstance(args[-1], ExprInt) and args[-1].arg == 0:
            args.pop()
            if len(args) == 1:
                return args[0]
    # A * 1 => A
    if op == "*" and len(args) > 1:
        if isinstance(args[-1], ExprInt) and args[-1].arg == 1:
            args.pop()

    # for canonical form
    # A * -1 => - A
    if op == "*" and len(args) > 1:
        if (isinstance(args[-1], ExprInt) and
                args[-1].arg == (1 << args[-1].size) - 1):
            args.pop()
            args[-1] = - args[-1]

    # op A => A
    if op in ['+', '*', '^', '&', '|', '>>', '<<',
              'a>>', '<<<', '>>>', 'idiv', 'irem'] and len(args) == 1:
        return args[0]

    # A-B => A + (-B)
    if op == '-' and len(args) > 1:
        if len(args) > 2:
            raise ValueError(
                'sanity check fail on expr -: should have one or 2 args ' +
                '%r %s' % (e, e))
        return ExprOp('+', args[0], -args[1])

    # A op 0 => 0
    # Folded integers end up in last position, whatever the number of
    # arguments is, so test the last argument
    if op in ['&', "*"] and isinstance(args[-1], ExprInt) and \
            args[-1].arg == 0:
        return ExprInt_from(e, 0)

    # - (A + B +...) => -A + -B + -C
    if (op == '-' and
        len(args) == 1 and
        isinstance(args[0], ExprOp) and
            args[0].op == '+'):
        args = [-a for a in args[0].args]
        e = ExprOp('+', *args)
        return e

    # -(a?int1:int2) => (a?-int1:-int2)
    if (op == '-' and
        len(args) == 1 and
        isinstance(args[0], ExprCond) and
        isinstance(args[0].src1, ExprInt) and
            isinstance(args[0].src2, ExprInt)):
        i1 = args[0].src1
        i2 = args[0].src2
        i1 = ExprInt_from(i1, -i1.arg)
        i2 = ExprInt_from(i2, -i2.arg)
        return ExprCond(args[0].cond, i1, i2)

    # Pairwise identities: each couple (args[i], args[j]) is tested; when
    # args[j] is absorbed, j is not incremented as the list has shrunk
    i = 0
    while i < len(args) - 1:
        j = i + 1
        while j < len(args):
            # A ^ A => 0
            if op == '^' and args[i] == args[j]:
                args[i] = ExprInt_from(args[i], 0)
                del(args[j])
                continue
            # A + (- A) => 0
            if op == '+' and isinstance(args[j], ExprOp) and args[j].op == "-":
                if len(args[j].args) == 1 and args[i] == args[j].args[0]:
                    args[i] = ExprInt_from(args[i], 0)
                    del(args[j])
                    continue
            # (- A) + A => 0
            if op == '+' and isinstance(args[i], ExprOp) and args[i].op == "-":
                if len(args[i].args) == 1 and args[j] == args[i].args[0]:
                    args[i] = ExprInt_from(args[i], 0)
                    del(args[j])
                    continue
            # A | A => A
            if op == '|' and args[i] == args[j]:
                del(args[j])
                continue
            # A & A => A
            if op == '&' and args[i] == args[j]:
                del(args[j])
                continue
            j += 1
        i += 1

    if op in ['|', '&', '%', '/'] and len(args) == 1:
        return args[0]

    # A <<< A.size => A
    if (op in ['<<<', '>>>'] and
        isinstance(args[1], ExprInt) and
            args[1].arg == args[0].size):
        return args[0]

    # A <<< X <<< Y => A <<< (X+Y) (or <<< >>>)
    if (op in ['<<<', '>>>'] and
        isinstance(args[0], ExprOp) and
            args[0].op in ['<<<', '>>>']):
        op1 = op
        op2 = args[0].op
        if op1 == op2:
            op = op1
            args1 = args[0].args[1] + args[1]
        else:
            op = op2
            args1 = args[0].args[1] - args[1]

        args0 = args[0].args[0]
        args = [args0, args1]

    # ((A & A.mask)
    if op == "&" and args[-1] == e.mask:
        return ExprOp('&', *args[:-1])

    # ((A | A.mask)
    if op == "|" and args[-1] == e.mask:
        return args[-1]

    # ! (!X + int) => X - int
    # TODO

    # ((A & mask) >> shift) with mask < 2**shift => 0
    # The comparison has to be strict: with mask == 2**shift, the bit
    # number `shift` of A survives the shift
    if (op == ">>" and
        isinstance(args[1], ExprInt) and
            isinstance(args[0], ExprOp) and args[0].op == "&"):
        if (isinstance(args[0].args[1], ExprInt) and
                2 ** args[1].arg > args[0].args[1].arg):
            return ExprInt_from(args[0], 0)

    # int == int => 0 or 1
    if (op == '==' and
        isinstance(args[0], ExprInt) and
            isinstance(args[1], ExprInt)):
        if args[0].arg == args[1].arg:
            return ExprInt_from(args[0], 1)
        else:
            return ExprInt_from(args[0], 0)
    # (A|int == 0) => 0 with int != 0
    if op == '==' and isinstance(args[1], ExprInt) and args[1].arg == 0:
        if isinstance(args[0], ExprOp) and args[0].op == '|' and\
                isinstance(args[0].args[1], ExprInt) and \
                args[0].args[1].arg != 0:
            return ExprInt_from(args[0], 0)

    # parity(int) => int
    if op == 'parity' and isinstance(args[0], ExprInt):
        return ExprInt1(parity(args[0].arg))

    # (-a) * b * (-c) * (-d) => -(a * b * c * d)
    if op == "*" and len(args) > 1:
        new_args = []
        counter = 0
        for a in args:
            if isinstance(a, ExprOp) and a.op == '-' and len(a.args) == 1:
                new_args.append(a.args[0])
                counter += 1
            else:
                new_args.append(a)
        # An odd number of negated factors leaves a global minus sign
        if counter % 2:
            return -ExprOp(op, *new_args)
        args = new_args

    return ExprOp(op, *args)
def simp_slice(e_s, e):
    """Slice optimization
    @e_s: ExpressionSimplifier instance, used to simplify sub slices
    @e: ExprSlice to simplify
    Return the simplified Expr (e itself if no rule applies)
    """

    # slice(A, 0, a.size) => A
    if e.start == 0 and e.stop == e.arg.size:
        return e.arg
    # Slice(int) => int
    elif isinstance(e.arg, ExprInt):
        total_bit = e.stop - e.start
        mask = (1 << (e.stop - e.start)) - 1
        return ExprInt_fromsize(total_bit, (e.arg.arg >> e.start) & mask)
    # Slice(Slice(A, x), y) => Slice(A, z)
    elif isinstance(e.arg, ExprSlice):
        if e.stop - e.start > e.arg.stop - e.arg.start:
            raise ValueError('slice in slice: getting more val', str(e))

        new_e = ExprSlice(e.arg.arg, e.start + e.arg.start,
                          e.start + e.arg.start + (e.stop - e.start))
        return new_e
    # Slice(Compose(A), x) => Slice(A, y)
    elif isinstance(e.arg, ExprCompose):
        # Only possible if the slice fits in a single compose element
        for a in e.arg.args:
            if a[1] <= e.start and a[2] >= e.stop:
                new_e = a[0][e.start - a[1]:e.stop - a[1]]
                return new_e
    # ExprMem(x, size)[:A] => ExprMem(x, a)
    # XXXX todo hum, is it safe?
    elif (isinstance(e.arg, ExprMem) and
          e.start == 0 and
          e.arg.size > e.stop and e.stop % 8 == 0):
        e = ExprMem(e.arg.arg, size=e.stop)
        return e
    # distributivity of slice and &
    # (a & int)[x:y] => 0 if int[x:y] == 0
    elif (isinstance(e.arg, ExprOp) and
          e.arg.op == "&" and
          isinstance(e.arg.args[-1], ExprInt)):
        tmp = e_s.expr_simp_wrapper(e.arg.args[-1][e.start:e.stop])
        if isinstance(tmp, ExprInt) and tmp.arg == 0:
            return tmp
    # distributivity of slice and exprcond
    # (a?int1:int2)[x:y] => (a?int1[x:y]:int2[x:y])
    elif (isinstance(e.arg, ExprCond) and
          isinstance(e.arg.src1, ExprInt) and
          isinstance(e.arg.src2, ExprInt)):
        src1 = e.arg.src1[e.start:e.stop]
        src2 = e.arg.src2[e.start:e.stop]
        e = ExprCond(e.arg.cond, src1, src2)

    # (a * int)[0:y] => (a[0:y] * int[0:y])
    # Only the low bits of a product depend on the low bits of its
    # operands alone, so the slice must start at bit 0
    elif (e.start == 0 and
          isinstance(e.arg, ExprOp) and
          e.arg.op == "*" and
          isinstance(e.arg.args[-1], ExprInt)):
        args = [e_s.expr_simp_wrapper(a[e.start:e.stop]) for a in e.arg.args]
        e = ExprOp(e.arg.op, *args)

    return e
{XXX?(0x0,0x1)?(0x0,0x1),0,8, 0x0,8,32} => XXX?(int1, int2) + + ok = True + expr_cond = None + expr_ints = [] + for i, a in enumerate(args): + if not is_int_or_cond_src_int(a[0]): + ok = False + break + expr_ints.append(a) + if isinstance(a[0], ExprCond): + if expr_cond is not None: + ok = False + expr_cond = i + cond = a[0] + + if ok and expr_cond is not None: + src1 = [] + src2 = [] + for i, a in enumerate(expr_ints): + if i == expr_cond: + src1.append((a[0].src1, a[1], a[2])) + src2.append((a[0].src2, a[1], a[2])) + else: + src1.append(a) + src2.append(a) + src1 = e_s.apply_simp(ExprCompose(src1)) + src2 = e_s.apply_simp(ExprCompose(src2)) + if isinstance(src1, ExprInt) and isinstance(src2, ExprInt): + return ExprCond(cond.cond, src1, src2) + return ExprCompose(args) + + +def simp_cond(e_s, e): + "Common simplifications on ExprCond" + if not isinstance(e, ExprCond): + return e + # eval exprcond src1/src2 with satifiable/unsatisfiable condition + # propagation + if (not isinstance(e.cond, ExprInt)) and e.cond.size == 1: + src1 = e.src1.replace_expr({e.cond: ExprInt1(1)}) + src2 = e.src2.replace_expr({e.cond: ExprInt1(0)}) + if src1 != e.src1 or src2 != e.src2: + return ExprCond(e.cond, src1, src2) + + # -A ? B:C => A ? B:C + if (isinstance(e.cond, ExprOp) and + e.cond.op == '-' and + len(e.cond.args) == 1): + e = ExprCond(e.cond.args[0], e.src1, e.src2) + # a?x:x + elif e.src1 == e.src2: + e = e.src1 + # int ? A:B => A or B + elif isinstance(e.cond, ExprInt): + if e.cond.arg == 0: + e = e.src2 + else: + e = e.src1 + # a?(a?b:c):x => a?b:x + elif isinstance(e.src1, ExprCond) and e.cond == e.src1.cond: + e = ExprCond(e.cond, e.src1.src1, e.src2) + # a?x:(a?b:c) => a?x:c + elif isinstance(e.src2, ExprCond) and e.cond == e.src2.cond: + e = ExprCond(e.cond, e.src1, e.src2.src2) + # a|int ? 
b:c => b with int != 0 + elif (isinstance(e.cond, ExprOp) and + e.cond.op == '|' and + isinstance(e.cond.args[1], ExprInt) and + e.cond.args[1].arg != 0): + return e.src1 + + # (C?int1:int2)?(A:B) => + elif (isinstance(e.cond, ExprCond) and + isinstance(e.cond.src1, ExprInt) and + isinstance(e.cond.src2, ExprInt)): + int1 = e.cond.src1.arg.arg + int2 = e.cond.src2.arg.arg + if int1 and int2: + e = e.src1 + elif int1 == 0 and int2 == 0: + e = e.src2 + elif int1 == 0 and int2: + e = ExprCond(e.cond.cond, e.src2, e.src1) + elif int1 and int2 == 0: + e = ExprCond(e.cond.cond, e.src1, e.src2) + return e + + +# Expression Simplifier +# --------------------- + + +class ExpressionSimplifier(object): + + """Wrapper on expression simplification passes. + + Instance handle passes lists. + + Available passes lists are: + - commons: common passes such as constant folding + - heavy : rare passes (for instance, in case of obfuscation) + """ + + # Common passes + PASS_COMMONS = { + m2_expr.ExprOp: [simp_cst_propagation, + simp_cond_op_int, + simp_cond_factor], + m2_expr.ExprSlice: [simp_slice], + m2_expr.ExprCompose: [simp_compose], + m2_expr.ExprCond: [simp_cond], + } + + # Heavy passes + PASS_HEAVY = {} + + def __init__(self): + self.expr_simp_cb = {} + + def enable_passes(self, passes): + """Add passes from @passes + @passes: dict(Expr class : list(callback)) + + Callback signature: Expr callback(ExpressionSimplifier, Expr) + """ + + for k, v in passes.items(): + self.expr_simp_cb[k] = fast_unify(self.expr_simp_cb.get(k, []) + v) + + def apply_simp(self, expression): + """Apply enabled simplifications on expression + @expression: Expr instance + Return an Expr instance""" + + cls = expression.__class__ + for simp_func in self.expr_simp_cb.get(cls, []): + # Apply simplifications + expression = simp_func(self, expression) + + # If class changes, stop to prevent wrong simplifications + if expression.__class__ is not cls: + break + + return expression + + def expr_simp(self, 
def ExprInt_strcst(self):
    "Return the integer as a binary constant of self.size digits"
    width = self.size
    digits = bin(int(self.arg))[2:]
    # Left pad with zeros, then keep the `width` least significant digits
    padded = "0" * width + digits
    low_digits = padded[::-1][:width][::-1]
    return "0bin" + low_digits
def ExprCond_strcst(self):
    "Return the condition as an IF/THEN/ELSE string"
    parts = [sub.strcst() for sub in (self.cond, self.src1, self.src2)]
    cond_txt, src1_txt, src2_txt = parts
    # The test is "cond equals zero", so the branches are swapped:
    # src2 comes first, src1 is the ELSE case
    zero_cst = "0bin%s" % ('0' * self.cond.size)
    return "(IF %s=(%s) THEN %s ELSE %s ENDIF)" % (
        zero_cst, cond_txt, src2_txt, src1_txt)
if len(todo) == 0: + break + out = self.sort_dst(todo, done) + found = set() + follow = set() + for i in irs: + if not out: + break + for o in out: + if i.dst == o: + follow.add(i.src) + found.add(o) + for o in found: + out.remove(o) + + for o in out: + if not o in found: + follow.add(o) + todo = follow + out = self.sort_dst(todo, done) + + return done + + def gen_graph(self, link_all = False): + """ + Gen irbloc digraph + @link_all: also gen edges to non present irblocs + """ + self.g = DiGraph() + for lbl, b in self.blocs.items(): + # print 'add', lbl + self.g.add_node(lbl) + # dst = self.get_bloc_dst(b) + dst = self.dst_trackback(b) + # print "\tdst", dst + for d in dst: + if isinstance(d, ExprInt): + d = ExprId( + self.symbol_pool.getby_offset_create(int(d.arg))) + if self.ExprIsLabel(d): + if d.name in self.blocs or link_all is True: + self.g.add_edge(lbl, d.name) + + def graph(self): + out = """ + digraph asm_graph { + size="80,50"; + node [ + fontsize = "16", + shape = "box" + ]; + """ + all_lbls = {} + for lbl in self.g.nodes(): + if not lbl in self.blocs: + continue + b = self.blocs[lbl] + ir_txt = [str(lbl)] + for irs in b.irs: + for l in irs: + ir_txt.append(str(l)) + ir_txt.append("") + ir_txt.append("DstBloc: %s" % str(b.dst)) + ir_txt.append("") + all_lbls[id(lbl)] = "\l\\\n".join(ir_txt) + for l, v in all_lbls.items(): + out += '%s [label="%s"];\n' % (l, v) + + for a, b in self.g.edges(): + out += '%s -> %s;\n' % (id(a), id(b)) + out += '}' + return out + + def remove_dead(self, b): + for ir, _, c_out in zip(b.irs, b.c_in, b.c_out): + j = 0 + while j < len(ir): + i_cur = ir[j] + if not isinstance(i_cur.dst, ExprId): + pass + elif (isinstance(i_cur.src, ExprOp) and + i_cur.src.op.startswith('call')): + # /!\ never remove ir calls + pass + elif not i_cur.dst in c_out: + del(ir[j]) + continue + j += 1 + + def remove_blocs_dead(self): + for b in self.blocs.values(): + self.remove_dead(b) + + # for test XXX TODO + def set_dead_regs(self, b): + pass + + 
def compute_in_out(self, b):
    """Run one propagation step of the in/out sets of bloc @b.

    @b: bloc exposing the parallel lists r, w, c_in and c_out (one slot per
        entry of b.irs plus one trailing slot), and a label.

    For every slot: in = r | (out - w).  The out set of the trailing slot is
    the union of c_in[0] of each successor of b.label present in self.blocs;
    when the bloc has no successor at all, self.get_out_regs(b) is used
    instead.  The out set of any other slot is the in set of the next slot.

    Each in set is computed from the out set as it was *before* this call
    refreshed it, so several calls are needed to reach a fixed point.

    Return True if at least one set of @b was changed.
    """
    def live_in(idx):
        # in = read | (out - written), computed once per slot
        return set(b.r[idx].union(b.c_out[idx].difference(b.w[idx])))

    modified = False

    # set b in (trailing slot)
    new_in = live_in(-1)
    if b.c_in[-1] != new_in:
        modified = True
        b.c_in[-1] = new_in

    # set b out (trailing slot): get out from bloc sons
    c_out = set()
    has_son = False
    for n_son in self.g.successors(b.label):
        has_son = True
        if n_son not in self.blocs:
            print("leaf has lost her sons!")
            continue
        c_out.update(self.blocs[n_son].c_in[0])
    if not has_son:
        # special case: leaf nodes are architecture dependent
        c_out = self.get_out_regs(b)
    new_out = set(c_out)
    if b.c_out[-1] != new_out:
        modified = True
        b.c_out[-1] = new_out

    # get out/in for the other slots, walking backwards
    for i in range(len(b.irs) - 1, -1, -1):
        new_in = live_in(i)
        if b.c_in[i] != new_in:
            modified = True
            b.c_in[i] = new_in
        new_out = set(b.c_in[i + 1])
        if b.c_out[i] != new_out:
            modified = True
            b.c_out[i] = new_out
    return modified
symbexec(self.arch, dict(symbols_init)) + sb.emulbloc(irb) + eqs = [] + for n_w in sb.symbols: + v = sb.symbols[n_w] + if n_w in symbols_init and symbols_init[n_w] == v: + continue + eqs.append(ExprAff(n_w, v)) + print '*' * 40 + print irb + for eq in eqs: + eq + irb.irs = [eqs] + irb.lines = [None] diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py new file mode 100644 index 00000000..a5d079ca --- /dev/null +++ b/miasm2/ir/ir.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +# +# Copyright (C) 2013 Fabrice Desclaux +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# + + +import miasm2.expression.expression as m2_expr +from miasm2.expression.expression_helper import get_missing_interval +from miasm2.core import asmbloc +from miasm2.expression.simplifications import expr_simp +from miasm2.core.graph import DiGraph +from miasm2.core.asmbloc import asm_symbol_pool + + +class irbloc: + + def __init__(self, label, dst=None, irs=None, lines=None): + assert(isinstance(label, asmbloc.asm_label)) + self.label = label + self.dst = dst + self.lines = [] + self.irs = [] + if irs is not None: + self.irs = irs + if lines is not None: + self.lines = lines + self.except_automod = True + + def get_rw(self): + self.r = [] + self.w = [] + self.c_out = [] + self.c_in = [] + self.l_out = [] + self.l_in = [] + for ir in self.irs: + r, w = set(), set() + for i in ir: + r.update([x for x in i.get_r(True) if isinstance(x, m2_expr.ExprId)]) + w.update([x for x in i.get_w() if isinstance(x, m2_expr.ExprId)]) + if isinstance(i.dst, m2_expr.ExprMem): + r.update([x for x in i.dst.arg.get_r(True) + if isinstance(x, m2_expr.ExprId)]) + self.r.append(r) + self.w.append(w) + self.c_out.append(set()) + self.c_in.append(set()) + self.l_out.append(set()) + self.l_in.append(set()) + # get rw for dst + i = self.dst + r, w = set(), set() + if i is not None: + r.update([x for x in i.get_r(True) if isinstance(x, m2_expr.ExprId)]) + self.r.append(r) + self.w.append(w) + self.c_out.append(set()) + self.c_in.append(set()) + self.l_out.append(set()) + self.l_in.append(set()) + + def __str__(self): + o = [] + o.append('%s' % self.label) + for expr in self.irs: + for e in expr: + o.append('\t%s' % e) + o.append("") + o.append('\tDst: %s' % self.dst) + + return "\n".join(o) + + +class ir(object): + + def __init__(self, arch, attrib, symbol_pool=None): + if symbol_pool is None: + symbol_pool = asm_symbol_pool() + self.symbol_pool = symbol_pool + self.blocs = {} + self.pc = arch.getpc(attrib) + self.sp = arch.getsp(attrib) + self.arch = arch + self.attrib = attrib + + def 
def merge_multi_affect(self, affect_list):
    """
    If multiple assignments to a same destination are present in
    @affect_list, merge them (in place).
    For instance, XCHG AX, AL semantic is
    [
        RAX = {RAX[0:8],0,8, RAX[0:8],8,16, RAX[16:64],16,64}
        RAX = {RAX[8:16],0,8, RAX[8:64],8,64}
    ]
    This function will update @affect_list to replace previous ExprAff by
    [
        RAX = {RAX[8:16],0,8, RAX[0:8],8,16, RAX[16:64],16,64}
    ]

    Only destinations whose colliding sources are all ExprCompose are
    merged; other groups are left untouched.
    """

    # Group the assignments by destination
    effect = {}
    for expr in affect_list:
        effect.setdefault(expr.dst, []).append(expr)

    # Find candidates
    for dst, expr_list in effect.items():
        if len(expr_list) <= 1:
            continue

        # Only treat ExprCompose list
        if not all(isinstance(e.src, m2_expr.ExprCompose)
                   for e in expr_list):
            continue

        # Find collision: every slice modified by the candidates
        e_colision = []
        for e in expr_list:
            e_colision += e.get_modified_slice()

        # Sort interval collision
        known_intervals = sorted((x[1], x[2]) for x in set(e_colision))

        # Fill with missing data.  The total size is read from the last
        # candidate explicitly: the previous code obtained the same object
        # through the loop variable leaked by a Python 2 list
        # comprehension.
        total_size = expr_list[-1].src.size
        missing_i = get_missing_interval(known_intervals, 0, total_size)

        rest = [(m2_expr.ExprSlice(dst, r[0], r[1]), r[0], r[1])
                for r in missing_i]

        # Build the merging expression
        slices = e_colision + rest
        slices.sort(key=lambda x: x[1])
        final_dst = m2_expr.ExprCompose(slices)

        # Remove unused expression
        for expr in expr_list:
            affect_list.remove(expr)

        # Add the merged one
        affect_list.append(m2_expr.ExprAff(dst, final_dst))
def replace_expr_in_ir(self, bloc, rep):
    """Apply the substitution @rep to every IR entry of @bloc, in place.

    Each element of each list in bloc.irs is replaced by the result of its
    own replace_expr(rep) call; the list objects themselves are kept.
    """
    for assigns in bloc.irs:
        assigns[:] = [entry.replace_expr(rep) for entry in assigns]
+def ExprOp_toC(self): + dct_shift = {'a>>': "right_arith", + '>>': "right_logic", + '<<': "left_logic", + 'a<<': "left_logic", + } + dct_rot = {'<<<': 'rot_left', + '>>>': 'rot_right', + } + dct_div = {'div8': "div_op", + 'div16': "div_op", + 'div32': "div_op", + 'idiv32': "div_op", # XXX to test + 'rem8': "rem_op", + 'rem16': "rem_op", + 'rem32': "rem_op", + 'irem32': "rem_op", # XXX to test + '<<<c_rez': 'rcl_rez_op', + '<<<c_cf': 'rcl_cf_op', + '>>>c_rez': 'rcr_rez_op', + '>>>c_cf': 'rcr_cf_op', + } + if len(self.args) == 1: + if self.op == 'parity': + return "parity(%s&0x%x)" % ( + self.args[0].toC(), size2mask(self.args[0].size)) + elif self.op == '!': + return "(~ %s)&0x%x" % ( + self.args[0].toC(), size2mask(self.args[0].size)) + elif self.op in ["hex2bcd", "bcd2hex"]: + return "%s_%d(%s)" % ( + self.op, self.args[0].size, self.args[0].toC()) + elif (self.op.startswith("double_to_") or + self.op.endswith("_to_double") or + self.op.startswith("access_") or + self.op.startswith("load_") or + self.op in ["-", "ftan", "frndint", "f2xm1", + "fsin", "fsqrt", "fabs", "fcos"]): + return "%s(%s)" % (self.op, self.args[0].toC()) + else: + raise ValueError('unknown op: %r' % self.op) + elif len(self.args) == 2: + if self.op == "==": + return '(((%s&0x%x) == (%s&0x%x))?1:0)' % ( + self.args[0].toC(), size2mask(self.args[0].size), + self.args[1].toC(), size2mask(self.args[1].size)) + elif self.op in dct_shift: + return 'shift_%s_%.2d(%s , %s)' % (dct_shift[self.op], + self.args[0].size, + self.args[0].toC(), + self.args[1].toC()) + elif self.is_associative(): + o = ['(%s&0x%x)' % (a.toC(), size2mask(a.size)) for a in self.args] + o = str(self.op).join(o) + return "((%s)&0x%x)" % (o, size2mask(self.args[0].size)) + elif self.op in ["%", "/"]: + o = ['(%s&0x%x)' % (a.toC(), size2mask(a.size)) for a in self.args] + o = str(self.op).join(o) + return "((%s)&0x%x)" % (o, size2mask(self.args[0].size)) + elif self.op in ['-']: + return '(((%s&0x%x) %s (%s&0x%x))&0x%x)' % ( + 
self.args[0].toC(), size2mask(self.args[0].size), + str(self.op), + self.args[1].toC(), size2mask(self.args[1].size), + size2mask(self.args[0].size)) + elif self.op in dct_rot: + return '(%s(%s, %s, %s) &0x%x)' % (dct_rot[self.op], + self.args[0].size, + self.args[0].toC(), + self.args[1].toC(), + size2mask(self.args[0].size)) + elif self.op in ['bsr', 'bsf']: + return 'my_%s(%s, %s)' % (self.op, + self.args[0].toC(), + self.args[1].toC()) + elif self.op.startswith('cpuid'): + return "%s(%s, %s)" % ( + self.op, self.args[0].toC(), self.args[1].toC()) + elif self.op.startswith("fcom"): + return "%s(%s, %s)" % ( + self.op, self.args[0].toC(), self.args[1].toC()) + elif self.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale"]: + return "%s(%s, %s)" % ( + self.op, self.args[0].toC(), self.args[1].toC()) + elif self.op == "segm": + return "segm2addr(vmcpu, %s, %s)" % ( + self.args[0].toC(), self.args[1].toC()) + elif self.op in ['udiv', 'umod', 'idiv', 'imod']: + return '%s%d(vmcpu, %s, %s)' % (self.op, + self.args[0].size, + self.args[0].toC(), + self.args[1].toC()) + elif self.op in ["bcdadd", "bcdadd_cf"]: + return "%s_%d(%s, %s)" % (self.op, self.args[0].size, + self.args[0].toC(), + self.args[1].toC()) + else: + raise ValueError('unknown op: %r' % self.op) + elif len(self.args) == 3 and self.op in dct_div: + return '(%s(%s, %s, %s, %s) &0x%x)' % (dct_div[self.op], + self.args[0].size, + self.args[0].toC(), + self.args[1].toC(), + self.args[2].toC(), + size2mask(self.args[0].size)) + elif len(self.args) >= 3 and self.is_associative(): # ????? 
def ExprCompose_toC(self):
    """Emit the C expression that rebuilds a composed value.

    Every entry of self.args is read as (expr, start, stop): the C text of
    expr is masked to (stop - start) bits, cast to uintN_t with N equal to
    self.size, and shifted left by start.  The parts are OR-ed together
    inside one pair of parentheses.
    """
    # XXX check mask for 64 bit & 32 bit compat
    cast = "uint%d_t" % self.size
    parts = []
    for piece in self.args:
        sub, start, stop = piece[0], piece[1], piece[2]
        mask = (1 << (stop - start)) - 1
        parts.append("(((%s)(%s & 0x%X)) << %d)" % (cast, sub.toC(),
                                                    mask, start))
    return '(' + ' | '.join(parts) + ')'
fltregs32_expr + + +def init_arch_C(arch): + arch.id2Cid = {} + for x in arch.regs.all_regs_ids + prefetch_id: + arch.id2Cid[x] = ExprId('vmcpu->' + str(x), x.size) + for i in xrange(8): + arch.id2Cid[ExprId('ST(%d)' % i, 64)] = ExprId( + 'vmcpu->' + "float_st%d" % i, 64) + + arch.id2newCid = {} + + for x in arch.regs.all_regs_ids + prefetch_id: + arch.id2newCid[x] = ExprId('vmcpu->%s_new' % x, x.size) + + for i in xrange(8): + arch.id2newCid[ExprId('ST(%d)' % i, 64)] = ExprId( + 'vmcpu->' + "float_st%d_new" % i, 64) + + +def patch_c_id(arch, e): + return e.replace_expr(arch.id2Cid) + + +def patch_c_new_id(arch, e): + return e.replace_expr(arch.id2newCid) + + +mask_int = 0xffffffffffffffff + + +pre_instr_test_exception = r""" +// pre instruction test exception +if (vm_mngr->exception_flags) { + %s; + RETURN_PC; +} +""" + + +code_exception_fetch_mem_at_instr = r""" +// except fetch mem at instr +if (vm_mngr->exception_flags & EXCEPT_DO_NOT_UPDATE_PC) { + %s; + RETURN_PC; +} +""" +code_exception_fetch_mem_post_instr = r""" +// except fetch mem post instr +if (vm_mngr->exception_flags) { + %s; + RETURN_PC; +} +""" + + +code_exception_fetch_mem_at_instr_noautomod = r""" +// except fetch mem at instr noauto +if ((vm_mngr->exception_flags & ~EXCEPT_CODE_AUTOMOD) & EXCEPT_DO_NOT_UPDATE_PC) { + %s; + RETURN_PC; +} +""" +code_exception_fetch_mem_post_instr_noautomod = r""" +// except post instr noauto +if (vm_mngr->exception_flags & ~EXCEPT_CODE_AUTOMOD) { + %s; + RETURN_PC; +} +""" + + +code_exception_at_instr = r""" +// except at instr +if (vmcpu->exception_flags && vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP) { + %s; + RETURN_PC; +} +""" + +code_exception_post_instr = r""" +// except post instr +if (vmcpu->exception_flags) { + if (vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP) + %s; + else + %s; + RETURN_PC; +} +""" + + +code_exception_at_instr_noautomod = r""" +if ((vmcpu->exception_flags & ~EXCEPT_CODE_AUTOMOD) && vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP) { + %s; + 
RETURN_PC; +} +""" + +code_exception_post_instr_noautomod = r""" +if (vmcpu->exception_flags & ~EXCEPT_CODE_AUTOMOD) { + if (vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP) + %s; + else + %s; + RETURN_PC; +} +""" + + +my_size_mask = {1: 1, 2: 3, 3: 7, 7: 0x7f, + 8: 0xFF, + 16: 0xFFFF, + 32: 0xFFFFFFFF, + 64: 0xFFFFFFFFFFFFFFFFL} + +exception_flags = ExprId('exception_flags', 32) + + +def set_pc(my_ir, src): + dst = my_ir.jit_pc + if not isinstance(src, Expr): + src = ExprInt_from(dst, src) + e = ExprAff(dst, src.zeroExtend(dst.size)) + return e + + +def Expr2C(my_ir, l, exprs, gen_exception_code=False): + id_to_update = [] + out = ["// %s" % (l)] + out_pc = [] + + dst_dict = {} + src_mem = {} + + prefect_index = {8: 0, 16: 0, 32: 0, 64: 0} + new_expr = [] + + e = set_pc(my_ir, l.offset & mask_int) + #out.append("%s;" % patch_c_id(my_ir.arch, e).toC()) + + pc_is_dst = False + fetch_mem = False + set_exception_flags = False + for e in exprs: + assert(isinstance(e, ExprAff)) + assert(not isinstance(e.dst, ExprOp)) + if isinstance(e.dst, ExprId): + if not e.dst in dst_dict: + dst_dict[e.dst] = [] + dst_dict[e.dst].append(e) + else: + new_expr.append(e) + # test exception flags + ops = get_expr_ops(e) + if set(['umod', 'udiv']).intersection(ops): + set_exception_flags = True + if e.dst == exception_flags: + set_exception_flags = True + # TODO XXX test function whose set exception_flags + + # search mem lookup for generate mem read prefetch + rs = e.src.get_r(mem_read=True) + for r in rs: + if (not isinstance(r, ExprMem)) or r in src_mem: + continue + fetch_mem = True + index = prefect_index[r.size] + prefect_index[r.size] += 1 + pfmem = prefetch_id_size[r.size][index] + src_mem[r] = pfmem + + for dst, exs in dst_dict.items(): + if len(exs) == 1: + new_expr += exs + continue + exs = [expr_simp(x) for x in exs] + log_to_c_h.debug('warning: detected multi dst to same id') + log_to_c_h.debug('\t'.join([str(x) for x in exs])) + new_expr += exs + out_mem = [] + + # first, 
generate mem prefetch + mem_k = src_mem.keys() + mem_k.sort() + for k in mem_k: + str_src = patch_c_id(my_ir.arch, k).toC() + str_dst = patch_c_id(my_ir.arch, src_mem[k]).toC() + out.append('%s = %s;' % (str_dst, str_src)) + src_w_len = {} + for k, v in src_mem.items(): + src_w_len[k] = v + for e in new_expr: + src, dst = e.src, e.dst + # reload src using prefetch + src = src.replace_expr(src_w_len) + str_src = patch_c_id(my_ir.arch, src).toC() + str_dst = patch_c_id(my_ir.arch, dst).toC() + if isinstance(dst, ExprId): + id_to_update.append(dst) + str_dst = patch_c_new_id(my_ir.arch, dst) + if dst in float_id_e: + # dont mask float affectation + out.append('%s = (%s);' % (str_dst, str_src)) + else: + out.append('%s = (%s)&0x%X;' % (str_dst, str_src, + my_size_mask[src.size])) + elif isinstance(dst, ExprMem): + fetch_mem = True + str_dst = str_dst.replace('MEM_LOOKUP', 'MEM_WRITE') + out_mem.append('%s, %s);' % (str_dst[:-1], str_src)) + + if e.dst == my_ir.arch.pc[my_ir.attrib]: + pc_is_dst = True + out_pc += ["RETURN_PC;"] + + # if len(id_to_update) != len(set(id_to_update)): + # raise ValueError('Not implemented: multi dst to same id!', str([str(x) + # for x in exprs])) + out += out_mem + + if gen_exception_code: + if fetch_mem: + e = set_pc(my_ir, l.offset & mask_int) + s1 = "%s" % patch_c_id(my_ir.arch, e).toC() + out.append(code_exception_fetch_mem_at_instr_noautomod % s1) + if set_exception_flags: + e = set_pc(my_ir, l.offset & mask_int) + s1 = "%s" % patch_c_id(my_ir.arch, e).toC() + out.append(code_exception_at_instr_noautomod % s1) + + for i in id_to_update: + out.append('%s = %s;' % + (patch_c_id(my_ir.arch, i), patch_c_new_id(my_ir.arch, i))) + + post_instr = [] + # test stop exec #### + if gen_exception_code: + if set_exception_flags: + if pc_is_dst: + post_instr.append("if (vm_mngr->exception_flags) { " + + "/*pc = 0x%X; */RETURN_PC; }" % (l.offset)) + else: + e = set_pc(my_ir, l.offset & mask_int) + s1 = "%s" % patch_c_id(my_ir.arch, e).toC() + e = 
set_pc(my_ir, (l.offset + l.l) & mask_int) + s2 = "%s" % patch_c_id(my_ir.arch, e).toC() + post_instr.append( + code_exception_post_instr_noautomod % (s1, s2)) + + if fetch_mem: + if l.additional_info.except_on_instr: + offset = l.offset + else: + offset = l.offset + l.l + + e = set_pc(my_ir, offset & mask_int) + s1 = "%s" % patch_c_id(my_ir.arch, e).toC() + post_instr.append( + code_exception_fetch_mem_post_instr_noautomod % (s1)) + + # pc manip after all modifications + return out, post_instr, post_instr + out_pc + + +def label2offset(e): + if not isinstance(e, ExprId): + return e + if not isinstance(e.name, asmbloc.asm_label): + return e + return ExprInt_from(e, e.name.offset) + + +def expr2pyobj(arch, e): + if isinstance(e, ExprId): + if isinstance(e.name, asmbloc.asm_label): + src_c = 'PyString_FromStringAndSize("%s", %d)' % ( + e.name.name, len(e.name.name)) + else: + src_c = 'PyLong_FromUnsignedLongLong(%s)' % patch_c_id(arch, e) + else: + raise NotImplementedError('unknown type for e: %s' % type(e)) + return src_c + + +def ir2C(my_ir, irbloc, lbl_done, + gen_exception_code=False, log_mn=False, log_regs=False): + out = [] + # print "TRANS" + # print irbloc + out.append(["%s:" % irbloc.label.name]) + assert(len(irbloc.irs) == len(irbloc.lines)) + for l, exprs in zip(irbloc.lines, irbloc.irs): + if l.offset not in lbl_done: + e = set_pc(my_ir, l.offset & mask_int) + s1 = "%s" % patch_c_id(my_ir.arch, e).toC() + out.append([pre_instr_test_exception % (s1)]) + lbl_done.add(l.offset) + + if log_regs: + out.append([r'dump_gpregs(vmcpu);']) + + if log_mn: + out.append(['printf("%.8X %s\\n");' % (l.offset, str(l))]) + # print l + # gen pc update + post_instr = "" + c_code, post_instr, _ = Expr2C(my_ir, l, exprs, gen_exception_code) + out.append(c_code + post_instr) + return out + + +def irblocs2C(my_ir, resolvers, label, irblocs, + gen_exception_code=False, log_mn=False, log_regs=False): + out = [] + out.append("goto %s;" % label.name) + bloc_labels = [x.label for x 
in irblocs] + assert(label in bloc_labels) + + lbl_done = set([None]) + + for irbloc in irblocs: + # XXXX TEST + if irbloc.label.offset is None: + b_out = ir2C(my_ir, irbloc, lbl_done, gen_exception_code) + else: + b_out = ir2C( + my_ir, irbloc, lbl_done, gen_exception_code, log_mn, log_regs) + for exprs in b_out: + for l in exprs: + out.append(l) + dst = irbloc.dst + out.append("") + if asmbloc.expr_is_label(dst): + if dst.name in bloc_labels: + out.append("goto %s;" % dst.name.name) + else: + resolver = resolvers.get_resolver(dst.name.offset) + + e = set_pc(my_ir, dst.name.offset & mask_int) + #out.append("%s;" % patch_c_id(my_ir.arch, e).toC()) + out.append(resolver.ret()) + elif isinstance(dst, ExprSlice) and isinstance(dst.arg, ExprId): + e = set_pc(my_ir, dst) + #out.append("%s;" % patch_c_id(my_ir.arch, e).toC()) + + e = patch_c_id(my_ir.arch, dst).toC() + out.append("return PyLong_FromUnsignedLongLong(%s);" % e) + + elif isinstance(dst, ExprId): + e = set_pc(my_ir, dst) + #out.append("%s;" % patch_c_id(my_ir.arch, e).toC()) + + e = patch_c_id(my_ir.arch, dst).toC() + out.append("return PyLong_FromUnsignedLongLong(%s);" % e) + elif isinstance(dst, ExprCond): + dst_cond_c = patch_c_id(my_ir.arch, dst.cond).toC() + out.append("if (%s)" % dst_cond_c) + + if dst.src1.name in bloc_labels: + out.append(" goto %s;" % dst.src1.name.name) + else: + resolver = resolvers.get_resolver(dst.src1.name.offset) + out.append(resolver.ret()) + + out.append("else") + + if dst.src2.name in bloc_labels: + out.append(" goto %s;" % dst.src2.name.name) + else: + resolver = resolvers.get_resolver(dst.src2.name.offset) + out.append(resolver.ret()) + + else: + raise NotImplementedError('unknown type for dst: %s' % type(dst)) + #print '\n'.join(out) + return out + diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py new file mode 100644 index 00000000..08608142 --- /dev/null +++ b/miasm2/ir/symbexec.py @@ -0,0 +1,435 @@ +from miasm2.expression.expression import * +from 
class symbols():
    """Symbol store split in two tables.

    symbols_id maps every non-ExprMem key to its value.  symbols_mem maps
    the address expression (.arg) of an ExprMem key to the pair
    (ExprMem, value).  A memory lookup only matches when the stored ExprMem
    has the same size as the requested one.
    """

    def __init__(self, init=None):
        self.symbols_id = {}
        self.symbols_mem = {}
        initial = {} if init is None else init
        for key, value in initial.items():
            self[key] = value

    def __contains__(self, a):
        if isinstance(a, ExprMem):
            return (a.arg in self.symbols_mem and
                    self.symbols_mem[a.arg][0].size == a.size)
        return a in self.symbols_id

    def __getitem__(self, a):
        if not isinstance(a, ExprMem):
            return self.symbols_id[a]
        if a.arg in self.symbols_mem:
            entry = self.symbols_mem[a.arg]
            # same address but another access size is reported as missing
            if entry[0].size == a.size:
                return entry[1]
        raise KeyError(a)

    def __setitem__(self, a, v):
        if isinstance(a, ExprMem):
            self.symbols_mem[a.arg] = (a, v)
        else:
            self.symbols_id[a] = v

    def __iter__(self):
        for key in self.symbols_id:
            yield key
        for addr in self.symbols_mem:
            yield self.symbols_mem[addr][0]

    def __delitem__(self, a):
        if isinstance(a, ExprMem):
            del self.symbols_mem[a.arg]
        else:
            del self.symbols_id[a]

    def items(self):
        # (key, value) pairs of both tables, identifiers first
        return list(self.symbols_id.items()) + list(self.symbols_mem.values())

    def keys(self):
        mem_keys = [entry[0] for entry in self.symbols_mem.values()]
        return list(self.symbols_id) + mem_keys

    def copy(self):
        # shallow copy: both tables are duplicated, their content is shared
        clone = symbols()
        clone.symbols_id = dict(self.symbols_id)
        clone.symbols_mem = dict(self.symbols_mem)
        return clone

    def inject_info(self, info):
        """Return a new store whose keys and values went through
        replace_expr(info) followed by expr_simp."""
        result = symbols()
        for key, value in self.items():
            new_key = expr_simp(key.replace_expr(info))
            new_value = expr_simp(value.replace_expr(info))
            result[new_key] = new_value
        return result
def find_mem_by_addr(self, e):
    """Return the memory expression recorded for address @e, or None.

    @e is looked up as a key of self.symbols.symbols_mem; the first item of
    the matching entry is returned.
    """
    known = self.symbols.symbols_mem
    return known[e][0] if e in known else None
def modified_regs(self, init_state=None):
    """Yield, in sorted order, the identifiers whose value differs from
    @init_state.

    @init_state: mapping identifier -> initial value; defaults to
        self.arch.regs.regs_init.

    An identifier of self.symbols.symbols_id is skipped only when it is
    present in @init_state with an equal value.
    """
    if init_state is None:
        init_state = self.arch.regs.regs_init
    symbols_id = self.symbols.symbols_id
    # sorted() accepts any iterable, whereas keys() followed by .sort()
    # needs keys() to return a list
    for i in sorted(symbols_id.keys()):
        # the membership test guards against entries deleted by the
        # consumer while this generator is suspended
        if i in init_state and \
            i in symbols_id and \
                symbols_id[i] == init_state[i]:
            continue
        yield i
def modified(self, init_state=None): + for r in self.modified_regs(init_state): + yield r + for m in self.modified_mems(init_state): + yield m + + def dump_id(self): + ids = self.symbols.symbols_id.keys() + ids.sort() + for i in ids: + if i in self.arch.regs.regs_init and \ + i in self.symbols.symbols_id and \ + self.symbols.symbols_id[i] == self.arch.regs.regs_init[i]: + continue + print i, self.symbols.symbols_id[i] + + def dump_mem(self): + mems = self.symbols.symbols_mem.values() + mems.sort() + for m, v in mems: + print m, v + + def rest_slice(self, slices, start, stop): + o = [] + last = start + for _, a, b in slices: + if a == last: + last = b + continue + o.append((last, a)) + last = b + if last != stop: + o.append((b, stop)) + return o + + def substract_mems(self, a, b): + ex = ExprOp('-', b.arg, a.arg) + ex = self.expr_simp(self.eval_expr(ex, {})) + if not isinstance(ex, ExprInt): + return None + ptr_diff = int(int32(ex.arg)) + out = [] + if ptr_diff < 0: + # [a ] + #[b ]XXX + sub_size = b.size + ptr_diff * 8 + if sub_size >= a.size: + pass + else: + ex = ExprOp('+', a.arg, ExprInt_from(a.arg, sub_size / 8)) + ex = self.expr_simp(self.eval_expr(ex, {})) + + rest_ptr = ex + rest_size = a.size - sub_size + + val = self.symbols[a][sub_size:a.size] + out = [(ExprMem(rest_ptr, rest_size), val)] + else: + #[a ] + # XXXX[b ]YY + + #[a ] + # XXXX[b ] + + out = [] + # part X + if ptr_diff > 0: + val = self.symbols[a][0:ptr_diff * 8] + out.append((ExprMem(a.arg, ptr_diff * 8), val)) + # part Y + if ptr_diff * 8 + b.size < a.size: + + ex = ExprOp('+', b.arg, ExprInt_from(b.arg, b.size / 8)) + ex = self.expr_simp(self.eval_expr(ex, {})) + + rest_ptr = ex + rest_size = a.size - (ptr_diff * 8 + b.size) + val = self.symbols[a][ptr_diff * 8 + b.size:a.size] + out.append((ExprMem(ex, val.size), val)) + return out + + # give mem stored overlapping requested mem ptr + def get_mem_overlapping(self, e, eval_cache=None): + if not isinstance(e, ExprMem): + raise ValueError('mem 
overlap bad arg') + ov = [] + # suppose max mem size is 64 bytes, compute all reachable addresses + to_test = [] + base_ptr = self.expr_simp(e.arg) + for i in xrange(-7, e.size / 8): + ex = self.expr_simp( + self.eval_expr(base_ptr + ExprInt_from(e.arg, i), eval_cache)) + to_test.append((i, ex)) + + for i, x in to_test: + if not x in self.symbols.symbols_mem: + continue + ex = self.expr_simp(self.eval_expr(e.arg - x, eval_cache)) + if not isinstance(ex, ExprInt): + raise ValueError('ex is not ExprInt') + ptr_diff = int32(ex.arg) + if ptr_diff >= self.symbols.symbols_mem[x][1].size / 8: + # print "too long!" + continue + ov.append((i, self.symbols.symbols_mem[x][0])) + return ov + + def eval_ir_expr(self, exprs): + pool_out = {} + + eval_cache = {} + + for e in exprs: + if not isinstance(e, ExprAff): + raise TypeError('not affect', str(e)) + + src = self.eval_expr(e.src, eval_cache) + if isinstance(e.dst, ExprMem): + a = self.eval_expr(e.dst.arg, eval_cache) + a = self.expr_simp(a) + # search already present mem + tmp = None + # test if mem lookup is known + tmp = ExprMem(a, e.dst.size) + dst = tmp + if self.func_write and isinstance(dst.arg, ExprInt): + self.func_write(self, dst, src, pool_out) + else: + pool_out[dst] = src + + elif isinstance(e.dst, ExprId): + pool_out[e.dst] = src + else: + raise ValueError("affected zarb", str(e.dst)) + + return pool_out.items() + + def eval_ir(self, ir): + mem_dst = [] + # src_dst = [(x.src, x.dst) for x in ir] + src_dst = self.eval_ir_expr(ir) + + for dst, src in src_dst: + if isinstance(dst, ExprMem): + mem_overlap = self.get_mem_overlapping(dst) + for _, base in mem_overlap: + diff_mem = self.substract_mems(base, dst) + del(self.symbols[base]) + for new_mem, new_val in diff_mem: + new_val.is_term = True + self.symbols[new_mem] = new_val + src_o = self.expr_simp(src) + # print 'SRCo', src_o + # src_o.is_term = True + self.symbols[dst] = src_o + if isinstance(dst, ExprMem): + mem_dst.append(dst) + return mem_dst + + def 
emulbloc(self, bloc_ir, step=False): + for ir in bloc_ir.irs: + self.eval_ir(ir) + if step: + print '_' * 80 + self.dump_id() + if bloc_ir.dst is None: + return None + return self.eval_expr(bloc_ir.dst) + + def emul_ir_bloc(self, myir, ad): + b = myir.get_bloc(ad) + if b is not None: + ad = self.emulbloc(b) + return ad + + def emul_ir_blocs(self, myir, ad, lbl_stop=None): + while True: + b = myir.get_bloc(ad) + if b is None: + break + if b.label == lbl_stop: + break + ad = self.emulbloc(b) + return ad + + def del_mem_above_stack(self, sp): + sp_val = self.symbols[sp] + for mem_ad, (mem, _) in self.symbols.symbols_mem.items(): + # print mem_ad, sp_val + diff = self.eval_expr(mem_ad - sp_val, {}) + diff = expr_simp(diff) + if not isinstance(diff, ExprInt): + continue + m = expr_simp(diff.msb()) + if m.arg == 1: + del(self.symbols[mem]) + diff --git a/miasm2/jitter/Jitllvm.c b/miasm2/jitter/Jitllvm.c new file mode 100644 index 00000000..ab077b05 --- /dev/null +++ b/miasm2/jitter/Jitllvm.c @@ -0,0 +1,36 @@ +#include <Python.h> + +#include <inttypes.h> + +#include <stdint.h> + +PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) +{ + uint64_t (*func)(void*, void*); + uint64_t vm; + uint64_t cpu; + uint64_t ret; + + if (!PyArg_ParseTuple(args, "KKK", &func, &cpu, &vm)) + return NULL; + ret = func((void*)cpu, (void*)vm); + return PyLong_FromUnsignedLongLong( (uint64_t)ret); +} + + +static PyMethodDef LLVMMethods[] = { + {"llvm_exec_bloc", llvm_exec_bloc, METH_VARARGS, + "llvm exec bloc"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +PyMODINIT_FUNC +initJitllvm(void) +{ + PyObject *m; + + m = Py_InitModule("Jitllvm", LLVMMethods); + if (m == NULL) + return; + +} diff --git a/miasm2/jitter/Jittcc.c b/miasm2/jitter/Jittcc.c new file mode 100644 index 00000000..4b16f0b1 --- /dev/null +++ b/miasm2/jitter/Jittcc.c @@ -0,0 +1,226 @@ +/* +** Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +** +** This program is free software; you can 
redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ +#include <Python.h> + +#include <inttypes.h> +#include <libtcc.h> + +#include <stdint.h> + + + +/* tcc global state */ +TCCState *tcc_state = NULL; + + +int include_array_count = 0; +char **include_array = NULL; + + +int lib_array_count = 0; +char **lib_array = NULL; + +//char *libcodenat_path = NULL; + + +void tcc_init_state(void) +{ + int i; + + tcc_state = tcc_new(); + if (!tcc_state) { + fprintf(stderr, "Impossible de creer un contexte TCC\n"); + exit(1); + } + tcc_set_output_type(tcc_state, TCC_OUTPUT_MEMORY); + + //tcc_add_file(tcc_state, libcodenat_path); + for (i=0;i<lib_array_count; i++){ + tcc_add_file(tcc_state, lib_array[i]); + } + + for (i=0;i<include_array_count; i++){ + tcc_add_include_path(tcc_state, include_array[i]); + } +} + + + + +PyObject* tcc_set_emul_lib_path(PyObject* self, PyObject* args) +{ + char* include_arg; + char* lib_arg; + + char* str1, * str2; + + if (!PyArg_ParseTuple(args, "ss", + &include_arg, + &lib_arg)) + return NULL; + /* + if (include_array) + free(include_array); + */ + + str2 = strdup(include_arg); + while (str2){ + str1 = strsep(&str2, ";"); + if (str1){ + include_array_count ++; + include_array = realloc(include_array, + include_array_count * sizeof(char*)); + include_array[include_array_count-1] = strdup(str1); + fprintf(stderr, "adding 
include file: %s\n", str1); + } + } + + + str2 = strdup(lib_arg); + while (str2){ + str1 = strsep(&str2, ";"); + if (str1){ + lib_array_count ++; + lib_array = realloc(lib_array, + lib_array_count * sizeof(char*)); + lib_array[lib_array_count-1] = strdup(str1); + fprintf(stderr, "adding lib file: %s\n", str1); + } + } + + /* + libcodenat_path = (char*)malloc(strlen(libcodenat_path_arg)+1); + strcpy(libcodenat_path, libcodenat_path_arg); + */ + Py_INCREF(Py_None); + + + return Py_None; +} + + +PyObject* tcc_exec_bloc(PyObject* self, PyObject* args) +{ + PyObject* (*func)(void*, void*); + uint64_t vm; + uint64_t cpu; + PyObject* ret; + + if (!PyArg_ParseTuple(args, "KKK", &func, &cpu, &vm)) + return NULL; + ret = func((void*)cpu, (void*)vm); + return ret; +} + +PyObject* tcc_compil(PyObject* self, PyObject* args) +{ + char* func_name; + char* func_code; + int (*entry)(void); + + tcc_init_state(); + + if (!PyArg_ParseTuple(args, "ss", &func_name, &func_code)) + return NULL; + + if (tcc_compile_string(tcc_state, func_code) != 0) { + fprintf(stderr, "Erreur de compilation !\n"); + fprintf(stderr, "%s\n", func_code); + exit(0); + } + /* XXX use tinycc devel with -fPIC patch in makefile */ + if (tcc_relocate(tcc_state, TCC_RELOCATE_AUTO) < 0) { + fprintf(stderr, "tcc relocate error\n"); + exit(0); + } + entry = tcc_get_symbol(tcc_state, func_name); + if (!entry){ + fprintf(stderr, "Erreur de symbole %s!\n", func_name); + fprintf(stderr, "%s\n", func_name); + exit(0); + } + return PyLong_FromUnsignedLongLong((uint64_t)entry); + +} + + + +PyObject* tcc_loop_exec(PyObject* self, PyObject* args) +{ + //PyObject* (*func)(void*, void*); + uint64_t* vm; + uint64_t* cpu; + PyObject* ret; + PyObject* func; + PyObject* pArgs; + + + if (!PyArg_ParseTuple(args, "OKK", &func, &cpu, &vm)) + return NULL; + + while (1) { + if (!PyCallable_Check (func)) { + fprintf(stderr, "function not callable!\n"); + exit(0); + } + + pArgs = PyTuple_New(2); + PyTuple_SetItem(pArgs, 0, 
PyLong_FromUnsignedLongLong((uint64_t)cpu)); + PyTuple_SetItem(pArgs, 1, PyLong_FromUnsignedLongLong((uint64_t)vm)); + ret = PyObject_CallObject(func, pArgs); + Py_DECREF(2); + + if (ret == Py_None) { + Py_INCREF(Py_None); + return Py_None; + } + func = ret; + } + + return ret; +} + + + +static PyObject *TccError; + + +static PyMethodDef TccMethods[] = { + {"tcc_set_emul_lib_path", tcc_set_emul_lib_path, METH_VARARGS, + "init tcc path"}, + {"tcc_exec_bloc", tcc_exec_bloc, METH_VARARGS, + "tcc exec bloc"}, + {"tcc_compil", tcc_compil, METH_VARARGS, + "tcc compil"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +PyMODINIT_FUNC +initJittcc(void) +{ + PyObject *m; + + m = Py_InitModule("Jittcc", TccMethods); + if (m == NULL) + return; + + TccError = PyErr_NewException("tcc.error", NULL, NULL); + Py_INCREF(TccError); + PyModule_AddObject(m, "error", TccError); +} + diff --git a/miasm2/jitter/Makefile b/miasm2/jitter/Makefile new file mode 100644 index 00000000..7d43f28c --- /dev/null +++ b/miasm2/jitter/Makefile @@ -0,0 +1,17 @@ +all: vm_mngr.so Jit_x86.so Jittcc.so + + +vm_mngr.so: vm_mngr.c vm_mngr.h vm_mngr_py.c + gcc -I /usr/include/python2.7 -shared -fPIC vm_mngr.c vm_mngr_py.c -o vm_mngr.so + +Jit_x86.so: arch/Jit_x86.c + gcc -I /usr/include/python2.7 -shared -fPIC arch/Jit_x86.c -o arch/Jit_x86.so + +Jit_arm.so: arch/Jit_arm.c + gcc -I /usr/include/python2.7 -shared -fPIC arch/Jit_arm.c -o arch/Jit_arm.so + +Jittcc.so: Jittcc.c + gcc -I /usr/include/python2.7 -shared -fPIC Jittcc.c -o Jittcc.so -ltcc + +clean: + rm vm_mngr.so Jittcc.so arch/Jit_x86.so \ No newline at end of file diff --git a/miasm2/jitter/__init__.py b/miasm2/jitter/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/jitter/__init__.py diff --git a/miasm2/jitter/arch/JitCore.h b/miasm2/jitter/arch/JitCore.h new file mode 100644 index 00000000..2686cb46 --- /dev/null +++ b/miasm2/jitter/arch/JitCore.h @@ -0,0 +1,66 @@ + +#define PyGetInt(item, value) \ + if 
(PyInt_Check(item)){ \ + value = (uint64_t)PyInt_AsLong(item); \ + } \ + else if (PyLong_Check(item)){ \ + value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ + } \ + else{ \ + RAISE(PyExc_TypeError,"arg must be int"); \ + } \ + + +#define PyGetInt_ret0(item, value) \ + if (PyInt_Check(item)){ \ + value = (uint64_t)PyInt_AsLong(item); \ + } \ + else if (PyLong_Check(item)){ \ + value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ + } \ + else{ \ + printf("error\n"); return 0; \ + } \ + + + +#define getset_reg_u64(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint64_t)(self->vmcpu. regname )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint64_t val; \ + PyGetInt_ret0(value, val); \ + self->vmcpu. regname = val; \ + return 0; \ + } + +#define getset_reg_u32(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint32_t)(self->vmcpu. regname )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint32_t val; \ + PyGetInt_ret0(value, val); \ + self->vmcpu. regname = val; \ + return 0; \ + } + + +#define getset_reg_u16(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint16_t)(self->vmcpu. regname )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint16_t val; \ + PyGetInt_ret0(value, val); \ + self->vmcpu. 
regname = val; \ + return 0; \ + } + diff --git a/miasm2/jitter/arch/JitCore_arm.c b/miasm2/jitter/arch/JitCore_arm.c new file mode 100644 index 00000000..7e64ae77 --- /dev/null +++ b/miasm2/jitter/arch/JitCore_arm.c @@ -0,0 +1,388 @@ +#include <Python.h> +#include "JitCore.h" +#include "structmember.h" +#include <stdint.h> +#include <inttypes.h> +#include "JitCore_arm.h" + +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} + +typedef struct _reg_dict{ + char* name; + unsigned long offset; +} reg_dict; + + +#define PyGetInt(item, value) \ + if (PyInt_Check(item)){ \ + value = (uint64_t)PyInt_AsLong(item); \ + } \ + else if (PyLong_Check(item)){ \ + value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ + } \ + else{ \ + RAISE(PyExc_TypeError,"arg must be int"); \ + } \ + +reg_dict gpreg_dict[] = { {.name = "R0", .offset = offsetof(vm_cpu_t, R0)}, + {.name = "R1", .offset = offsetof(vm_cpu_t, R1)}, + {.name = "R2", .offset = offsetof(vm_cpu_t, R2)}, + {.name = "R3", .offset = offsetof(vm_cpu_t, R3)}, + {.name = "R4", .offset = offsetof(vm_cpu_t, R4)}, + {.name = "R5", .offset = offsetof(vm_cpu_t, R5)}, + {.name = "R6", .offset = offsetof(vm_cpu_t, R6)}, + {.name = "R7", .offset = offsetof(vm_cpu_t, R7)}, + {.name = "R8", .offset = offsetof(vm_cpu_t, R8)}, + {.name = "R9", .offset = offsetof(vm_cpu_t, R9)}, + {.name = "R10", .offset = offsetof(vm_cpu_t, R10)}, + {.name = "R11", .offset = offsetof(vm_cpu_t, R11)}, + {.name = "R12", .offset = offsetof(vm_cpu_t, R12)}, + {.name = "SP", .offset = offsetof(vm_cpu_t, SP)}, + {.name = "LR", .offset = offsetof(vm_cpu_t, LR)}, + {.name = "PC", .offset = offsetof(vm_cpu_t, PC)}, + + {.name = "zf", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "nf", .offset = offsetof(vm_cpu_t, nf)}, + {.name = "of", .offset = offsetof(vm_cpu_t, of)}, + {.name = "cf", .offset = offsetof(vm_cpu_t, cf)}, +}; + +/************************** JitCpu object **************************/ + +typedef struct { + 
PyObject_HEAD + PyObject *cpu; /* cpu */ + vm_cpu_t vmcpu; +} JitCpu; + + + +#define get_reg(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)self->vmcpu.reg); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + + +PyObject* vm_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(R0); + get_reg(R1); + get_reg(R2); + get_reg(R3); + get_reg(R4); + get_reg(R5); + get_reg(R6); + get_reg(R7); + get_reg(R8); + get_reg(R9); + get_reg(R10); + get_reg(R11); + get_reg(R12); + get_reg(SP); + get_reg(LR); + get_reg(PC); + + get_reg(zf); + get_reg(nf); + get_reg(of); + get_reg(cf); + + return dict; +} + +PyObject* _vm_set_gpreg(JitCpu* self, PyObject *dict) +{ + PyObject *d_key, *d_value = NULL; + Py_ssize_t pos = 0; + uint64_t val; + unsigned int i, found; + + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + if(!PyString_Check(d_key)) + RAISE(PyExc_TypeError, "key must be str"); + + PyGetInt(d_value, val); + + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(PyString_AsString(d_key), gpreg_dict[i].name)) + continue; + *((uint32_t*)(((char*)&(self->vmcpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unkown key: %s\n", PyString_AsString(d_key)); + RAISE(PyExc_ValueError, "unkown reg"); + } + return NULL; +} + +PyObject* vm_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + if (!PyArg_ParseTuple(args, "O", &dict)) + return NULL; + _vm_set_gpreg(self, dict); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_set_exception(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + return NULL; + + PyGetInt(item1, i); + + self->vmcpu.exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_get_exception(JitCpu* self, PyObject* args) +{ + return 
PyLong_FromUnsignedLongLong((uint64_t)self->vmcpu.exception_flags); +} + + +PyObject * vm_init_regs(JitCpu* self) +{ + memset(&self->vmcpu, 0, sizeof(vm_cpu_t)); + + Py_INCREF(Py_None); + return Py_None; + +} + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + printf("R0 %.16"PRIX32" R1 %.16"PRIX32" R2 %.16"PRIX32" R3 %.16"PRIX32"\n", + vmcpu->R0, vmcpu->R1, vmcpu->R2, vmcpu->R3); + printf("R4 %.16"PRIX32" R5 %.16"PRIX32" R6 %.16"PRIX32" R7 %.16"PRIX32"\n", + vmcpu->R4, vmcpu->R5, vmcpu->R6, vmcpu->R7); + printf("R8 %.16"PRIX32" R9 %.16"PRIX32" R10 %.16"PRIX32" R11 %.16"PRIX32"\n", + vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); + printf("R12 %.16"PRIX32" SP %.16"PRIX32" LR %.16"PRIX32" PC %.16"PRIX32"\n", + vmcpu->R12, vmcpu->SP, vmcpu->LR, vmcpu->PC); + printf("zf %.16"PRIX32" nf %.16"PRIX32" of %.16"PRIX32" cf %.16"PRIX32"\n", + vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); +} + + +PyObject * vm_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = &self->vmcpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + + +static void +JitCpu_dealloc(JitCpu* self) +{ + self->ob_type->tp_free((PyObject*)self); +} + + +static PyObject * +JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + JitCpu *self; + + self = (JitCpu *)type->tp_alloc(type, 0); + return (PyObject *)self; +} + +static PyObject * +JitCpu_get_cpu(JitCpu *self, void *closure) +{ + return PyLong_FromUnsignedLongLong((uint64_t)&(self->vmcpu)); +} + +static int +JitCpu_set_cpu(JitCpu *self, PyObject *value, void *closure) +{ + PyErr_SetString(PyExc_TypeError, "immutable cpu"); + return -1; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"vm_init_regs", (PyCFunction)vm_init_regs, METH_NOARGS, + "X"}, + {"vm_dump_gpregs", (PyCFunction)vm_dump_gpregs, METH_NOARGS, + "X"}, + {"vm_get_gpreg", (PyCFunction)vm_get_gpreg, METH_NOARGS, + "X"}, + {"vm_set_gpreg", (PyCFunction)vm_set_gpreg, METH_VARARGS, 
+ "X"}, + {"vm_get_exception", (PyCFunction)vm_get_exception, METH_VARARGS, + "X"}, + {"vm_set_exception", (PyCFunction)vm_set_exception, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + + + fprintf(stderr, "ad cpu: %p\n", &(self->vmcpu)); + + return 0; +} + +getset_reg_u32(R0); +getset_reg_u32(R1); +getset_reg_u32(R2); +getset_reg_u32(R3); +getset_reg_u32(R4); +getset_reg_u32(R5); +getset_reg_u32(R6); +getset_reg_u32(R7); +getset_reg_u32(R8); +getset_reg_u32(R9); +getset_reg_u32(R10); +getset_reg_u32(R11); +getset_reg_u32(R12); +getset_reg_u32(SP); +getset_reg_u32(LR); +getset_reg_u32(PC); + +getset_reg_u32(zf); +getset_reg_u32(nf); +getset_reg_u32(of); +getset_reg_u32(cf); + + +static PyGetSetDef JitCpu_getseters[] = { + {"cpu", + (getter)JitCpu_get_cpu, (setter)JitCpu_set_cpu, + "first name", + NULL}, + + {"R0" , (getter)JitCpu_get_R0 , (setter)JitCpu_set_R0 , "R0" , NULL}, + {"R1" , (getter)JitCpu_get_R1 , (setter)JitCpu_set_R1 , "R1" , NULL}, + {"R2" , (getter)JitCpu_get_R2 , (setter)JitCpu_set_R2 , "R2" , NULL}, + {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, + {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, + {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, + {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, + {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, + {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, + {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, + {"R10", (getter)JitCpu_get_R10, (setter)JitCpu_set_R10, "R10", NULL}, + {"R11", (getter)JitCpu_get_R11, (setter)JitCpu_set_R11, "R11", NULL}, + {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL}, + {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, + {"LR" , (getter)JitCpu_get_LR , (setter)JitCpu_set_LR , "LR" , NULL}, + {"PC" , 
(getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, + + {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, + {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, + {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, + {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, + + {NULL} /* Sentinel */ +}; + + +static PyTypeObject JitCpuType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "JitCore_arm.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_arm_Methods[] = { + + /* + + */ + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + +static PyObject *JitCore_arm_Error; + +PyMODINIT_FUNC +initJitCore_arm(void) +{ + PyObject *m; + + if (PyType_Ready(&JitCpuType) < 0) + return; + + m = Py_InitModule("JitCore_arm", JitCore_arm_Methods); + if (m == NULL) + return; + + JitCore_arm_Error = PyErr_NewException("JitCore_arm.error", NULL, NULL); + Py_INCREF(JitCore_arm_Error); + PyModule_AddObject(m, "error", JitCore_arm_Error); + + Py_INCREF(&JitCpuType); + PyModule_AddObject(m, "JitCpu", (PyObject *)&JitCpuType); + 
+} + diff --git a/miasm2/jitter/arch/JitCore_arm.h b/miasm2/jitter/arch/JitCore_arm.h new file mode 100644 index 00000000..e92db860 --- /dev/null +++ b/miasm2/jitter/arch/JitCore_arm.h @@ -0,0 +1,147 @@ + +typedef struct { + uint32_t exception_flags; + uint32_t exception_flags_new; + + /* gpregs */ + uint32_t R0; + uint32_t R1; + uint32_t R2; + uint32_t R3; + uint32_t R4; + uint32_t R5; + uint32_t R6; + uint32_t R7; + uint32_t R8; + uint32_t R9; + uint32_t R10; + uint32_t R11; + uint32_t R12; + uint32_t SP; + uint32_t LR; + uint32_t PC; + + uint32_t R0_new; + uint32_t R1_new; + uint32_t R2_new; + uint32_t R3_new; + uint32_t R4_new; + uint32_t R5_new; + uint32_t R6_new; + uint32_t R7_new; + uint32_t R8_new; + uint32_t R9_new; + uint32_t R10_new; + uint32_t R11_new; + uint32_t R12_new; + uint32_t SP_new; + uint32_t LR_new; + uint32_t PC_new; + + /* eflag */ + uint32_t zf; + uint32_t nf; + uint32_t of; + uint32_t cf; + + uint32_t zf_new; + uint32_t nf_new; + uint32_t of_new; + uint32_t cf_new; + + + uint8_t pfmem08_0; + uint8_t pfmem08_1; + uint8_t pfmem08_2; + uint8_t pfmem08_3; + uint8_t pfmem08_4; + uint8_t pfmem08_5; + uint8_t pfmem08_6; + uint8_t pfmem08_7; + uint8_t pfmem08_8; + uint8_t pfmem08_9; + uint8_t pfmem08_10; + uint8_t pfmem08_11; + uint8_t pfmem08_12; + uint8_t pfmem08_13; + uint8_t pfmem08_14; + uint8_t pfmem08_15; + uint8_t pfmem08_16; + uint8_t pfmem08_17; + uint8_t pfmem08_18; + uint8_t pfmem08_19; + + + uint16_t pfmem16_0; + uint16_t pfmem16_1; + uint16_t pfmem16_2; + uint16_t pfmem16_3; + uint16_t pfmem16_4; + uint16_t pfmem16_5; + uint16_t pfmem16_6; + uint16_t pfmem16_7; + uint16_t pfmem16_8; + uint16_t pfmem16_9; + uint16_t pfmem16_10; + uint16_t pfmem16_11; + uint16_t pfmem16_12; + uint16_t pfmem16_13; + uint16_t pfmem16_14; + uint16_t pfmem16_15; + uint16_t pfmem16_16; + uint16_t pfmem16_17; + uint16_t pfmem16_18; + uint16_t pfmem16_19; + + + uint32_t pfmem32_0; + uint32_t pfmem32_1; + uint32_t pfmem32_2; + uint32_t pfmem32_3; + uint32_t 
pfmem32_4; + uint32_t pfmem32_5; + uint32_t pfmem32_6; + uint32_t pfmem32_7; + uint32_t pfmem32_8; + uint32_t pfmem32_9; + uint32_t pfmem32_10; + uint32_t pfmem32_11; + uint32_t pfmem32_12; + uint32_t pfmem32_13; + uint32_t pfmem32_14; + uint32_t pfmem32_15; + uint32_t pfmem32_16; + uint32_t pfmem32_17; + uint32_t pfmem32_18; + uint32_t pfmem32_19; + + + uint64_t pfmem64_0; + uint64_t pfmem64_1; + uint64_t pfmem64_2; + uint64_t pfmem64_3; + uint64_t pfmem64_4; + uint64_t pfmem64_5; + uint64_t pfmem64_6; + uint64_t pfmem64_7; + uint64_t pfmem64_8; + uint64_t pfmem64_9; + uint64_t pfmem64_10; + uint64_t pfmem64_11; + uint64_t pfmem64_12; + uint64_t pfmem64_13; + uint64_t pfmem64_14; + uint64_t pfmem64_15; + uint64_t pfmem64_16; + uint64_t pfmem64_17; + uint64_t pfmem64_18; + uint64_t pfmem64_19; + + + uint32_t segm_base[0x10000]; + +}vm_cpu_t; + + + +#define RETURN_PC return PyLong_FromUnsignedLongLong(vmcpu->PC); diff --git a/miasm2/jitter/arch/JitCore_msp430.c b/miasm2/jitter/arch/JitCore_msp430.c new file mode 100644 index 00000000..17c1497e --- /dev/null +++ b/miasm2/jitter/arch/JitCore_msp430.c @@ -0,0 +1,694 @@ +#include <Python.h> +#include "JitCore.h" +#include "structmember.h" +#include <stdint.h> +#include <inttypes.h> +#include "JitCore_msp430.h" + +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} + +/* +void check_align(uint64_t addr) +{ + if (addr & 1) { + printf("unaligned mem lookup %X\n", addr); + exit(0); + } +} + +void VM_MEM_WRITE_08(vm_mngr_t* vm_mngr, uint64_t addr, unsigned char src) +{ + //check_align(addr); + MEM_WRITE_08(vm_mngr, addr, src); +} + +void VM_MEM_WRITE_16(vm_mngr_t* vm_mngr, uint64_t addr, unsigned short src) +{ + check_align(addr); + MEM_WRITE_16(vm_mngr, addr, src); +} + +void VM_MEM_WRITE_32(vm_mngr_t* vm_mngr, uint64_t addr, unsigned int src) +{ + check_align(addr); + MEM_WRITE_32(vm_mngr, addr, src); +} + +void VM_MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src) +{ + 
check_align(addr); + MEM_WRITE_64(vm_mngr, addr, src); +} +*/ + +typedef struct _reg_dict{ + char* name; + unsigned long offset; +} reg_dict; + + +#define PyGetInt(item, value) \ + if (PyInt_Check(item)){ \ + value = (uint64_t)PyInt_AsLong(item); \ + } \ + else if (PyLong_Check(item)){ \ + value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ + } \ + else{ \ + RAISE(PyExc_TypeError,"arg must be int"); \ + } \ + +reg_dict gpreg_dict[] = { {.name = "PC", .offset = offsetof(vm_cpu_t, PC)}, + {.name = "SP", .offset = offsetof(vm_cpu_t, SP)}, + //{.name = "SR", .offset = offsetof(vm_cpu_t, SR)}, + {.name = "R3", .offset = offsetof(vm_cpu_t, R3)}, + {.name = "R4", .offset = offsetof(vm_cpu_t, R4)}, + {.name = "R5", .offset = offsetof(vm_cpu_t, R5)}, + {.name = "R6", .offset = offsetof(vm_cpu_t, R6)}, + {.name = "R7", .offset = offsetof(vm_cpu_t, R7)}, + {.name = "R8", .offset = offsetof(vm_cpu_t, R8)}, + {.name = "R9", .offset = offsetof(vm_cpu_t, R9)}, + {.name = "R10", .offset = offsetof(vm_cpu_t, R10)}, + {.name = "R11", .offset = offsetof(vm_cpu_t, R11)}, + {.name = "R12", .offset = offsetof(vm_cpu_t, R12)}, + {.name = "R13", .offset = offsetof(vm_cpu_t, R13)}, + {.name = "R14", .offset = offsetof(vm_cpu_t, R14)}, + {.name = "R15", .offset = offsetof(vm_cpu_t, R15)}, + + {.name = "zf", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "nf", .offset = offsetof(vm_cpu_t, nf)}, + {.name = "of", .offset = offsetof(vm_cpu_t, of)}, + {.name = "cf", .offset = offsetof(vm_cpu_t, cf)}, + + {.name = "cpuoff", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "gie", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "osc", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "scg0", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "scg1", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "res", .offset = offsetof(vm_cpu_t, zf)}, + +}; + +/************************** JitCpu object **************************/ + +typedef struct { + PyObject_HEAD + PyObject *cpu; /* cpu */ + vm_cpu_t vmcpu; +} JitCpu; + + + 
+#define get_reg(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)self->vmcpu.reg); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + + +PyObject* vm_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(PC); + get_reg(SP); + //get_reg(SR); + get_reg(R3); + get_reg(R4); + get_reg(R5); + get_reg(R6); + get_reg(R7); + get_reg(R8); + get_reg(R9); + get_reg(R10); + get_reg(R11); + get_reg(R12); + get_reg(R13); + get_reg(R14); + get_reg(R15); + + get_reg(zf); + get_reg(nf); + get_reg(of); + get_reg(cf); + + get_reg(cpuoff); + get_reg(gie); + get_reg(osc); + get_reg(scg0); + get_reg(scg1); + get_reg(res); + + + return dict; +} + +#define get_reg_off(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)offsetof(vm_cpu_t, reg)); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + get_reg_off(exception_flags); + get_reg_off(exception_flags_new); + get_reg_off(PC); + get_reg_off(SP); + get_reg_off(R3); + get_reg_off(R4); + get_reg_off(R5); + get_reg_off(R6); + get_reg_off(R7); + get_reg_off(R8); + get_reg_off(R9); + get_reg_off(R10); + get_reg_off(R11); + get_reg_off(R12); + get_reg_off(R13); + get_reg_off(R14); + get_reg_off(R15); + get_reg_off(PC_new); + get_reg_off(SP_new); + get_reg_off(SR_new); + get_reg_off(R3_new); + get_reg_off(R4_new); + get_reg_off(R5_new); + get_reg_off(R6_new); + get_reg_off(R7_new); + get_reg_off(R8_new); + get_reg_off(R9_new); + get_reg_off(R10_new); + get_reg_off(R11_new); + get_reg_off(R12_new); + get_reg_off(R13_new); + get_reg_off(R14_new); + get_reg_off(R15_new); + get_reg_off(zf); + get_reg_off(nf); + get_reg_off(of); + get_reg_off(cf); + get_reg_off(cpuoff); + get_reg_off(gie); + get_reg_off(osc); + get_reg_off(scg0); + get_reg_off(scg1); + get_reg_off(res); + get_reg_off(zf_new); + get_reg_off(nf_new); + get_reg_off(of_new); + get_reg_off(cf_new); + 
get_reg_off(cpuoff_new); + get_reg_off(gie_new); + get_reg_off(osc_new); + get_reg_off(scg0_new); + get_reg_off(scg1_new); + get_reg_off(res_new); + get_reg_off(pfmem08_0); + get_reg_off(pfmem08_1); + get_reg_off(pfmem08_2); + get_reg_off(pfmem08_3); + get_reg_off(pfmem08_4); + get_reg_off(pfmem08_5); + get_reg_off(pfmem08_6); + get_reg_off(pfmem08_7); + get_reg_off(pfmem08_8); + get_reg_off(pfmem08_9); + get_reg_off(pfmem08_10); + get_reg_off(pfmem08_11); + get_reg_off(pfmem08_12); + get_reg_off(pfmem08_13); + get_reg_off(pfmem08_14); + get_reg_off(pfmem08_15); + get_reg_off(pfmem08_16); + get_reg_off(pfmem08_17); + get_reg_off(pfmem08_18); + get_reg_off(pfmem08_19); + get_reg_off(pfmem16_0); + get_reg_off(pfmem16_1); + get_reg_off(pfmem16_2); + get_reg_off(pfmem16_3); + get_reg_off(pfmem16_4); + get_reg_off(pfmem16_5); + get_reg_off(pfmem16_6); + get_reg_off(pfmem16_7); + get_reg_off(pfmem16_8); + get_reg_off(pfmem16_9); + get_reg_off(pfmem16_10); + get_reg_off(pfmem16_11); + get_reg_off(pfmem16_12); + get_reg_off(pfmem16_13); + get_reg_off(pfmem16_14); + get_reg_off(pfmem16_15); + get_reg_off(pfmem16_16); + get_reg_off(pfmem16_17); + get_reg_off(pfmem16_18); + get_reg_off(pfmem16_19); + get_reg_off(pfmem32_0); + get_reg_off(pfmem32_1); + get_reg_off(pfmem32_2); + get_reg_off(pfmem32_3); + get_reg_off(pfmem32_4); + get_reg_off(pfmem32_5); + get_reg_off(pfmem32_6); + get_reg_off(pfmem32_7); + get_reg_off(pfmem32_8); + get_reg_off(pfmem32_9); + get_reg_off(pfmem32_10); + get_reg_off(pfmem32_11); + get_reg_off(pfmem32_12); + get_reg_off(pfmem32_13); + get_reg_off(pfmem32_14); + get_reg_off(pfmem32_15); + get_reg_off(pfmem32_16); + get_reg_off(pfmem32_17); + get_reg_off(pfmem32_18); + get_reg_off(pfmem32_19); + get_reg_off(pfmem64_0); + get_reg_off(pfmem64_1); + get_reg_off(pfmem64_2); + get_reg_off(pfmem64_3); + get_reg_off(pfmem64_4); + get_reg_off(pfmem64_5); + get_reg_off(pfmem64_6); + get_reg_off(pfmem64_7); + get_reg_off(pfmem64_8); + get_reg_off(pfmem64_9); + 
get_reg_off(pfmem64_10); + get_reg_off(pfmem64_11); + get_reg_off(pfmem64_12); + get_reg_off(pfmem64_13); + get_reg_off(pfmem64_14); + get_reg_off(pfmem64_15); + get_reg_off(pfmem64_16); + get_reg_off(pfmem64_17); + get_reg_off(pfmem64_18); + get_reg_off(pfmem64_19); + return dict; +} + + +PyObject* _vm_set_gpreg(JitCpu* self, PyObject *dict) +{ + PyObject *d_key, *d_value = NULL; + Py_ssize_t pos = 0; + uint64_t val; + unsigned int i, found; + + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + if(!PyString_Check(d_key)) + RAISE(PyExc_TypeError, "key must be str"); + + PyGetInt(d_value, val); + + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(PyString_AsString(d_key), gpreg_dict[i].name)) + continue; + *((uint32_t*)(((char*)&(self->vmcpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unkown key: %s\n", PyString_AsString(d_key)); + RAISE(PyExc_ValueError, "unkown reg"); + } + return NULL; +} + +uint8_t const bcd2bin_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 +}; + +uint8_t const bin2bcd_data[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x40, 0x41, 0x42, 0x43, 
0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99 +}; + +inline uint16_t bcdadd_16(uint16_t a, uint16_t b) +{ + int carry = 0; + int i,j = 0; + uint16_t res = 0; + int nib_a, nib_b; + for (i = 0; i < 16; i += 4) { + nib_a = (a >> i) & (0xF); + nib_b = (b >> i) & (0xF); + + j = (carry + nib_a + nib_b); + if (j >= 10) { + carry = 1; + j -= 10; + j &=0xf; + } + else { + carry = 0; + } + res += j << i; + } + return res; +} + +inline uint16_t bcdadd_cf_16(uint16_t a, uint16_t b) +{ + int carry = 0; + int i,j = 0; + int nib_a, nib_b; + for (i = 0; i < 16; i += 4) { + nib_a = (a >> i) & (0xF); + nib_b = (b >> i) & (0xF); + + j = (carry + nib_a + nib_b); + if (j >= 10) { + carry = 1; + j -= 10; + j &=0xf; + } + else { + carry = 0; + } + } + return carry; +} + + +inline uint16_t hex2bcd_16(uint16_t a) +{ + return bcd2bin_data[a & 0xFF] + (bcd2bin_data[(a >> 8) & 0xFF] * 100); +} + +inline uint8_t hex2bcd_8(uint8_t a) +{ + return bcd2bin_data[a & 0xFF]; +} + +inline uint8_t bcd2hex_8(uint8_t a) +{ + return bin2bcd_data[a & 0xFF]; +} + +inline uint16_t bcd2hex_16(uint16_t a) +{ + return bcd2bin_data[a % 100] | (bcd2bin_data[(a / 100)] << 8); +} + + +PyObject* vm_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + if (!PyArg_ParseTuple(args, "O", &dict)) + return NULL; + _vm_set_gpreg(self, dict); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_set_exception(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + return NULL; + + PyGetInt(item1, i); + + self->vmcpu.exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_get_exception(JitCpu* self, PyObject* args) +{ + return 
PyLong_FromUnsignedLongLong((uint64_t)self->vmcpu.exception_flags); +} + + +PyObject * vm_init_regs(JitCpu* self) +{ + memset(&self->vmcpu, 0, sizeof(vm_cpu_t)); + + Py_INCREF(Py_None); + return Py_None; + +} + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + + printf("PC %.4"PRIX32" SP %.4"PRIX32" R3 %.4"PRIX32" ", + vmcpu->PC, vmcpu->SP, vmcpu->R3); + printf("R4 %.4"PRIX32" R5 %.4"PRIX32" R6 %.4"PRIX32" R7 %.4"PRIX32"\n", + vmcpu->R4, vmcpu->R5, vmcpu->R6, vmcpu->R7); + printf("R8 %.4"PRIX32" R9 %.4"PRIX32" R10 %.4"PRIX32" R11 %.4"PRIX32" ", + vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); + printf("R12 %.4"PRIX32" R13 %.4"PRIX32" R14 %.4"PRIX32" R15 %.4"PRIX32"\n", + vmcpu->R12, vmcpu->R13, vmcpu->R14, vmcpu->R15); + printf("zf %.4"PRIX32" nf %.4"PRIX32" of %.4"PRIX32" cf %.4"PRIX32"\n", + vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); +} + + +PyObject * vm_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = &self->vmcpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + + +static void +JitCpu_dealloc(JitCpu* self) +{ + self->ob_type->tp_free((PyObject*)self); +} + + +static PyObject * +JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + JitCpu *self; + + self = (JitCpu *)type->tp_alloc(type, 0); + return (PyObject *)self; +} + +static PyObject * +JitCpu_get_cpu(JitCpu *self, void *closure) +{ + return PyLong_FromUnsignedLongLong((uint64_t)&(self->vmcpu)); +} + +static int +JitCpu_set_cpu(JitCpu *self, PyObject *value, void *closure) +{ + PyErr_SetString(PyExc_TypeError, "immutable cpu"); + return -1; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"vm_init_regs", (PyCFunction)vm_init_regs, METH_NOARGS, + "X"}, + {"vm_dump_gpregs", (PyCFunction)vm_dump_gpregs, METH_NOARGS, + "X"}, + {"vm_get_gpreg", (PyCFunction)vm_get_gpreg, METH_NOARGS, + "X"}, + {"vm_set_gpreg", (PyCFunction)vm_set_gpreg, METH_VARARGS, + "X"}, + {"vm_get_exception", 
(PyCFunction)vm_get_exception, METH_VARARGS, + "X"}, + {"vm_set_exception", (PyCFunction)vm_set_exception, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + + + fprintf(stderr, "ad cpu: %p\n", &(self->vmcpu)); + + return 0; +} + +getset_reg_u16(PC); +getset_reg_u16(SP); +getset_reg_u16(R3); +getset_reg_u16(R4); +getset_reg_u16(R5); +getset_reg_u16(R6); +getset_reg_u16(R7); +getset_reg_u16(R8); +getset_reg_u16(R9); +getset_reg_u16(R10); +getset_reg_u16(R11); +getset_reg_u16(R12); +getset_reg_u16(R13); +getset_reg_u16(R14); +getset_reg_u16(R15); +getset_reg_u16(zf); +getset_reg_u16(nf); +getset_reg_u16(of); +getset_reg_u16(cf); +getset_reg_u16(cpuoff); +getset_reg_u16(gie); +getset_reg_u16(osc); +getset_reg_u16(scg0); +getset_reg_u16(scg1); +getset_reg_u16(res); + + + + +static PyGetSetDef JitCpu_getseters[] = { + {"cpu", + (getter)JitCpu_get_cpu, (setter)JitCpu_set_cpu, + "first name", + NULL}, + + + {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, + {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, + {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, + {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, + {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, + {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, + {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, + {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, + {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, + {"R10" , (getter)JitCpu_get_R10 , (setter)JitCpu_set_R10 , "R10" , NULL}, + {"R11" , (getter)JitCpu_get_R11 , (setter)JitCpu_set_R11 , "R11" , NULL}, + {"R12" , (getter)JitCpu_get_R12 , (setter)JitCpu_set_R12 , "R12" , NULL}, + {"R13" , (getter)JitCpu_get_R13 , (setter)JitCpu_set_R13 , "R13" , NULL}, + {"R14" , (getter)JitCpu_get_R14 , (setter)JitCpu_set_R14 
, "R14" , NULL}, + {"R15" , (getter)JitCpu_get_R15 , (setter)JitCpu_set_R15 , "R15" , NULL}, + {"zf" , (getter)JitCpu_get_zf , (setter)JitCpu_set_zf , "zf" , NULL}, + {"nf" , (getter)JitCpu_get_nf , (setter)JitCpu_set_nf , "nf" , NULL}, + {"of" , (getter)JitCpu_get_of , (setter)JitCpu_set_of , "of" , NULL}, + {"cf" , (getter)JitCpu_get_cf , (setter)JitCpu_set_cf , "cf" , NULL}, + {"cpuoff" , (getter)JitCpu_get_cpuoff , (setter)JitCpu_set_cpuoff , "cpuoff" , NULL}, + {"gie" , (getter)JitCpu_get_gie , (setter)JitCpu_set_gie , "gie" , NULL}, + {"osc" , (getter)JitCpu_get_osc , (setter)JitCpu_set_osc , "osc" , NULL}, + {"scg0" , (getter)JitCpu_get_scg0 , (setter)JitCpu_set_scg0 , "scg0" , NULL}, + {"scg1" , (getter)JitCpu_get_scg1 , (setter)JitCpu_set_scg1 , "scg1" , NULL}, + {"res" , (getter)JitCpu_get_res , (setter)JitCpu_set_res , "res" , NULL}, + + {NULL} /* Sentinel */ +}; + + + +static PyTypeObject JitCpuType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "JitCore_msp430.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_msp430_Methods[] = { + + /* + + */ + 
{"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + +static PyObject *JitCore_msp430_Error; + +PyMODINIT_FUNC +initJitCore_msp430(void) +{ + PyObject *m; + + if (PyType_Ready(&JitCpuType) < 0) + return; + + m = Py_InitModule("JitCore_msp430", JitCore_msp430_Methods); + if (m == NULL) + return; + + JitCore_msp430_Error = PyErr_NewException("JitCore_msp430.error", NULL, NULL); + Py_INCREF(JitCore_msp430_Error); + PyModule_AddObject(m, "error", JitCore_msp430_Error); + + Py_INCREF(&JitCpuType); + PyModule_AddObject(m, "JitCpu", (PyObject *)&JitCpuType); + +} + diff --git a/miasm2/jitter/arch/JitCore_msp430.h b/miasm2/jitter/arch/JitCore_msp430.h new file mode 100644 index 00000000..c65989b0 --- /dev/null +++ b/miasm2/jitter/arch/JitCore_msp430.h @@ -0,0 +1,171 @@ + +typedef struct { + uint32_t exception_flags; + uint32_t exception_flags_new; + + /* gpregs */ + uint32_t PC; + uint32_t SP; + uint32_t R3; + uint32_t R4; + uint32_t R5; + uint32_t R6; + uint32_t R7; + uint32_t R8; + uint32_t R9; + uint32_t R10; + uint32_t R11; + uint32_t R12; + uint32_t R13; + uint32_t R14; + uint32_t R15; + + uint32_t PC_new; + uint32_t SP_new; + uint32_t SR_new; + uint32_t R3_new; + uint32_t R4_new; + uint32_t R5_new; + uint32_t R6_new; + uint32_t R7_new; + uint32_t R8_new; + uint32_t R9_new; + uint32_t R10_new; + uint32_t R11_new; + uint32_t R12_new; + uint32_t R13_new; + uint32_t R14_new; + uint32_t R15_new; + + /* eflag */ + uint32_t zf; + uint32_t nf; + uint32_t of; + uint32_t cf; + + uint32_t cpuoff; + uint32_t gie; + uint32_t osc; + uint32_t scg0; + uint32_t scg1; + uint32_t res; + + + uint32_t zf_new; + uint32_t nf_new; + uint32_t of_new; + uint32_t cf_new; + + uint32_t cpuoff_new; + uint32_t gie_new; + uint32_t osc_new; + uint32_t scg0_new; + uint32_t scg1_new; + uint32_t res_new; + + + uint8_t pfmem08_0; + uint8_t pfmem08_1; + uint8_t pfmem08_2; + uint8_t pfmem08_3; + uint8_t pfmem08_4; + uint8_t pfmem08_5; + 
uint8_t pfmem08_6; + uint8_t pfmem08_7; + uint8_t pfmem08_8; + uint8_t pfmem08_9; + uint8_t pfmem08_10; + uint8_t pfmem08_11; + uint8_t pfmem08_12; + uint8_t pfmem08_13; + uint8_t pfmem08_14; + uint8_t pfmem08_15; + uint8_t pfmem08_16; + uint8_t pfmem08_17; + uint8_t pfmem08_18; + uint8_t pfmem08_19; + + + uint16_t pfmem16_0; + uint16_t pfmem16_1; + uint16_t pfmem16_2; + uint16_t pfmem16_3; + uint16_t pfmem16_4; + uint16_t pfmem16_5; + uint16_t pfmem16_6; + uint16_t pfmem16_7; + uint16_t pfmem16_8; + uint16_t pfmem16_9; + uint16_t pfmem16_10; + uint16_t pfmem16_11; + uint16_t pfmem16_12; + uint16_t pfmem16_13; + uint16_t pfmem16_14; + uint16_t pfmem16_15; + uint16_t pfmem16_16; + uint16_t pfmem16_17; + uint16_t pfmem16_18; + uint16_t pfmem16_19; + + + uint32_t pfmem32_0; + uint32_t pfmem32_1; + uint32_t pfmem32_2; + uint32_t pfmem32_3; + uint32_t pfmem32_4; + uint32_t pfmem32_5; + uint32_t pfmem32_6; + uint32_t pfmem32_7; + uint32_t pfmem32_8; + uint32_t pfmem32_9; + uint32_t pfmem32_10; + uint32_t pfmem32_11; + uint32_t pfmem32_12; + uint32_t pfmem32_13; + uint32_t pfmem32_14; + uint32_t pfmem32_15; + uint32_t pfmem32_16; + uint32_t pfmem32_17; + uint32_t pfmem32_18; + uint32_t pfmem32_19; + + + uint64_t pfmem64_0; + uint64_t pfmem64_1; + uint64_t pfmem64_2; + uint64_t pfmem64_3; + uint64_t pfmem64_4; + uint64_t pfmem64_5; + uint64_t pfmem64_6; + uint64_t pfmem64_7; + uint64_t pfmem64_8; + uint64_t pfmem64_9; + uint64_t pfmem64_10; + uint64_t pfmem64_11; + uint64_t pfmem64_12; + uint64_t pfmem64_13; + uint64_t pfmem64_14; + uint64_t pfmem64_15; + uint64_t pfmem64_16; + uint64_t pfmem64_17; + uint64_t pfmem64_18; + uint64_t pfmem64_19; + + + uint32_t segm_base[0x10000]; + +}vm_cpu_t; + +#define RETURN_PC return PyLong_FromUnsignedLongLong(vmcpu->PC); + +uint16_t bcdadd_16(uint16_t a, uint16_t b); + +uint16_t bcdadd_cf_16(uint16_t a, uint16_t b); + +uint16_t hex2bcd_16(uint16_t a); + +uint8_t hex2bcd_8(uint8_t a); + +uint8_t bcd2hex_8(uint8_t a); + +uint16_t 
bcd2hex_16(uint16_t a); diff --git a/miasm2/jitter/arch/JitCore_x86.c b/miasm2/jitter/arch/JitCore_x86.c new file mode 100644 index 00000000..107491c1 --- /dev/null +++ b/miasm2/jitter/arch/JitCore_x86.c @@ -0,0 +1,873 @@ +#include <Python.h> +#include "JitCore.h" +#include "structmember.h" +#include <stdint.h> +#include <inttypes.h> +#include "../queue.h" +#include "../vm_mngr.h" +#include "JitCore_x86.h" + +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} +#define RAISE_ret0(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return 0;} + +typedef struct _reg_dict{ + char* name; + size_t offset; +} reg_dict; + + +reg_dict gpreg_dict[] = { {.name = "RAX", .offset = offsetof(vm_cpu_t, RAX)}, + {.name = "RBX", .offset = offsetof(vm_cpu_t, RBX)}, + {.name = "RCX", .offset = offsetof(vm_cpu_t, RCX)}, + {.name = "RDX", .offset = offsetof(vm_cpu_t, RDX)}, + {.name = "RSI", .offset = offsetof(vm_cpu_t, RSI)}, + {.name = "RDI", .offset = offsetof(vm_cpu_t, RDI)}, + {.name = "RSP", .offset = offsetof(vm_cpu_t, RSP)}, + {.name = "RBP", .offset = offsetof(vm_cpu_t, RBP)}, + + {.name = "R8", .offset = offsetof(vm_cpu_t, R8)}, + {.name = "R9", .offset = offsetof(vm_cpu_t, R9)}, + {.name = "R10", .offset = offsetof(vm_cpu_t, R10)}, + {.name = "R11", .offset = offsetof(vm_cpu_t, R11)}, + {.name = "R12", .offset = offsetof(vm_cpu_t, R12)}, + {.name = "R13", .offset = offsetof(vm_cpu_t, R13)}, + {.name = "R14", .offset = offsetof(vm_cpu_t, R14)}, + {.name = "R15", .offset = offsetof(vm_cpu_t, R15)}, + + {.name = "RIP", .offset = offsetof(vm_cpu_t, RIP)}, + + {.name = "zf", .offset = offsetof(vm_cpu_t, zf)}, + {.name = "nf", .offset = offsetof(vm_cpu_t, nf)}, + {.name = "pf", .offset = offsetof(vm_cpu_t, pf)}, + {.name = "of", .offset = offsetof(vm_cpu_t, of)}, + {.name = "cf", .offset = offsetof(vm_cpu_t, cf)}, + {.name = "af", .offset = offsetof(vm_cpu_t, af)}, + {.name = "df", .offset = offsetof(vm_cpu_t, df)}, + + {.name = "ES", 
.offset = offsetof(vm_cpu_t, ES)}, + {.name = "CS", .offset = offsetof(vm_cpu_t, CS)}, + {.name = "SS", .offset = offsetof(vm_cpu_t, SS)}, + {.name = "DS", .offset = offsetof(vm_cpu_t, DS)}, + {.name = "FS", .offset = offsetof(vm_cpu_t, FS)}, + {.name = "GS", .offset = offsetof(vm_cpu_t, GS)}, + +}; + + + +/************************** JitCpu object **************************/ + +typedef struct { + PyObject_HEAD + PyObject *cpu; /* cpu */ + vm_cpu_t vmcpu; +} JitCpu; + + + +#define get_reg(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)self->vmcpu.reg); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + +PyObject* vm_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(RAX); + get_reg(RBX); + get_reg(RCX); + get_reg(RDX); + get_reg(RSI); + get_reg(RDI); + get_reg(RSP); + get_reg(RBP); + + get_reg(R8); + get_reg(R9); + get_reg(R10); + get_reg(R11); + get_reg(R12); + get_reg(R13); + get_reg(R14); + get_reg(R15); + + get_reg(RIP); + + get_reg(zf); + get_reg(nf); + get_reg(pf); + get_reg(of); + get_reg(cf); + get_reg(af); + get_reg(df); + + + get_reg(ES); + get_reg(CS); + get_reg(SS); + get_reg(DS); + get_reg(FS); + get_reg(GS); + + return dict; +} + + + + +PyObject* _vm_set_gpreg(JitCpu* self, PyObject *dict) +{ + PyObject *d_key, *d_value = NULL; + Py_ssize_t pos = 0; + uint64_t val; + unsigned int i, found; + + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + if(!PyString_Check(d_key)) + RAISE(PyExc_TypeError, "key must be str"); + + PyGetInt(d_value, val); + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(PyString_AsString(d_key), gpreg_dict[i].name)) + continue; + *((uint64_t*)(((char*)&(self->vmcpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unkown key: %s\n", PyString_AsString(d_key)); + RAISE(PyExc_ValueError, "unkown 
reg"); + } + return NULL; +} + +PyObject* vm_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + if (!PyArg_ParseTuple(args, "O", &dict)) + return NULL; + _vm_set_gpreg(self, dict); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject * vm_init_regs(JitCpu* self) +{ + memset(&self->vmcpu, 0, sizeof(vm_cpu_t)); + + self->vmcpu.tsc1 = 0x22222222; + self->vmcpu.tsc2 = 0x11111111; + self->vmcpu.i_f = 1; + + Py_INCREF(Py_None); + return Py_None; + +} + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + + printf("RAX %.16"PRIX64" RBX %.16"PRIX64" RCX %.16"PRIX64" RDX %.16"PRIX64"\n", + vmcpu->RAX, vmcpu->RBX, vmcpu->RCX, vmcpu->RDX); + printf("RSI %.16"PRIX64" RDI %.16"PRIX64" RSP %.16"PRIX64" RBP %.16"PRIX64"\n", + vmcpu->RSI, vmcpu->RDI, vmcpu->RSP, vmcpu->RBP); + printf("zf %.16"PRIX64" nf %.16"PRIX64" of %.16"PRIX64" cf %.16"PRIX64"\n", + vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); + printf("RIP %.16"PRIX64"\n", + vmcpu->RIP); + +} + +PyObject * vm_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = &self->vmcpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_set_segm_base(JitCpu* self, PyObject* args) +{ + PyObject *item1, *item2; + uint64_t segm_num, segm_base; + + if (!PyArg_ParseTuple(args, "OO", &item1, &item2)) + return NULL; + + PyGetInt(item1, segm_num); + PyGetInt(item2, segm_base); + self->vmcpu.segm_base[segm_num] = segm_base; + + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_get_segm_base(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t segm_num; + PyObject* v; + + if (!PyArg_ParseTuple(args, "O", &item1)) + return NULL; + PyGetInt(item1, segm_num); + v = PyInt_FromLong((long)self->vmcpu.segm_base[segm_num]); + return v; +} + +uint64_t segm2addr(vm_cpu_t* vmcpu, uint64_t segm, uint64_t addr) +{ + return addr + vmcpu->segm_base[segm]; +} + + +PyObject* vm_set_exception(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, 
"O", &item1)) + return NULL; + + PyGetInt(item1, i); + + self->vmcpu.exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_get_exception(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)self->vmcpu.exception_flags); +} + + + +#define UDIV(sizeA, sizeB) \ + uint ## sizeA ## _t udiv ## sizeB (vm_cpu_t* vmcpu, uint ## sizeB ## _t a, uint ## sizeB ## _t b) \ + { \ + uint ## sizeA ## _t r; \ + if (b == 0) { \ + vmcpu->exception_flags |= EXCEPT_INT_DIV_BY_ZERO; \ + return 0; \ + } \ + r = a/b; \ + return r; \ + } + + +#define UMOD(sizeA, sizeB) \ + uint ## sizeA ## _t umod ## sizeB (vm_cpu_t* vmcpu, uint ## sizeB ## _t a, uint ## sizeB ## _t b) \ + { \ + uint ## sizeA ## _t r; \ + if (b == 0) { \ + vmcpu->exception_flags |= EXCEPT_INT_DIV_BY_ZERO; \ + return 0; \ + } \ + r = a%b; \ + return r; \ + } + + +#define IDIV(sizeA) \ + int ## sizeA ## _t idiv ## sizeA (vm_cpu_t* vmcpu, int ## sizeA ## _t a, int ## sizeA ## _t b) \ + { \ + int ## sizeA ## _t r; \ + if (b == 0) { \ + vmcpu->exception_flags |= EXCEPT_INT_DIV_BY_ZERO; \ + return 0; \ + } \ + r = a/b; \ + return r; \ + } + + +#define IMOD(sizeA) \ + int ## sizeA ## _t imod ## sizeA (vm_cpu_t* vmcpu, int ## sizeA ## _t a, int ## sizeA ## _t b) \ + { \ + int ## sizeA ## _t r; \ + if (b == 0) { \ + vmcpu->exception_flags |= EXCEPT_INT_DIV_BY_ZERO; \ + return 0; \ + } \ + r = a%b; \ + return r; \ + } + +UDIV(8, 16) +UDIV(16, 32) +UDIV(32, 64) + +UMOD(8, 16) +UMOD(16, 32) +UMOD(32, 64) + + +IDIV(16) +IDIV(32) +IDIV(64) + +IMOD(16) +IMOD(32) +IMOD(64) + + + + +static void +JitCpu_dealloc(JitCpu* self) +{ + self->ob_type->tp_free((PyObject*)self); +} + + +static PyObject * +JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + JitCpu *self; + + self = (JitCpu *)type->tp_alloc(type, 0); + return (PyObject *)self; +} + +static PyObject * +JitCpu_get_cpu(JitCpu *self, void *closure) +{ + return PyLong_FromUnsignedLongLong((uint64_t)&(self->vmcpu)); +} + 
+static int +JitCpu_set_cpu(JitCpu *self, PyObject *value, void *closure) +{ + PyErr_SetString(PyExc_TypeError, "immutable cpu"); + return -1; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"vm_init_regs", (PyCFunction)vm_init_regs, METH_NOARGS, + "X"}, + {"vm_dump_gpregs", (PyCFunction)vm_dump_gpregs, METH_NOARGS, + "X"}, + {"vm_get_gpreg", (PyCFunction)vm_get_gpreg, METH_NOARGS, + "X"}, + {"vm_set_gpreg", (PyCFunction)vm_set_gpreg, METH_VARARGS, + "X"}, + {"vm_get_segm_base", (PyCFunction)vm_get_segm_base, METH_VARARGS, + "X"}, + {"vm_set_segm_base", (PyCFunction)vm_set_segm_base, METH_VARARGS, + "X"}, + {"vm_get_exception", (PyCFunction)vm_get_exception, METH_VARARGS, + "X"}, + {"vm_set_exception", (PyCFunction)vm_set_exception, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + + + fprintf(stderr, "ad cpu: %p\n", &(self->vmcpu)); + + return 0; +} +/* +static PyObject *JitCpu_get_RAX(JitCpu *self, void *closure) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(self->vmcpu.RAX)); +} + +static int JitCpu_set_RAX(JitCpu *self, PyObject *value, void *closure) +{ + uint64_t val; + PyGetInt_ret0(value, val); + self->vmcpu.RAX = val; + return 0; +} +*/ + + +#define getset_reg_E_u32(regname) \ + static PyObject *JitCpu_get_E ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint32_t)(self->vmcpu.R ## regname & 0xFFFFFFFF )); \ + } \ + static int JitCpu_set_E ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint64_t val; \ + PyGetInt_ret0(value, val); \ + val &= 0xFFFFFFFF; \ + val |= self->vmcpu.R ##regname & 0xFFFFFFFF00000000ULL; \ + self->vmcpu.R ## regname = val; \ + return 0; \ + } + + + +#define getset_reg_R_u16(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return 
PyLong_FromUnsignedLongLong((uint16_t)(self->vmcpu.R ## regname & 0xFFFF )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint64_t val; \ + PyGetInt_ret0(value, val); \ + val &= 0xFFFF; \ + val |= self->vmcpu.R ##regname & 0xFFFFFFFFFFFF0000ULL; \ + self->vmcpu.R ## regname = val; \ + return 0; \ + } + + +getset_reg_u64(RAX); +getset_reg_u64(RBX); +getset_reg_u64(RCX); +getset_reg_u64(RDX); +getset_reg_u64(RSI); +getset_reg_u64(RDI); +getset_reg_u64(RSP); +getset_reg_u64(RBP); + +getset_reg_u64(R8); +getset_reg_u64(R9); +getset_reg_u64(R10); +getset_reg_u64(R11); +getset_reg_u64(R12); +getset_reg_u64(R13); +getset_reg_u64(R14); +getset_reg_u64(R15); + +getset_reg_u64(RIP); + +getset_reg_u64(zf); +getset_reg_u64(nf); +getset_reg_u64(pf); +getset_reg_u64(of); +getset_reg_u64(cf); +getset_reg_u64(af); +getset_reg_u64(df); + + +getset_reg_u64(ES); +getset_reg_u64(CS); +getset_reg_u64(SS); +getset_reg_u64(DS); +getset_reg_u64(FS); +getset_reg_u64(GS); + + +getset_reg_E_u32(AX); +getset_reg_E_u32(BX); +getset_reg_E_u32(CX); +getset_reg_E_u32(DX); +getset_reg_E_u32(SI); +getset_reg_E_u32(DI); +getset_reg_E_u32(SP); +getset_reg_E_u32(BP); +getset_reg_E_u32(IP); + +getset_reg_R_u16(AX); +getset_reg_R_u16(BX); +getset_reg_R_u16(CX); +getset_reg_R_u16(DX); +getset_reg_R_u16(SI); +getset_reg_R_u16(DI); +getset_reg_R_u16(SP); +getset_reg_R_u16(BP); + +#define get_reg_off(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)offsetof(vm_cpu_t, reg)); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg_off(exception_flags); + get_reg_off(exception_flags_new); + + get_reg_off(RAX); + get_reg_off(RBX); + get_reg_off(RCX); + get_reg_off(RDX); + get_reg_off(RSI); + get_reg_off(RDI); + get_reg_off(RSP); + get_reg_off(RBP); + get_reg_off(R8); + get_reg_off(R9); + get_reg_off(R10); + get_reg_off(R11); + 
get_reg_off(R12); + get_reg_off(R13); + get_reg_off(R14); + get_reg_off(R15); + get_reg_off(RIP); + get_reg_off(RAX_new); + get_reg_off(RBX_new); + get_reg_off(RCX_new); + get_reg_off(RDX_new); + get_reg_off(RSI_new); + get_reg_off(RDI_new); + get_reg_off(RSP_new); + get_reg_off(RBP_new); + get_reg_off(R8_new); + get_reg_off(R9_new); + get_reg_off(R10_new); + get_reg_off(R11_new); + get_reg_off(R12_new); + get_reg_off(R13_new); + get_reg_off(R14_new); + get_reg_off(R15_new); + get_reg_off(RIP_new); + get_reg_off(zf); + get_reg_off(nf); + get_reg_off(pf); + get_reg_off(of); + get_reg_off(cf); + get_reg_off(af); + get_reg_off(df); + get_reg_off(zf_new); + get_reg_off(nf_new); + get_reg_off(pf_new); + get_reg_off(of_new); + get_reg_off(cf_new); + get_reg_off(af_new); + get_reg_off(df_new); + get_reg_off(tf); + get_reg_off(i_f); + get_reg_off(iopl_f); + get_reg_off(nt); + get_reg_off(rf); + get_reg_off(vm); + get_reg_off(ac); + get_reg_off(vif); + get_reg_off(vip); + get_reg_off(i_d); + get_reg_off(tf_new); + get_reg_off(i_f_new); + get_reg_off(iopl_f_new); + get_reg_off(nt_new); + get_reg_off(rf_new); + get_reg_off(vm_new); + get_reg_off(ac_new); + get_reg_off(vif_new); + get_reg_off(vip_new); + get_reg_off(i_d_new); + get_reg_off(my_tick); + get_reg_off(cond); + + get_reg_off(float_st0); + get_reg_off(float_st1); + get_reg_off(float_st2); + get_reg_off(float_st3); + get_reg_off(float_st4); + get_reg_off(float_st5); + get_reg_off(float_st6); + get_reg_off(float_st7); + get_reg_off(float_st0_new); + get_reg_off(float_st1_new); + get_reg_off(float_st2_new); + get_reg_off(float_st3_new); + get_reg_off(float_st4_new); + get_reg_off(float_st5_new); + get_reg_off(float_st6_new); + get_reg_off(float_st7_new); + + get_reg_off(ES); + get_reg_off(CS); + get_reg_off(SS); + get_reg_off(DS); + get_reg_off(FS); + get_reg_off(GS); + get_reg_off(ES_new); + get_reg_off(CS_new); + get_reg_off(SS_new); + get_reg_off(DS_new); + get_reg_off(FS_new); + get_reg_off(GS_new); + + 
get_reg_off(pfmem08_0); + get_reg_off(pfmem08_1); + get_reg_off(pfmem08_2); + get_reg_off(pfmem08_3); + get_reg_off(pfmem08_4); + get_reg_off(pfmem08_5); + get_reg_off(pfmem08_6); + get_reg_off(pfmem08_7); + get_reg_off(pfmem08_8); + get_reg_off(pfmem08_9); + get_reg_off(pfmem08_10); + get_reg_off(pfmem08_11); + get_reg_off(pfmem08_12); + get_reg_off(pfmem08_13); + get_reg_off(pfmem08_14); + get_reg_off(pfmem08_15); + get_reg_off(pfmem08_16); + get_reg_off(pfmem08_17); + get_reg_off(pfmem08_18); + get_reg_off(pfmem08_19); + get_reg_off(pfmem16_0); + get_reg_off(pfmem16_1); + get_reg_off(pfmem16_2); + get_reg_off(pfmem16_3); + get_reg_off(pfmem16_4); + get_reg_off(pfmem16_5); + get_reg_off(pfmem16_6); + get_reg_off(pfmem16_7); + get_reg_off(pfmem16_8); + get_reg_off(pfmem16_9); + get_reg_off(pfmem16_10); + get_reg_off(pfmem16_11); + get_reg_off(pfmem16_12); + get_reg_off(pfmem16_13); + get_reg_off(pfmem16_14); + get_reg_off(pfmem16_15); + get_reg_off(pfmem16_16); + get_reg_off(pfmem16_17); + get_reg_off(pfmem16_18); + get_reg_off(pfmem16_19); + get_reg_off(pfmem32_0); + get_reg_off(pfmem32_1); + get_reg_off(pfmem32_2); + get_reg_off(pfmem32_3); + get_reg_off(pfmem32_4); + get_reg_off(pfmem32_5); + get_reg_off(pfmem32_6); + get_reg_off(pfmem32_7); + get_reg_off(pfmem32_8); + get_reg_off(pfmem32_9); + get_reg_off(pfmem32_10); + get_reg_off(pfmem32_11); + get_reg_off(pfmem32_12); + get_reg_off(pfmem32_13); + get_reg_off(pfmem32_14); + get_reg_off(pfmem32_15); + get_reg_off(pfmem32_16); + get_reg_off(pfmem32_17); + get_reg_off(pfmem32_18); + get_reg_off(pfmem32_19); + get_reg_off(pfmem64_0); + get_reg_off(pfmem64_1); + get_reg_off(pfmem64_2); + get_reg_off(pfmem64_3); + get_reg_off(pfmem64_4); + get_reg_off(pfmem64_5); + get_reg_off(pfmem64_6); + get_reg_off(pfmem64_7); + get_reg_off(pfmem64_8); + get_reg_off(pfmem64_9); + get_reg_off(pfmem64_10); + get_reg_off(pfmem64_11); + get_reg_off(pfmem64_12); + get_reg_off(pfmem64_13); + get_reg_off(pfmem64_14); + 
get_reg_off(pfmem64_15); + get_reg_off(pfmem64_16); + get_reg_off(pfmem64_17); + get_reg_off(pfmem64_18); + get_reg_off(pfmem64_19); + + get_reg_off(MM0); + get_reg_off(MM1); + get_reg_off(MM2); + get_reg_off(MM3); + get_reg_off(MM4); + get_reg_off(MM5); + get_reg_off(MM6); + get_reg_off(MM7); + get_reg_off(MM0_new); + get_reg_off(MM1_new); + get_reg_off(MM2_new); + get_reg_off(MM3_new); + get_reg_off(MM4_new); + get_reg_off(MM5_new); + get_reg_off(MM6_new); + get_reg_off(MM7_new); + return dict; +} + + +static PyGetSetDef JitCpu_getseters[] = { + {"cpu", + (getter)JitCpu_get_cpu, (setter)JitCpu_set_cpu, + "first name", + NULL}, + + {"RAX", (getter)JitCpu_get_RAX, (setter)JitCpu_set_RAX, "RAX", NULL}, + {"RBX", (getter)JitCpu_get_RBX, (setter)JitCpu_set_RBX, "RBX", NULL}, + {"RCX", (getter)JitCpu_get_RCX, (setter)JitCpu_set_RCX, "RCX", NULL}, + {"RDX", (getter)JitCpu_get_RDX, (setter)JitCpu_set_RDX, "RDX", NULL}, + {"RSI", (getter)JitCpu_get_RSI, (setter)JitCpu_set_RSI, "RSI", NULL}, + {"RDI", (getter)JitCpu_get_RDI, (setter)JitCpu_set_RDI, "RDI", NULL}, + {"RSP", (getter)JitCpu_get_RSP, (setter)JitCpu_set_RSP, "RSP", NULL}, + {"RBP", (getter)JitCpu_get_RBP, (setter)JitCpu_set_RBP, "RBP", NULL}, + {"R8", (getter)JitCpu_get_R8, (setter)JitCpu_set_R8, "R8", NULL}, + {"R9", (getter)JitCpu_get_R9, (setter)JitCpu_set_R9, "R9", NULL}, + {"R10", (getter)JitCpu_get_R10, (setter)JitCpu_set_R10, "R10", NULL}, + {"R11", (getter)JitCpu_get_R11, (setter)JitCpu_set_R11, "R11", NULL}, + {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL}, + {"R13", (getter)JitCpu_get_R13, (setter)JitCpu_set_R13, "R13", NULL}, + {"R14", (getter)JitCpu_get_R14, (setter)JitCpu_set_R14, "R14", NULL}, + {"R15", (getter)JitCpu_get_R15, (setter)JitCpu_set_R15, "R15", NULL}, + {"RIP", (getter)JitCpu_get_RIP, (setter)JitCpu_set_RIP, "RIP", NULL}, + {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, + {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, + {"pf", 
(getter)JitCpu_get_pf, (setter)JitCpu_set_pf, "pf", NULL}, + {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, + {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, + {"af", (getter)JitCpu_get_af, (setter)JitCpu_set_af, "af", NULL}, + {"df", (getter)JitCpu_get_df, (setter)JitCpu_set_df, "df", NULL}, + {"ES", (getter)JitCpu_get_ES, (setter)JitCpu_set_ES, "ES", NULL}, + {"CS", (getter)JitCpu_get_CS, (setter)JitCpu_set_CS, "CS", NULL}, + {"SS", (getter)JitCpu_get_SS, (setter)JitCpu_set_SS, "SS", NULL}, + {"DS", (getter)JitCpu_get_DS, (setter)JitCpu_set_DS, "DS", NULL}, + {"FS", (getter)JitCpu_get_FS, (setter)JitCpu_set_FS, "FS", NULL}, + {"GS", (getter)JitCpu_get_GS, (setter)JitCpu_set_GS, "GS", NULL}, + + {"EAX", (getter)JitCpu_get_EAX, (setter)JitCpu_set_EAX, "EAX", NULL}, + {"EBX", (getter)JitCpu_get_EBX, (setter)JitCpu_set_EBX, "EBX", NULL}, + {"ECX", (getter)JitCpu_get_ECX, (setter)JitCpu_set_ECX, "ECX", NULL}, + {"EDX", (getter)JitCpu_get_EDX, (setter)JitCpu_set_EDX, "EDX", NULL}, + {"ESI", (getter)JitCpu_get_ESI, (setter)JitCpu_set_ESI, "ESI", NULL}, + {"EDI", (getter)JitCpu_get_EDI, (setter)JitCpu_set_EDI, "EDI", NULL}, + {"ESP", (getter)JitCpu_get_ESP, (setter)JitCpu_set_ESP, "ESP", NULL}, + {"EBP", (getter)JitCpu_get_EBP, (setter)JitCpu_set_EBP, "EBP", NULL}, + {"EIP", (getter)JitCpu_get_EIP, (setter)JitCpu_set_EIP, "EIP", NULL}, + + {"AX", (getter)JitCpu_get_AX, (setter)JitCpu_set_AX, "AX", NULL}, + {"BX", (getter)JitCpu_get_BX, (setter)JitCpu_set_BX, "BX", NULL}, + {"CX", (getter)JitCpu_get_CX, (setter)JitCpu_set_CX, "CX", NULL}, + {"DX", (getter)JitCpu_get_DX, (setter)JitCpu_set_DX, "DX", NULL}, + {"SI", (getter)JitCpu_get_SI, (setter)JitCpu_set_SI, "SI", NULL}, + {"DI", (getter)JitCpu_get_DI, (setter)JitCpu_set_DI, "DI", NULL}, + {"SP", (getter)JitCpu_get_SP, (setter)JitCpu_set_SP, "SP", NULL}, + {"BP", (getter)JitCpu_get_BP, (setter)JitCpu_set_BP, "BP", NULL}, + + + {NULL} /* Sentinel */ +}; + + +static PyTypeObject 
JitCpuType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "JitCore_x86.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_x86_Methods[] = { + + /* + + */ + {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + +static PyObject *JitCore_x86_Error; + +PyMODINIT_FUNC +initJitCore_x86(void) +{ + PyObject *m; + + if (PyType_Ready(&JitCpuType) < 0) + return; + + m = Py_InitModule("JitCore_x86", JitCore_x86_Methods); + if (m == NULL) + return; + + JitCore_x86_Error = PyErr_NewException("JitCore_x86.error", NULL, NULL); + Py_INCREF(JitCore_x86_Error); + PyModule_AddObject(m, "error", JitCore_x86_Error); + + Py_INCREF(&JitCpuType); + PyModule_AddObject(m, "JitCpu", (PyObject *)&JitCpuType); + +} + + + + + + + + + + + + + + + + + + + + + + diff --git a/miasm2/jitter/arch/JitCore_x86.h b/miasm2/jitter/arch/JitCore_x86.h new file mode 100644 index 00000000..7d4379a8 --- /dev/null +++ b/miasm2/jitter/arch/JitCore_x86.h @@ -0,0 +1,293 @@ + +typedef struct { + uint32_t exception_flags; + uint32_t 
exception_flags_new; + + + /* gpregs */ + uint64_t RAX; + uint64_t RBX; + uint64_t RCX; + uint64_t RDX; + uint64_t RSI; + uint64_t RDI; + uint64_t RSP; + uint64_t RBP; + uint64_t R8; + uint64_t R9; + uint64_t R10; + uint64_t R11; + uint64_t R12; + uint64_t R13; + uint64_t R14; + uint64_t R15; + + uint64_t RIP; + + uint64_t RAX_new; + uint64_t RBX_new; + uint64_t RCX_new; + uint64_t RDX_new; + uint64_t RSI_new; + uint64_t RDI_new; + uint64_t RSP_new; + uint64_t RBP_new; + uint64_t R8_new; + uint64_t R9_new; + uint64_t R10_new; + uint64_t R11_new; + uint64_t R12_new; + uint64_t R13_new; + uint64_t R14_new; + uint64_t R15_new; + + uint64_t RIP_new; + + /* eflag */ + uint64_t zf; + uint64_t nf; + uint64_t pf; + uint64_t of; + uint64_t cf; + uint64_t af; + uint64_t df; + + uint64_t zf_new; + uint64_t nf_new; + uint64_t pf_new; + uint64_t of_new; + uint64_t cf_new; + uint64_t af_new; + uint64_t df_new; + + uint64_t tf; + uint64_t i_f; + uint64_t iopl_f; + uint64_t nt; + uint64_t rf; + uint64_t vm; + uint64_t ac; + uint64_t vif; + uint64_t vip; + uint64_t i_d; + + uint64_t tf_new; + uint64_t i_f_new; + uint64_t iopl_f_new; + uint64_t nt_new; + uint64_t rf_new; + uint64_t vm_new; + uint64_t ac_new; + uint64_t vif_new; + uint64_t vip_new; + uint64_t i_d_new; + + uint64_t my_tick; + + uint64_t cond; + + double float_st0; + double float_st1; + double float_st2; + double float_st3; + double float_st4; + double float_st5; + double float_st6; + double float_st7; + + double float_st0_new; + double float_st1_new; + double float_st2_new; + double float_st3_new; + double float_st4_new; + double float_st5_new; + double float_st6_new; + double float_st7_new; + + unsigned int float_c0; + unsigned int float_c1; + unsigned int float_c2; + unsigned int float_c3; + + unsigned int float_c0_new; + unsigned int float_c1_new; + unsigned int float_c2_new; + unsigned int float_c3_new; + + unsigned int float_stack_ptr; + unsigned int float_stack_ptr_new; + + unsigned int reg_float_control; + 
unsigned int reg_float_control_new; + + unsigned int reg_float_eip; + unsigned int reg_float_eip_new; + unsigned int reg_float_cs; + unsigned int reg_float_cs_new; + unsigned int reg_float_address; + unsigned int reg_float_address_new; + unsigned int reg_float_ds; + unsigned int reg_float_ds_new; + + + unsigned int tsc1; + unsigned int tsc2; + + unsigned int tsc1_new; + unsigned int tsc2_new; + + + uint64_t ES; + uint64_t CS; + uint64_t SS; + uint64_t DS; + uint64_t FS; + uint64_t GS; + + uint64_t ES_new; + uint64_t CS_new; + uint64_t SS_new; + uint64_t DS_new; + uint64_t FS_new; + uint64_t GS_new; + + unsigned int cr0; + unsigned int cr0_new; + + unsigned int cr3; + unsigned int cr3_new; + + + + uint8_t pfmem08_0; + uint8_t pfmem08_1; + uint8_t pfmem08_2; + uint8_t pfmem08_3; + uint8_t pfmem08_4; + uint8_t pfmem08_5; + uint8_t pfmem08_6; + uint8_t pfmem08_7; + uint8_t pfmem08_8; + uint8_t pfmem08_9; + uint8_t pfmem08_10; + uint8_t pfmem08_11; + uint8_t pfmem08_12; + uint8_t pfmem08_13; + uint8_t pfmem08_14; + uint8_t pfmem08_15; + uint8_t pfmem08_16; + uint8_t pfmem08_17; + uint8_t pfmem08_18; + uint8_t pfmem08_19; + + + uint16_t pfmem16_0; + uint16_t pfmem16_1; + uint16_t pfmem16_2; + uint16_t pfmem16_3; + uint16_t pfmem16_4; + uint16_t pfmem16_5; + uint16_t pfmem16_6; + uint16_t pfmem16_7; + uint16_t pfmem16_8; + uint16_t pfmem16_9; + uint16_t pfmem16_10; + uint16_t pfmem16_11; + uint16_t pfmem16_12; + uint16_t pfmem16_13; + uint16_t pfmem16_14; + uint16_t pfmem16_15; + uint16_t pfmem16_16; + uint16_t pfmem16_17; + uint16_t pfmem16_18; + uint16_t pfmem16_19; + + + uint32_t pfmem32_0; + uint32_t pfmem32_1; + uint32_t pfmem32_2; + uint32_t pfmem32_3; + uint32_t pfmem32_4; + uint32_t pfmem32_5; + uint32_t pfmem32_6; + uint32_t pfmem32_7; + uint32_t pfmem32_8; + uint32_t pfmem32_9; + uint32_t pfmem32_10; + uint32_t pfmem32_11; + uint32_t pfmem32_12; + uint32_t pfmem32_13; + uint32_t pfmem32_14; + uint32_t pfmem32_15; + uint32_t pfmem32_16; + uint32_t pfmem32_17; + 
uint32_t pfmem32_18; + uint32_t pfmem32_19; + + + uint64_t pfmem64_0; + uint64_t pfmem64_1; + uint64_t pfmem64_2; + uint64_t pfmem64_3; + uint64_t pfmem64_4; + uint64_t pfmem64_5; + uint64_t pfmem64_6; + uint64_t pfmem64_7; + uint64_t pfmem64_8; + uint64_t pfmem64_9; + uint64_t pfmem64_10; + uint64_t pfmem64_11; + uint64_t pfmem64_12; + uint64_t pfmem64_13; + uint64_t pfmem64_14; + uint64_t pfmem64_15; + uint64_t pfmem64_16; + uint64_t pfmem64_17; + uint64_t pfmem64_18; + uint64_t pfmem64_19; + + + uint64_t MM0; + uint64_t MM1; + uint64_t MM2; + uint64_t MM3; + uint64_t MM4; + uint64_t MM5; + uint64_t MM6; + uint64_t MM7; + + uint64_t MM0_new; + uint64_t MM1_new; + uint64_t MM2_new; + uint64_t MM3_new; + uint64_t MM4_new; + uint64_t MM5_new; + uint64_t MM6_new; + uint64_t MM7_new; + + uint32_t segm_base[0x10000]; + +}vm_cpu_t; + + + +void dump_gpregs(vm_cpu_t* vmcpu); +uint64_t segm2addr(vm_cpu_t* vmcpu, uint64_t segm, uint64_t addr); + + +uint32_t udiv64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); +uint32_t umod64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); +int64_t idiv64(vm_cpu_t* vmcpu, int64_t a, int64_t b); +int64_t imod64(vm_cpu_t* vmcpu, int64_t a, int64_t b); + +uint16_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); +uint16_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); +int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); +int32_t imod32(vm_cpu_t* vmcpu, int32_t a, int32_t b); + +uint8_t udiv16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); +uint8_t umod16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); +int16_t idiv16(vm_cpu_t* vmcpu, int16_t a, int16_t b); +int16_t imod16(vm_cpu_t* vmcpu, int16_t a, int16_t b); + +#define RETURN_PC return PyLong_FromUnsignedLongLong(vmcpu->RIP); diff --git a/miasm2/jitter/arch/__init__.py b/miasm2/jitter/arch/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/jitter/arch/__init__.py diff --git a/miasm2/jitter/csts.py b/miasm2/jitter/csts.py new file mode 100644 index 00000000..e4b315e1 --- 
class JitCore(object):

    """JiT management. This is an abstract class.

    Backends subclass it and provide load(), jitirblocs() and an
    `exec_wrapper` attribute (used by jit_call()).
    """

    def __init__(self, my_ir, bs=None):
        """Initialise a JitCore instance.
        @my_ir: ir instance for current architecture
        @bs: bitstream
        """

        self.my_ir = my_ir
        self.bs = bs
        self.known_blocs = {}
        # offset -> backend handle of the jitted code
        self.lbl2jitbloc = {}
        # label -> disassembled asm bloc
        self.lbl2bloc = {}
        self.log_mn = False
        self.log_regs = False
        self.log_newbloc = False
        self.segm_to_do = set()
        self.job_done = set()
        self.jitcount = 0
        self.addr2obj = {}
        self.addr2objref = {}
        # Addresses covered by jitted blocs (used to detect automod code)
        self.blocs_mem_interval = interval()
        # Optional callback called with each freshly disassembled bloc
        self.disasm_cb = None
        self.split_dis = set()

        self.options = {"jit_maxline": 50  # Maximum number of line jitted
                        }

    def set_options(self, **kwargs):
        "Set options relative to the backend"

        self.options.update(kwargs)

    def add_disassembly_splits(self, *args):
        """The disassembly engine will stop on address in args if they
        are not at the block beginning"""
        self.split_dis.update(args)

    def remove_disassembly_splits(self, *args):
        """The disassembly engine will no longer stop on address in args"""
        self.split_dis.difference_update(args)

    def load(self, arch, attrib=None):
        """Initialise the Jitter according to arch and attrib.
        @arch: architecture to load
        @attrib: (optional) architecture attribute; the backends defined
                 next to this class only take @arch

        Abstract: always raises NotImplementedError (an Exception subclass).
        """

        raise NotImplementedError("DO NOT instanciate JitCore")

    def __get_bloc_min_max(self, cur_bloc):
        "Update cur_bloc to set min/max address (only if it has lines)"

        if cur_bloc.lines:
            first, last = cur_bloc.lines[0], cur_bloc.lines[-1]
            cur_bloc.ad_min = first.offset
            cur_bloc.ad_max = last.offset + last.l

    def __add_bloc_to_mem_interval(self, vm, bloc):
        "Update vm to include bloc addresses in its memory range"

        # ad_min/ad_max are only computed for blocs with lines (see
        # __get_bloc_min_max): a bloc whose disassembly failed covers no
        # code byte and must not be read here.
        if bloc.lines:
            self.blocs_mem_interval += interval(
                [(bloc.ad_min, bloc.ad_max - 1)])

        self.__updt_jitcode_mem_range(vm)

    def jitirblocs(self, label, irblocs):
        """JiT a group of irblocs.
        @label: the label of the irblocs
        @irblocs: a group of irblocs

        Abstract: always raises NotImplementedError (an Exception subclass).
        """

        raise NotImplementedError("DO NOT instanciate JitCore")

    def add_bloc(self, b):
        """Add a bloc to JiT and JiT it.
        @b: the bloc to add
        """

        irblocs = self.my_ir.add_bloc(b, gen_pc_updt=True)
        # Kept on the bloc so __del_bloc_in_range can drop its jitted code
        b.irblocs = irblocs
        self.jitirblocs(b.label, irblocs)

    def __disbloc(self, addr, cpu, vm):
        """Disassemble a new bloc and JiT it.
        @addr: address (or asm_label) of the bloc
        @cpu: JitCpu instance (unused here)
        @vm: VmMngr instance
        """

        # Get the bloc
        if isinstance(addr, asmbloc.asm_label):
            addr = addr.offset

        l = self.my_ir.symbol_pool.getby_offset_create(addr)
        cur_bloc = asmbloc.asm_bloc(l)

        # Disassemble it (at most options["jit_maxline"] asm lines)
        try:
            asmbloc.dis_bloc(self.my_ir.arch, self.bs, cur_bloc, addr,
                             set(), self.my_ir.symbol_pool, [],
                             follow_call=False, patch_instr_symb=True,
                             dontdis_retcall=False,
                             lines_wd=self.options["jit_maxline"],
                             attrib=self.my_ir.attrib,
                             split_dis=self.split_dis)
        except IOError:
            # vm_exception_flag is set
            pass

        # Logging
        if self.log_newbloc:
            print(cur_bloc)
        if self.disasm_cb is not None:
            self.disasm_cb(cur_bloc)
        # Update label -> bloc
        self.lbl2bloc[l] = cur_bloc

        # Store min/max bloc address needed in jit automod code
        self.__get_bloc_min_max(cur_bloc)

        # JiT it
        self.add_bloc(cur_bloc)

        # Update jitcode mem range
        self.__add_bloc_to_mem_interval(vm, cur_bloc)

    def jit_call(self, label, cpu, vmmngr):
        """Call the function label with cpu and vmmngr states
        @label: function's label
        @cpu: address of the cpu state structure
        @vmmngr: address of the memory state structure
        """

        fc_ptr = self.lbl2jitbloc[label]
        return self.exec_wrapper(fc_ptr, cpu, vmmngr)

    def runbloc(self, cpu, vm, lbl):
        """Run the bloc starting at lbl.
        @cpu: JitCpu instance
        @vm: VmMngr instance
        @lbl: target label (None: use the current value of the pc register)
        """

        if lbl is None:
            lbl = cpu.vm_get_gpreg()[self.my_ir.pc.name]

        if lbl not in self.lbl2jitbloc:
            # Need to JiT the bloc
            self.__disbloc(lbl, cpu, vm)

        # Run the bloc and update cpu/vmmngr state
        return self.jit_call(lbl, cpu.cpu, vm.vmmngr)

    def __blocs2memrange(self, blocs):
        """Return an interval instance standing for blocs addresses
        @blocs: list of asm_bloc instances
        """

        mem_range = interval()

        for b in blocs:
            mem_range += interval([(b.ad_min, b.ad_max - 1)])

        return mem_range

    def __updt_jitcode_mem_range(self, vm):
        """Rebuild the VM blocs address memory range
        @vm: VmMngr instance
        """

        # Reset the current pool
        vm.vm_reset_code_bloc_pool()

        # Add blocs in the pool (interval bounds are inclusive)
        for a, b in self.blocs_mem_interval:
            vm.vm_add_code_bloc(a, b + 1)

    def __del_bloc_in_range(self, ad1, ad2):
        """Find and remove jitted blocs overlapping [ad1, ad2[.
        Return the set of blocs removed.
        @ad1: First address
        @ad2: End address (excluded)
        """

        # Find concerned blocs; blocs without lines have no address range
        modified_blocs = set()
        for b in self.lbl2bloc.values():
            if not b.lines:
                continue
            if b.ad_min < ad2 and b.ad_max > ad1:
                modified_blocs.add(b)

        # Generate interval to delete
        del_interval = self.__blocs2memrange(modified_blocs)

        # Remove interval from monitored interval list
        self.blocs_mem_interval -= del_interval

        # Remove modified blocs
        for b in modified_blocs:
            irblocs = getattr(b, "irblocs", None)
            if irblocs is None:
                # The bloc has never been translated in IR
                self.lbl2jitbloc.pop(b.label.offset, None)
            else:
                # Remove offset -> jitted bloc link
                for irbloc in irblocs:
                    self.lbl2jitbloc.pop(irbloc.label.offset, None)

            # Remove label -> bloc link
            del self.lbl2bloc[b.label]

        return modified_blocs

    def updt_automod_code(self, vm, addr, size):
        """Remove code jitted in range [addr, addr + size / 8[
        @vm: VmMngr instance
        @addr: Address of modified code in sandbox
        @size: Modification range size (in bits)
        """

        # Explicit floor division: the range is expressed in whole bytes
        self.__del_bloc_in_range(addr, addr + size // 8)
        self.__updt_jitcode_mem_range(vm)
"log_assembly": False, # Print assembly executed + "cache_ir": None # SaveDir for cached .ll + }) + + self.exec_wrapper = Jitllvm.llvm_exec_bloc + self.exec_engines = [] + + def load(self, arch): + + # Library to load within Jit context + libs_to_load = [] + + # Get the vm_mngr librairy + lib_dir = os.path.dirname(os.path.realpath(__file__)) + vm_mngr_path = os.path.join(lib_dir, 'vm_mngr.so') + libs_to_load.append(vm_mngr_path) + + # Get architecture dependant Jitcore library (if any) + try: + jit_lib = os.path.join( + lib_dir, self.arch_dependent_libs[arch.name]) + libs_to_load.append(jit_lib) + except KeyError: + pass + + # Create a context + self.context = LLVMContext_JIT(libs_to_load) + + # Set the optimisation level + self.context.optimise_level() + + # Save the current architecture parameters + self.arch = arch + + # Get the correspondance between registers and vmcpu struct + mod_name = "miasm2.jitter.arch.JitCore_%s" % (arch.name) + mod = importlib.import_module(mod_name) + self.context.set_vmcpu(mod.get_gpreg_offset_all()) + + # Save module base + self.mod_base_str = str(self.context.mod) + + # Set IRs transformation to apply + self.context.set_IR_transformation(self.my_ir.expr_fix_regs_for_mode) + + def add_bloc(self, bloc): + + # Search in IR cache + if self.options["cache_ir"] is not None: + + # /!\ This part is under development + # Use it at your own risk + + # Compute Hash : label + bloc binary + func_name = bloc.label.name + to_hash = func_name + + # Get binary from bloc + for line in bloc.lines: + b = line.b + to_hash += b + + # Compute Hash + md5 = hashlib.md5(to_hash).hexdigest() + + # Try to load the function from cache + filename = self.options["cache_ir"] + md5 + ".ll" + + try: + fcontent = open(filename) + content = fcontent.read() + fcontent.close() + + except IOError: + content = None + + if content is None: + # Compute the IR + super(JitCore_LLVM, self).add_bloc(bloc) + + # Save it + fdest = open(filename, "w") + dump = 
str(self.context.mod.get_function_named(func_name)) + my = "declare i16 @llvm.bswap.i16(i16) nounwind readnone\n" + + fdest.write(self.mod_base_str + my + dump) + fdest.close() + + else: + import llvm.core as llvm_c + import llvm.ee as llvm_e + my_mod = llvm_c.Module.from_assembly(content) + func = my_mod.get_function_named(func_name) + exec_en = llvm_e.ExecutionEngine.new(my_mod) + self.exec_engines.append(exec_en) + + # We can use the same exec_engine + ptr = self.exec_engines[0].get_pointer_to_function(func) + + # Store a pointer on the function jitted code + self.lbl2jitbloc[bloc.label.offset] = ptr + + else: + super(JitCore_LLVM, self).add_bloc(bloc) + + def jitirblocs(self, label, irblocs): + + # Build a function in the context + func = LLVMFunction(self.context, label.name) + + # Set log level + func.log_regs = self.log_regs + func.log_mn = self.log_mn + + # Import irblocs + func.from_blocs(irblocs) + + # Verify + if self.options["safe_mode"] is True: + func.verify() + + # Optimise + if self.options["optimise"] is True: + func.optimise() + + # Log + if self.options["log_func"] is True: + print func + if self.options["log_assembly"] is True: + print func.get_assembly() + + # Store a pointer on the function jitted code + self.lbl2jitbloc[label.offset] = func.get_function_pointer() diff --git a/miasm2/jitter/jitcore_tcc.py b/miasm2/jitter/jitcore_tcc.py new file mode 100644 index 00000000..ee33bcd0 --- /dev/null +++ b/miasm2/jitter/jitcore_tcc.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import os +from miasm2.ir.ir2C import irblocs2C +from subprocess import Popen, PIPE +import jitcore +from distutils.sysconfig import get_python_inc +import Jittcc + + +def jit_tcc_compil(func_name, func_code): + global Jittcc + c = Jittcc.tcc_compil(func_name, func_code) + return c + + +class jit_tcc_code(): + + def __init__(self, c): + self.c = c + + def __call__(self, cpu, vm): + return Jittcc.tcc_exec_bloc(self.c, cpu, vm) + + +def gen_core(arch, 
class JitCore_Tcc(jitcore.JitCore):

    "JiT management, using LibTCC as backend"

    def __init__(self, my_ir, bs=None):
        """Initialise the TCC backend.
        @my_ir: ir instance for current architecture
        @bs: bitstream
        """
        super(JitCore_Tcc, self).__init__(my_ir, bs)
        self.resolver = resolver()
        self.exec_wrapper = Jittcc.tcc_exec_bloc

    def load(self, arch):
        """Give LibTCC the libraries and include directories it needs.
        @arch: architecture; arch.name selects arch/JitCore_<name>.so
        """
        lib_dir = os.path.dirname(os.path.realpath(__file__))
        libs = ';'.join([
            os.path.join(lib_dir, 'vm_mngr.so'),
            os.path.join(lib_dir, 'arch/JitCore_%s.so' % (arch.name)),
        ])

        include_dir = os.path.dirname(Jittcc.__file__)
        include_dir += ";" + os.path.join(include_dir, "arch")

        # XXX HACK
        # As debian/ubuntu have moved some include files using arch directory,
        # TCC doesn't know them, so we get the info from GCC
        # For example /usr/include/x86_64-linux-gnu which contains limits.h
        p = Popen(["cc", "-Wp,-v", "-E", "-"],
                  stdout=PIPE, stderr=PIPE, stdin=PIPE,
                  universal_newlines=True)
        # communicate() closes stdin, drains both pipes and reaps the child
        # (reading stderr alone left the process un-waited and stdout open)
        _, cc_err = p.communicate()
        # The search directories are printed on stderr with a leading space
        include_files = [x[1:] for x in cc_err.split('\n')
                         if x.startswith(' /usr/include')]
        include_files += [include_dir, get_python_inc()]

        Jittcc.tcc_set_emul_lib_path(";".join(include_files), libs)

    def jitirblocs(self, label, irblocs):
        """Translate irblocs to C, compile them with TCC and register them.
        @label: the label of the irblocs
        @irblocs: a group of irblocs
        """
        f_name = "bloc_%s" % label.name
        f_declaration = \
            'PyObject* %s(vm_cpu_t* vmcpu, vm_mngr_t* vm_mngr)' % f_name
        out = irblocs2C(self.my_ir, self.resolver, label, irblocs,
                        gen_exception_code=True,
                        log_mn=self.log_mn,
                        log_regs=self.log_regs)
        c_code = [f_declaration + '{'] + out + ['}\n']

        func_code = gen_C_source(self.my_ir, c_code)
        self.jitcount += 1
        mcode = jit_tcc_compil(f_name, func_code)
        jcode = jit_tcc_code(mcode)
        # lbl2jitbloc holds the raw compiled handle given to exec_wrapper;
        # addr2obj / addr2objref hold the callable wrapper around it
        self.lbl2jitbloc[label.offset] = mcode
        self.addr2obj[label.offset] = jcode
        self.addr2objref[label.offset] = objref(jcode)
class bin_stream_vm(bin_stream):

    """Read-only bin_stream reading its bytes from the memory of a VM.

    Addresses given to the accessors are shifted by `base_offset` before
    being passed to vm.vm_get_mem().
    """

    def __init__(self, vm, offset=0, base_offset=0):
        """
        @vm: object exposing vm_get_mem(address, length)
        @offset: initial read cursor
        @base_offset: value added to every address before the VM access
        """
        self.offset = offset
        self.base_offset = base_offset
        self.vm = vm

    def getlen(self):
        "Return a constant upper bound: the whole 64 bit address space"
        return 0xFFFFFFFFFFFFFFFF

    def getbytes(self, start, l=1):
        """Return @l bytes read at @start; the cursor is left untouched.
        Raise IOError if the VM cannot serve the access.
        """
        try:
            s = self.vm.vm_get_mem(start + self.base_offset, l)
        except Exception:
            # `except Exception` (not a bare except): KeyboardInterrupt and
            # SystemExit must not be disguised as a memory error
            raise IOError('cannot get mem ad', hex(start))
        return s

    def readbs(self, l=1):
        """Return @l bytes read at the cursor, then advance the cursor.
        Raise IOError (cursor unchanged) if the VM cannot serve the access.
        """
        try:
            s = self.vm.vm_get_mem(self.offset + self.base_offset, l)
        except Exception:
            raise IOError('cannot get mem ad', hex(self.offset))
        self.offset += l
        return s

    def writebs(self, l=1):
        "Writing is not supported: always raise ValueError"
        raise ValueError('writebs unsupported')

    def setoffset(self, val):
        "Move the read cursor to @val"
        self.offset = val
def get_import_address_elf(e):
    """Collect the relocation offsets of an ELF, grouped by relocation key.

    Every section of e.sh owning a `rel` mapping contributes the `offset`
    of each of its entries.  The result is a defaultdict(set) keyed by
    ('xxx', key): 'xxx' is a placeholder used as library name.
    """
    addresses = defaultdict(set)
    relocated = (section for section in e.sh if hasattr(section, 'rel'))
    for section in relocated:
        for symbol, reloc in section.rel.items():
            addresses[('xxx', symbol)].add(reloc.offset)
    return addresses
class libimp:

    """Database of the libraries and functions imported by a sandboxed binary.

    Libraries get a base address (fake, or the real image base when added
    through add_export_lib) and each of their functions gets an address.
    """

    def __init__(self, lib_base_ad=0x71111000, **kargs):
        """
        @lib_base_ad: base address given to the first fake library
        """
        # library name -> library base address
        self.name2off = {}
        # library base -> next free fake function address
        self.libbase2lastad = {}
        # base address of the next fake library
        self.libbase_ad = lib_base_ad
        # library base -> {function name or ordinal -> function address}
        self.lib_imp2ad = {}
        # library base -> {function name or ordinal -> set of dst_ad}
        self.lib_imp2dstad = {}
        # function address -> canonical name
        self.fad2cname = {}
        # function address -> (library base, function name or ordinal)
        self.fad2info = {}
        self.all_exported_lib = []

    def _lib_name(self, libad):
        "Return the name of the library registered at base address @libad"
        return dict((off, name) for name, off in self.name2off.items())[libad]

    def lib_get_add_base(self, name):
        """Return the base address of library @name, registering it with a
        fresh fake base if it is unknown.
        @name: library name ('.dll' is appended if it has no extension)
        """
        name = name.lower().strip(' ')
        if "." not in name:
            log.debug('warning adding .dll to modulename')
            name += '.dll'
            log.debug('%s' % name)

        if name in self.name2off:
            return self.name2off[name]

        ad = self.libbase_ad
        log.debug('new lib %s %s' % (name, hex(ad)))
        self.name2off[name] = ad
        self.libbase2lastad[ad] = ad + 0x1
        self.lib_imp2ad[ad] = {}
        self.lib_imp2dstad[ad] = {}
        self.libbase_ad += 0x1000
        return ad

    def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None):
        """Return the address of a function of a registered library,
        allocating a fake one on first use.
        @libad: library base address (as returned by lib_get_add_base)
        @imp_ord_or_name: function name or ordinal
        @dst_ad: (optional) address referencing the function; recorded
                 even when None
        Raise ValueError if @libad is not a registered base.
        """
        if libad not in self.name2off.values():
            raise ValueError('unknown lib base!', hex(libad))

        # /!\ can have multiple dst ad
        self.lib_imp2dstad[libad].setdefault(imp_ord_or_name, set()).add(
            dst_ad)

        if imp_ord_or_name in self.lib_imp2ad[libad]:
            return self.lib_imp2ad[libad][imp_ord_or_name]

        ad = self.libbase2lastad[libad]
        self.libbase2lastad[libad] += 0x11  # arbitrary
        self.lib_imp2ad[libad][imp_ord_or_name] = ad

        c_name = canon_libname_libfunc(self._lib_name(libad), imp_ord_or_name)
        self.fad2cname[ad] = c_name
        self.fad2info[ad] = libad, imp_ord_or_name
        return ad

    def check_dst_ad(self):
        """Return True if, for every library, the recorded dst_ad are
        known (not None) and 4 bytes apart from each other.

        A library with less than two recorded addresses always passes.
        """
        for imp2dst in self.lib_imp2dstad.values():
            # Values are sets of addresses (see lib_get_add_func): flatten
            # them, comparing the sets themselves raises TypeError
            all_ads = []
            for dst_ads in imp2dst.values():
                all_ads.extend(dst_ads)
            if len(all_ads) < 2:
                continue
            if None in all_ads:
                return False
            all_ads.sort()
            for cur, nxt in zip(all_ads, all_ads[1:]):
                if cur + 4 != nxt:
                    return False
        return True

    def add_export_lib(self, e, name):
        """Register the real addresses of the functions exported by a PE.
        @e: PE instance of the library
        @name: library name

        NOTE(review): exports are only recorded the first time @name is
        registered; an already known name is left untouched.
        Raise ValueError if an export is redirected to a library which is
        not loaded, or to a function of @name which is never exported.
        """
        self.all_exported_lib.append(e)
        if name in self.name2off:
            return

        # will add real lib addresses to database
        log.debug('new lib %s' % name)
        libad = e.NThdr.ImageBase
        self.name2off[name] = libad
        self.libbase2lastad[libad] = libad + 0x1
        self.lib_imp2ad[libad] = {}
        self.lib_imp2dstad[libad] = {}
        self.libbase_ad += 0x1000

        lib_name = self._lib_name(libad)
        todo = get_export_name_addr_list(e)
        # Number of consecutive exports postponed without any progress
        stalled = 0
        while todo:
            imp_ord_or_name, ad = todo.pop()

            # if export is a redirection, search redirected dll
            # and get function real addr
            ret = is_redirected_export(e, ad)
            if ret:
                exp_dname, exp_fname = ret
                exp_dname = (exp_dname + '.dll').lower()
                if exp_dname == name:
                    # The dll redirects to itself
                    if exp_fname not in self.lib_imp2ad[libad]:
                        # Target not known yet: retry after the others
                        todo.insert(0, (imp_ord_or_name, ad))
                        stalled += 1
                        if stalled > len(todo):
                            # Every pending export has been retried in
                            # vain: give up instead of looping forever
                            raise ValueError(
                                'unresolved export redirection %r in %r' %
                                (exp_fname, name))
                        continue
                elif exp_dname not in self.name2off:
                    raise ValueError('load %r first' % exp_dname)
                libad_tmp = self.name2off[exp_dname]
                ad = self.lib_imp2ad[libad_tmp][exp_fname]

            stalled = 0
            self.lib_imp2ad[libad][imp_ord_or_name] = ad
            self.fad2cname[ad] = canon_libname_libfunc(
                lib_name, imp_ord_or_name)
            self.fad2info[ad] = libad, imp_ord_or_name

    def gen_new_lib(self, e, filter=lambda x: True):
        """Build import descriptors from the recorded dst_ad.
        @e: PE instance used to translate addresses (e.virt2off)
        @filter: predicate called on every recorded dst_ad (None
                 included); addresses it rejects are ignored

        Addresses 4 bytes apart are grouped together.  Return a list of
        ({"name": library name, "firstthunk": offset}, [functions]), one
        entry per group whose first address e.virt2off can translate.
        """
        new_lib = []
        for n, ad in self.name2off.items():
            out_ads = dict()
            all_ads = []
            for k, vs in self.lib_imp2dstad[ad].items():
                for v in vs:
                    out_ads[v] = k
                all_ads.extend(vs)
            # Drop every unknown address (0 or None), not only the leading
            # ones: they cannot be translated nor compared on Python 3
            all_ads = [x for x in all_ads
                       if filter(x) and x not in (0, None)]
            all_ads.sort()

            start = 0
            while start < len(all_ads):
                # Extend the group while addresses are contiguous
                end = start
                while (end + 1 < len(all_ads) and
                       all_ads[end] + 4 == all_ads[end + 1]):
                    end += 1
                othunk = all_ads[start]
                funcs = [out_ads[x] for x in all_ads[start:end + 1]]
                try:
                    off = e.virt2off(othunk)
                except pe.InvalidOffset:
                    off = None
                if off is not None:
                    new_lib.append(({"name": n,
                                     "firstthunk": off},
                                    funcs))
                start = end + 1
        return new_lib
def vm_load_elf(vm, fname, **kargs):
    """Map the program segments of the ELF file 'fname' into 'vm'.

    @vm: memory manager providing vm_add_memory_page
    @fname: path of the ELF file
    @kargs: forwarded to elf_init.ELF
    Each program header of type 1 is copied into a read/write memory page
    starting at its virtual address rounded down to 0x1000, zero padded
    at both ends up to a multiple of 0x1000.
    Return the parsed ELF object.
    """
    # Close the file as soon as its content has been read
    with open(fname, 'rb') as fdesc:
        e = elf_init.ELF(fdesc.read(), **kargs)
    for p in e.ph.phlist:
        # Type 1 is PT_LOAD in the ELF specification
        if p.ph.type != 1:
            continue
        log.debug('%s %s %s' %
                  (hex(p.ph.vaddr), hex(p.ph.offset), hex(p.ph.filesz)))
        data = e._content[p.ph.offset:p.ph.offset + p.ph.filesz]
        r_vaddr = p.ph.vaddr & ~0xFFF
        # Leading padding: the page starts before the segment
        data = (p.ph.vaddr - r_vaddr) * "\x00" + data
        # Trailing padding up to the next 0x1000 boundary
        data += (((len(data) + 0xFFF) & ~0xFFF) - len(data)) * "\x00"
        vm.vm_add_memory_page(r_vaddr, PAGE_READ | PAGE_WRITE, data)
    return e
class CallbackHandler(object):

    "Handle a list of callback"

    def __init__(self):
        self.callbacks = {}  # Key -> [callback list]

    def add_callback(self, name, callback):
        "Add a callback to the key 'name'"
        self.callbacks[name] = list(self.callbacks.get(name, [])) + [callback]

    def set_callback(self, name, *args):
        "Set the list of callback for key 'name'"
        # Store a list, not the raw args tuple: add_callback and
        # remove_callback both need a mutable list.
        self.callbacks[name] = list(args)

    def get_callbacks(self, name):
        "Return the list of callbacks associated to key 'name'"
        return self.callbacks.get(name, [])

    def remove_callback(self, callback):
        """Remove the callback from the list.
        Return the list of empty keys (removed)"""

        # Keys from which one occurrence of the callback was removed
        to_check = set()
        for key, cb_list in self.callbacks.items():
            try:
                cb_list.remove(callback)
            except ValueError:
                continue
            to_check.add(key)

        # Forget the keys which have no callback left
        empty_keys = []
        for key in to_check:
            if not self.callbacks[key]:
                empty_keys.append(key)
                del self.callbacks[key]

        return empty_keys

    def call_callbacks(self, name, *args):
        """Call callbacks associated to key 'name' with arguments args.
        Every callback is called in order; results which are not True are
        yielded to the caller."""

        for c in self.get_callbacks(name):
            res = c(*args)
            if res is not True:
                yield res

    def __call__(self, name, *args):
        "Wrapper for call_callbacks"
        return self.call_callbacks(name, *args)
class ExceptionHandle(object):

    "Return type for exception handler"

    def __init__(self, except_flag):
        self.except_flag = except_flag

    @classmethod
    def memoryBreakpoint(cls):
        "Return the handle built from EXCEPT_BREAKPOINT_INTERN"
        return cls(EXCEPT_BREAKPOINT_INTERN)

    def __eq__(self, to_cmp):
        "Two handles are equal when they carry the same exception flag"
        if not isinstance(to_cmp, ExceptionHandle):
            return False
        return (self.except_flag == to_cmp.except_flag)

    def __ne__(self, to_cmp):
        # Python 2 does not derive != from __eq__; keep both consistent
        return not self.__eq__(to_cmp)

    def __hash__(self):
        # Equal handles must hash alike
        return hash(self.except_flag)
def remove_breakpoints_by_callback(self, callback):
    """Detach 'callback' from every breakpoint it is registered on.
    @callback: callback to remove
    Addresses left without any callback also lose their disassembly split.
    """
    for freed_addr in self.breakpoints_handler.remove_callback(callback):
        self.jit.remove_disassembly_splits(freed_addr)
def continue_run(self, step=False):
    """PRE: init_run.
    Continue the run of the current session until iterator returns or run is
    set to False.
    If step is True, run only one time.
    Return the iterator value"""

    while self.run:
        try:
            # next() builtin rather than the Python 2 only .next() method
            return next(self.run_iterator)
        except StopIteration:
            pass

        # Current iterator is exhausted: restart from the current pc
        self.run_iterator = self.runiter_once(self.pc)

        if step is True:
            return None

    return None
def get_stack_arg(self, n):
    """Return the n-th 16 bit word located above the stack pointer.
    @n: index of the word, 0 being the one at SP
    """
    # Stack slots are 16 bit wide here (see vm_push_uint16_t): read
    # 2 bytes with a 2 byte stride, as upck16 is fed by the 16 bit pop.
    # NOTE(review): assumes upck16 expects exactly 2 bytes - confirm
    x = upck16(self.vm.vm_get_mem(self.cpu.SP + 2 * n, 2))
    return x
def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None):
    """Set the registers for a stdcall function return.
    @ret_addr: address written to EIP
    @ret_value1: if not None, value written to EAX
    @ret_value2: if not None, value written to EDX
    """
    self.cpu.EIP = ret_addr
    if ret_value1 is not None:
        self.cpu.EAX = ret_value1
    if ret_value2 is not None:
        # was 'ret_value', an undefined name
        self.cpu.EDX = ret_value2
def get_stack_arg(self, n):
    """Return the n-th 64 bit word located above the stack pointer.
    @n: index of the word, 0 being the one at RSP
    """
    # 64 bit stack slots: read 8 bytes with an 8 byte stride, matching
    # the 64 bit unpack (the former code read 4 bytes every 4 bytes).
    # NOTE(review): assumes upck64 expects exactly 8 bytes - confirm
    x = upck64(self.vm.vm_get_mem(self.cpu.RSP + 8 * n, 8))
    return x
def vm2pe(myjit, fname, libs=None, e_orig=None,
          max_addr=1 << 64, min_addr=0x401000,
          min_section_offset=0x1000, img_base=None,
          added_funcs=None):
    """Dump the memory of a jitter into a new PE file.

    @myjit: jitter instance; its vm memory and cpu.EIP are read
    @fname: path of the PE file to write
    @libs: (optional) import manager used to rebuild the import directory
    @e_orig: (optional) original PE; gives the default image base and the
        resource directory rva
    @max_addr, @min_addr: only memory pages whose address is in
        [min_addr, max_addr) become sections
    @min_section_offset: file offset given to the first section
    @img_base: image base of the new PE (default: the one of e_orig)
    @added_funcs: (optional) iterable of (address, function address)
        registered as extra import destinations before generation
    Raise ValueError if neither img_base nor e_orig is given.
    """
    mye = pe_init.PE()

    if img_base is None:
        if e_orig is None:
            raise ValueError('img_base or e_orig is required')
        img_base = e_orig.NThdr.ImageBase

    mye.NThdr.ImageBase = img_base
    all_mem = myjit.vm.vm_get_all_memory()
    mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.cpu.EIP)

    # One section per memory page in range; only the first one gets an
    # explicit file offset.
    first = True
    for ad in sorted(all_mem):
        if not min_addr <= ad < max_addr:
            continue
        log.debug('%s' % hex(ad))
        if first:
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'],
                offset=min_section_offset)
        else:
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'])
        first = False

    if libs:
        if added_funcs is not None:
            # was iterating over 'added_func', an undefined name
            for addr, funcaddr in added_funcs:
                libbase, dllname = libs.fad2info[funcaddr]
                libs.lib_get_add_func(libbase, dllname, addr)

        new_dll = libs.gen_new_lib(mye, lambda x: mye.virt.is_addr_in(x))
    else:
        # Same (empty) type as what gen_new_lib returns
        new_dll = []

    log.debug('%s' % new_dll)

    mye.DirImport.add_dlldesc(new_dll)
    s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport))
    mye.DirImport.set_rva(s_imp.addr)
    log.debug('%s' % repr(mye.SHList))
    if e_orig:
        # resource
        xx = str(mye)
        mye.content = xx
        ad = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva
        log.debug('dirres %s' % hex(ad))
        if ad != 0:
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad
            mye.DirRes = pe.DirRes.unpack(xx, ad, mye)
            s_res = mye.SHList.add_section(
                name="myres", rawsize=len(mye.DirRes))
            mye.DirRes.set_rva(s_res.addr)
            log.debug('%s' % repr(mye.DirRes))
    # generation: binary mode, so the image is written untranslated
    with open(fname, 'wb') as fdesc:
        fdesc.write(str(mye))
class LLVMType(llvm_c.Type):

    "LLVM type helpers"

    # size -> llvm integer type, shared by all users of the class
    int_cache = {}

    @classmethod
    def int(cls, size=32):
        "Return the (cached) llvm integer type of 'size' bits"
        if size in cls.int_cache:
            return cls.int_cache[size]
        cls.int_cache[size] = llvm_c.Type.int(size)
        return cls.int_cache[size]

    @classmethod
    def pointer(cls, addr):
        "Generic pointer for execution"
        generic_value = llvm_e.GenericValue
        return generic_value.pointer(addr)

    @classmethod
    def generic(cls, e):
        """Generic value for execution
        @e: ExprInt or already built GenericValue
        Raise ValueError for any other object"""
        if isinstance(e, m2_expr.ExprInt):
            int_type = LLVMType.int(e.size)
            return llvm_e.GenericValue.int(int_type, int(e.arg))
        if isinstance(e, llvm_e.GenericValue):
            return e
        raise ValueError()
def get_passmanager(self):
    "Return the Pass Manager associated with this context"
    # was returning self.exec_engine (copy/paste of get_execengine)
    return self.pass_manager
def add_memlookups(self):
    """Declare the MEM_LOOKUP_xx / MEM_WRITE_xx functions for 8, 16, 32
    and 64 bit accesses (first argument: i8 pointer, second: i64)."""

    byte_ptr = llvm_c.PointerType.pointer(LLVMType.int(8))
    prototypes = {}
    for width in (8, 16, 32, 64):
        lookup = {"ret": LLVMType.int(width),
                  "args": [byte_ptr, LLVMType.int(64)]}
        prototypes["MEM_LOOKUP_%02d" % width] = lookup

        write = {"ret": LLVMType.void(),
                 "args": [byte_ptr, LLVMType.int(64), LLVMType.int(width)]}
        prototypes["MEM_WRITE_%02d" % width] = write

    self.add_fc(prototypes)
def new_branch_name(self):
    "Increment the branch counter and return it as a string"

    counter = self.branch_counter + 1
    self.branch_counter = counter
    return "%s" % counter
def CreateEntryBlockAlloca(self, var_type):
    """Emit an alloca of 'var_type' in the entry block of the current fc.
    The builder is moved to the end of the entry block for the alloca,
    then moved back to the end of the block it was working on.
    Return the alloca instruction"""
    bld = self.builder
    saved_bbl = bld.basic_block

    bld.position_at_end(self.entry_bbl)
    slot = bld.alloca(var_type)

    bld.position_at_end(saved_bbl)
    return slot
def clear_cache(self, regs_updated):
    """Remove from the cache values which depends on regs_updated
    @regs_updated: iterable of the expressions which have been modified
    """

    regs_updated_set = set(regs_updated)

    # Collect first, delete after: the cache must not change size while
    # it is being iterated.
    stale = [expr for expr in self.expr_cache
             if not expr.get_r(True).isdisjoint(regs_updated_set)]
    for expr in stale:
        del self.expr_cache[expr]
Return the var created" + + if self.main_stream is True and expr in self.expr_cache: + return self.expr_cache[expr] + + builder = self.builder + + if isinstance(expr, m2_expr.ExprInt): + ret = llvm_c.Constant.int(LLVMType.int(expr.size), int(expr.arg)) + self.update_cache(expr, ret) + return ret + + if isinstance(expr, m2_expr.ExprId): + name = expr.name + if not isinstance(name, str): + # Resolve label + offset = name.offset + ret = llvm_c.Constant.int(LLVMType.int(expr.size), offset) + self.update_cache(expr, ret) + return ret + + try: + # If expr.name is already known (args) + return self.local_vars[name] + except KeyError: + pass + + ptr_casted = self.get_ptr_by_expr(expr) + + var = builder.load(ptr_casted, name) + self.update_cache(expr, var) + return var + + if isinstance(expr, m2_expr.ExprOp): + op = expr.op + + if op == "parity": + fc_ptr = self.mod.get_function_named("parity") + arg = builder.zext(self.add_ir(expr.args[0]), + LLVMType.int()) + ret = builder.call(fc_ptr, [arg]) + ret = builder.trunc(ret, LLVMType.int(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["<<<", ">>>"]: + fc_name = "rot_left" if op == "<<<" else "rot_right" + fc_ptr = self.mod.get_function_named(fc_name) + args = [self.add_ir(arg) for arg in expr.args] + arg_size = expr.args[0].size + if arg_size < 32: + # Cast args + args = [builder.zext(arg, LLVMType.int(32)) + for arg in args] + arg_size_cst = llvm_c.Constant.int(LLVMType.int(), + arg_size) + ret = builder.call(fc_ptr, [arg_size_cst] + args) + if arg_size < 32: + # Cast ret + ret = builder.trunc(ret, LLVMType.int(arg_size)) + self.update_cache(expr, ret) + return ret + + if op == "bcdadd": + size = expr.args[0].size + fc_ptr = self.mod.get_function_named("bcdadd_%s" % size) + args = [self.add_ir(arg) for arg in expr.args] + ret = builder.call(fc_ptr, args) + self.update_cache(expr, ret) + return ret + + if op == "bcdadd_cf": + size = expr.args[0].size + fc_ptr = self.mod.get_function_named("bcdadd_cf_%s" % 
size) + args = [self.add_ir(arg) for arg in expr.args] + ret = builder.call(fc_ptr, args) + ret = builder.trunc(ret, LLVMType.int(expr.size)) + self.update_cache(expr, ret) + return ret + + if op == "-": + zero = llvm_c.Constant.int(LLVMType.int(expr.size), + 0) + ret = builder.sub(zero, self.add_ir(expr.args[0])) + self.update_cache(expr, ret) + return ret + + if op == "segm": + fc_ptr = self.mod.get_function_named("segm2addr") + args_casted = [builder.zext(self.add_ir(arg), LLVMType.int(64)) + for arg in expr.args] + args = [self.local_vars["vmcpu"]] + args_casted + ret = builder.call(fc_ptr, args) + ret = builder.trunc(ret, LLVMType.int(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["imod", "idiv"]: + fc_ptr = self.mod.get_function_named( + "%s%s" % (op, expr.args[0].size)) + args_casted = [self.add_ir(arg) for arg in expr.args] + args = [self.local_vars["vmcpu"]] + args_casted + ret = builder.call(fc_ptr, args) + self.update_cache(expr, ret) + return ret + + if len(expr.args) > 1: + + if op == "*": + callback = builder.mul + elif op == "+": + callback = builder.add + elif op == "&": + callback = builder.and_ + elif op == "^": + callback = builder.xor + elif op == "|": + callback = builder.or_ + elif op == ">>": + callback = builder.lshr + elif op == "<<": + callback = builder.shl + elif op == "a>>": + callback = builder.ashr + elif op == "udiv": + callback = builder.udiv + elif op == "umod": + callback = builder.urem + else: + raise NotImplementedError('Unknown op: %s' % op) + + last = self.add_ir(expr.args[0]) + + for i in range(1, len(expr.args)): + last = callback(last, + self.add_ir(expr.args[i])) + + self.update_cache(expr, last) + + return last + + raise NotImplementedError() + + if isinstance(expr, m2_expr.ExprMem): + + fc_name = "MEM_LOOKUP_%02d" % expr.size + fc_ptr = self.mod.get_function_named(fc_name) + addr_casted = builder.zext(self.add_ir(expr.arg), + LLVMType.int(64)) + + ret = builder.call(fc_ptr, 
[self.local_vars["vmmngr"], + addr_casted]) + + self.update_cache(expr, ret) + return ret + + if isinstance(expr, m2_expr.ExprCond): + # Compute cond + cond = self.add_ir(expr.cond) + zero_casted = llvm_c.Constant.int(LLVMType.int(expr.cond.size), + 0) + condition_bool = builder.icmp(llvm_c.ICMP_NE, cond, + zero_casted) + + # Alloc return var + alloca = self.CreateEntryBlockAlloca(LLVMType.int(expr.size)) + + # Create bbls + branch_id = self.new_branch_name() + then_block = self.append_basic_block('then%s' % branch_id) + else_block = self.append_basic_block('else%s' % branch_id) + merge_block = self.append_basic_block('ifcond%s' % branch_id) + + builder.cbranch(condition_bool, then_block, else_block) + + # Deactivate object caching + current_main_stream = self.main_stream + self.main_stream = False + + # Then Bloc + builder.position_at_end(then_block) + then_value = self.add_ir(expr.src1) + builder.store(then_value, alloca) + builder.branch(merge_block) + + # Else Bloc + builder.position_at_end(else_block) + else_value = self.add_ir(expr.src2) + builder.store(else_value, alloca) + builder.branch(merge_block) + + # Merge bloc + builder.position_at_end(merge_block) + ret = builder.load(alloca) + + # Reactivate object caching + self.main_stream = current_main_stream + + self.update_cache(expr, ret) + return ret + + if isinstance(expr, m2_expr.ExprSlice): + + src = self.add_ir(expr.arg) + + # Remove trailing bits + if expr.start != 0: + to_shr = llvm_c.Constant.int(LLVMType.int(expr.arg.size), + expr.start) + shred = builder.lshr(src, + to_shr) + else: + shred = src + + # Remove leading bits + to_and = llvm_c.Constant.int(LLVMType.int(expr.arg.size), + (1 << (expr.stop - expr.start)) - 1) + anded = builder.and_(shred, + to_and) + + # Cast into e.size + ret = builder.trunc(anded, + LLVMType.int(expr.size)) + + self.update_cache(expr, ret) + return ret + + if isinstance(expr, m2_expr.ExprCompose): + + args = [] + + # Build each part + for arg in expr.args: + src, start, 
stop = arg + + # src & (stop - start) + src = self.add_ir(src) + src_casted = builder.zext(src, + LLVMType.int(expr.size)) + to_and = llvm_c.Constant.int(LLVMType.int(expr.size), + (1 << (stop - start)) - 1) + anded = builder.and_(src_casted, + to_and) + + if (start != 0): + # result << start + to_shl = llvm_c.Constant.int(LLVMType.int(expr.size), + start) + shled = builder.shl(anded, to_shl) + final = shled + else: + # Optimisation + final = anded + + args.append(final) + + # result = part1 | part2 | ... + last = args[0] + for i in xrange(1, len(expr.args)): + last = builder.or_(last, args[i]) + + self.update_cache(expr, last) + return last + + raise Exception("UnkownExpression", expr.__class__.__name__) + + def set_ret(self, var): + "Add a ret of var to the last bbl" + self.builder.ret(var) + + def from_expr(self, expr): + "Build the function from an expression" + + # Build function signature + args = expr.get_r(True) + for a in args: + if not isinstance(a, m2_expr.ExprMem): + self.my_args.append((a, LLVMType.int(a.size), a.name)) + + self.ret_type = LLVMType.int(expr.size) + + # Initialise the function + self.init_fc() + + ret = self.add_ir(expr) + + self.set_ret(ret) + + def affect(self, src, dst, add_new=True): + "Affect from M2 src to M2 dst. 
If add_new, add a suffix '_new' to dest" + + # Source + src = self.add_ir(src) + + # Destination + builder = self.builder + self.add_ir(m2_expr.ExprId("vmcpu")) + + if isinstance(dst, m2_expr.ExprId): + dst_name = dst.name + "_new" if add_new else dst.name + + ptr_casted = self.get_ptr_by_expr( + m2_expr.ExprId(dst_name, dst.size)) + builder.store(src, ptr_casted) + + elif isinstance(dst, m2_expr.ExprMem): + self.add_ir(dst.arg) + + # Function call + fc_name = "MEM_WRITE_%02d" % dst.size + fc_ptr = self.mod.get_function_named(fc_name) + dst = self.add_ir(dst.arg) + dst_casted = builder.zext(dst, LLVMType.int(64)) + builder.call(fc_ptr, [self.local_vars["vmmngr"], + dst_casted, + src]) + + else: + raise Exception("UnknownAffectationType") + + def check_error(self, line, except_do_not_update_pc=False): + """Add a check for memory errors. + @line: Irbloc line corresponding to the current instruction + If except_do_not_update_pc, check only for exception which do not + require a pc update""" + + # VmMngr "get_exception_flag" return's size + size = 64 + t_size = LLVMType.int(size) + + # Current address + pc_to_return = line.offset + + # Get exception flag value + builder = self.builder + fc_ptr = self.mod.get_function_named("get_exception_flag") + exceptionflag = builder.call(fc_ptr, [self.local_vars["vmmngr"]]) + + if except_do_not_update_pc is True: + auto_mod_flag = m2_csts.EXCEPT_DO_NOT_UPDATE_PC + m2_flag = llvm_c.Constant.int(t_size, auto_mod_flag) + exceptionflag = builder.and_(exceptionflag, m2_flag) + + # Compute cond + zero_casted = llvm_c.Constant.int(t_size, 0) + condition_bool = builder.icmp(llvm_c.ICMP_NE, + exceptionflag, + zero_casted) + + # Create bbls + branch_id = self.new_branch_name() + then_block = self.append_basic_block('then%s' % branch_id) + merge_block = self.append_basic_block('ifcond%s' % branch_id) + + builder.cbranch(condition_bool, then_block, merge_block) + + # Deactivate object caching + current_main_stream = self.main_stream + 
self.main_stream = False + + # Then Bloc + builder.position_at_end(then_block) + builder.ret(llvm_c.Constant.int(self.ret_type, pc_to_return)) + + builder.position_at_end(merge_block) + + # Reactivate object caching + self.main_stream = current_main_stream + + def log_instruction(self, instruction, line): + "Print current instruction and registers if options are set" + + # Get builder + builder = self.builder + + if self.log_mn is True: + print instruction # TODO + + if self.log_regs is True: + # Call dump general purpose registers + fc_ptr = self.mod.get_function_named("dump_gpregs") + builder.call(fc_ptr, [self.local_vars["vmcpu"]]) + + def add_bloc(self, bloc, lines): + "Add a bloc of instruction in the current function" + + for instruction, line in zip(bloc, lines): + new_reg = set() + + # Check general errors only at the beggining of instruction + if line.offset not in self.offsets_jitted: + self.offsets_jitted.add(line.offset) + self.check_error(line) + + # Log mn and registers if options is set + self.log_instruction(instruction, line) + + + # Pass on empty instruction + if len(instruction) == 0: + continue + + for expression in instruction: + # Apply preinit transformation + for func in self.llvm_context.IR_transformation_functions: + expression = func(expression) + + # Treat current expression + self.affect(expression.src, expression.dst) + + # Save registers updated + new_reg.update(expression.dst.get_w()) + + # Check for errors (without updating PC) + self.check_error(line, except_do_not_update_pc=True) + + # new -> normal + reg_written = [] + for r in new_reg: + if isinstance(r, m2_expr.ExprId): + r_new = m2_expr.ExprId(r.name + "_new", r.size) + reg_written += [r, r_new] + self.affect(r_new, r, add_new=False) + + # Clear cache + self.clear_cache(reg_written) + self.main_stream = True + + def from_bloc(self, bloc, final_expr): + """Build the function from a bloc, with the dst equation. 
+ Prototype : f(i8* vmcpu, i8* vmmngr)""" + + # Build function signature + self.my_args.append((m2_expr.ExprId("vmcpu"), + llvm_c.PointerType.pointer(LLVMType.int(8)), + "vmcpu")) + self.my_args.append((m2_expr.ExprId("vmmngr"), + llvm_c.PointerType.pointer(LLVMType.int(8)), + "vmmngr")) + self.ret_type = LLVMType.int(final_expr.size) + + # Initialise the function + self.init_fc() + + # Add content + self.add_bloc(bloc, []) + + # Finalise the function + self.set_ret(self.add_ir(final_expr)) + + raise NotImplementedError("Not tested") + + def canonize_label_name(self, label): + """Canonize @label names to a common form. + @label: str or asmlabel instance""" + if isinstance(label, str): + return label + elif isinstance(label, m2_asmbloc.asm_label): + return "label_%s" % label.name + else: + raise ValueError("label must either be str or asmlabel") + + def get_basic_bloc_by_label(self, label): + "Return the bbl corresponding to label, None otherwise" + return self.name2bbl.get(self.canonize_label_name(label), None) + + def gen_ret_or_branch(self, dest): + """Manage the dest ExprId. If label, branch on it if it is known. 
+ Otherwise, return the ExprId or the offset value""" + + builder = self.builder + + if isinstance(dest, m2_expr.ExprId): + dest_name = dest.name + elif isinstance(dest, m2_expr.ExprSlice) and \ + isinstance(dest.arg, m2_expr.ExprId): + # Manage ExprId mask case + dest_name = dest.arg.name + else: + raise ValueError() + + if not isinstance(dest_name, str): + label = dest_name + target_bbl = self.get_basic_bloc_by_label(label) + if target_bbl is None: + builder.ret(self.add_ir(dest)) + else: + builder.branch(target_bbl) + else: + builder.ret(self.add_ir(dest)) + + def add_irbloc(self, irbloc): + "Add the content of irbloc at the corresponding labeled block" + builder = self.builder + + bloc = irbloc.irs + dest = irbloc.dst + label = irbloc.label + lines = irbloc.lines + + # Get labeled basic bloc + label_block = self.get_basic_bloc_by_label(label) + builder.position_at_end(label_block) + + # Erase cache + self.expr_cache = {} + + # Add the content of the bloc with corresponding lines + self.add_bloc(bloc, lines) + + # Erase cache + self.expr_cache = {} + + # Manage ret + for func in self.llvm_context.IR_transformation_functions: + dest = func(dest) + + if isinstance(dest, m2_expr.ExprCond): + # Compute cond + cond = self.add_ir(dest.cond) + zero_casted = llvm_c.Constant.int(LLVMType.int(dest.cond.size), + 0) + condition_bool = builder.icmp(llvm_c.ICMP_NE, cond, + zero_casted) + + # Create bbls + branch_id = self.new_branch_name() + then_block = self.append_basic_block('then%s' % branch_id) + else_block = self.append_basic_block('else%s' % branch_id) + + builder.cbranch(condition_bool, then_block, else_block) + + # Then Bloc + builder.position_at_end(then_block) + self.gen_ret_or_branch(dest.src1) + + # Else Bloc + builder.position_at_end(else_block) + self.gen_ret_or_branch(dest.src2) + + elif isinstance(dest, m2_expr.ExprId): + self.gen_ret_or_branch(dest) + + elif isinstance(dest, m2_expr.ExprSlice): + self.gen_ret_or_branch(dest) + + else: + raise Exception("Bloc 
dst has to be an ExprId or an ExprCond") + + def from_blocs(self, blocs): + """Build the function from a list of bloc (irbloc instances). + Prototype : f(i8* vmcpu, i8* vmmngr)""" + + # Build function signature + self.my_args.append((m2_expr.ExprId("vmcpu"), + llvm_c.PointerType.pointer(LLVMType.int(8)), + "vmcpu")) + self.my_args.append((m2_expr.ExprId("vmmngr"), + llvm_c.PointerType.pointer(LLVMType.int(8)), + "vmmngr")) + ret_size = blocs[0].dst.size + + self.ret_type = LLVMType.int(ret_size) + + # Initialise the function + self.init_fc() + + # Create basic blocks (for label branchs) + entry_bbl, builder = self.entry_bbl, self.builder + + for irbloc in blocs: + name = self.canonize_label_name(irbloc.label) + self.append_basic_block(name) + + # Add content + builder.position_at_end(entry_bbl) + + for irbloc in blocs: + self.add_irbloc(irbloc) + + # Branch entry_bbl on first label + builder.position_at_end(entry_bbl) + first_label_bbl = self.get_basic_bloc_by_label(blocs[0].label) + builder.branch(first_label_bbl) + + def __str__(self): + "Print the llvm IR corresponding to the current module" + + return str(self.fc) + + def verify(self): + "Verify the module syntax" + + return self.mod.verify() + + def get_assembly(self): + "Return native assembly corresponding to the current module" + + return self.mod.to_native_assembly() + + def optimise(self): + "Optimise the function in place" + while self.llvm_context.pass_manager.run(self.fc): + continue + + def __call__(self, *args): + "Eval the function with arguments args" + + e = self.llvm_context.get_execengine() + + genargs = [LLVMType.generic(a) for a in args] + ret = e.run_function(self.fc, genargs) + + return ret.as_int() + + def get_function_pointer(self): + "Return a pointer on the Jitted function" + e = self.llvm_context.get_execengine() + + return e.get_pointer_to_function(self.fc) + +# TODO: +# - Add more expressions diff --git a/miasm2/jitter/os_dep/__init__.py b/miasm2/jitter/os_dep/__init__.py new file 
mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/jitter/os_dep/__init__.py diff --git a/miasm2/jitter/os_dep/linux_stdlib.py b/miasm2/jitter/os_dep/linux_stdlib.py new file mode 100644 index 00000000..0a1e0bfb --- /dev/null +++ b/miasm2/jitter/os_dep/linux_stdlib.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from sys import stdout +from string import printable + + +def xxx_isprint(jitter): + ''' + #include <ctype.h> + int isprint(int c); + + checks for any printable character including space. + ''' + c, = jitter.func_args_fastcall(1) + ret = chr(c & 0xFF) in printable and 1 or 0 + return jitter.func_ret_fastcall(ret) + + +def xxx_memcpy(jitter): + ''' + #include <string.h> + void *memcpy(void *dest, const void *src, size_t n); + + copies n bytes from memory area src to memory area dest. + ''' + dest, src, n, = jitter.func_args_fastcall(3) + jitter.vm.vm_set_mem(dest, jitter.vm.vm_get_mem(src, n)) + return jitter.func_ret_fastcall(dest) + + +def xxx_puts(jitter): + ''' + #include <stdio.h> + int puts(const char *s); + + writes the string s and a trailing newline to stdout. + ''' + s, = jitter.func_args_fastcall(1) + while True: + c = jitter.vm.vm_get_mem(s, 1) + s += 1 + if c == '\x00': + break + stdout.write(c) + stdout.write('\n') + return jitter.func_ret_fastcall(1) + + +def xxx_snprintf(jitter): + ''' + #include <stdio.h> + int snprintf(char *str, size_t size, const char *format, ...); + + writes to string str according to format format and at most size bytes. 
+ ''' + str, size, format, = jitter.func_args_fastcall(3) + curarg, output = 4, '' + while True: + c = jitter.vm.vm_get_mem(format, 1) + format += 1 + if c == '\x00': + break + if c == '%': + token = '%' + while True: + c = jitter.vm.vm_get_mem(format, 1) + format += 1 + token += c + if c in '%cdfsux': + break + c = token % jitter.func_args_fastcall(curarg)[-1] + curarg += 1 + output += c + output = output[:size - 1] + ret = len(output) + jitter.vm.vm_set_mem(str, output + '\x00') + return jitter.func_ret_fastcall(ret) diff --git a/miasm2/jitter/os_dep/win_api_x86_32.py b/miasm2/jitter/os_dep/win_api_x86_32.py new file mode 100644 index 00000000..b6813efc --- /dev/null +++ b/miasm2/jitter/os_dep/win_api_x86_32.py @@ -0,0 +1,3069 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +import struct +try: + from Crypto.Hash import MD5, SHA +except ImportError: + print "cannot find crypto, skipping" +import inspect +from zlib import crc32 +import os +import stat +import time +from miasm2.jitter.csts import * +from miasm2.core.utils import * +import string + + +MAX_PATH = 260 + + +def get_next_alloc_addr(size): + global alloc_ad + ret = winobjs.alloc_ad + winobjs.alloc_ad = (winobjs.alloc_ad + size + winobjs.alloc_align - 1) + winobjs.alloc_ad &= (0xffffffff ^ (winobjs.alloc_align - 1)) + return ret + + +def alloc_mem(myjit, msize): + alloc_addr = get_next_alloc_addr(msize) + myjit.vm.vm_add_memory_page( + alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * msize) + return alloc_addr + +""" +typedef struct tagPROCESSENTRY32 { + DWORD dwSize; + DWORD cntUsage; + DWORD th32ProcessID; + ULONG_PTR th32DefaultHeapID; + DWORD th32ModuleID; + DWORD cntThreads; + DWORD th32ParentProcessID; + LONG pcPriClassBase; + DWORD dwFlags; + TCHAR szExeFile[MAX_PATH]; +} PROCESSENTRY32, *PPROCESSENTRY32; +""" + + +access_dict = {0x0: 0, + 0x1: 0, + 0x2: PAGE_READ, + 0x4: PAGE_READ | PAGE_WRITE, + 0x10: PAGE_EXEC, + 0x20: PAGE_EXEC | PAGE_READ, + 0x40: PAGE_EXEC | PAGE_READ | PAGE_WRITE, + 0x80: PAGE_EXEC | PAGE_READ | PAGE_WRITE, + # 0x80: PAGE_EXECUTE_WRITECOPY + 0x100: 0 + } + +access_dict_inv = dict([(x[1], x[0]) for x in access_dict.items()]) + + +class whandle(): + + def __init__(self, name, info): + self.name = name + self.info = info + + def __repr__(self): + return '<%r %r %r>' % (self.__class__.__name__, self.name, self.info) + + +class handle_generator(): + + def __init__(self): + self.offset = 600 + self.all_handles = {} + + def add(self, name, info=None): + self.offset += 1 + h = whandle(name, info) + self.all_handles[self.offset] = h + + print repr(self) + return self.offset + + def __repr__(self): + out = '<%r\n' % self.__class__.__name__ + ks = self.all_handles.keys() + ks.sort() + + for k in ks: + out += " %r %r\n" % (k, self.all_handles[k]) + out += '>' 
+ return out + + def __contains__(self, e): + return e in self.all_handles + + def __getitem__(self, item): + return self.all_handles.__getitem__(item) + + def __delitem__(self, item): + self.all_handles.__delitem__(item) + + +class c_winobjs: + + def __init__(self): + self.alloc_ad = 0x20000000 + self.alloc_align = 0x1000 + self.handle_toolhelpsnapshot = 0xaaaa00 + self.toolhelpsnapshot_info = {} + self.handle_curprocess = 0xaaaa01 + self.dbg_present = 0 + self.tickcount = 0 + self.dw_pid_dummy1 = 0x111 + self.dw_pid_explorer = 0x222 + self.dw_pid_dummy2 = 0x333 + self.dw_pid_cur = 0x444 + self.module_fname_nux = None + self.module_name = "test.exe" + self.module_path = "c:\\mydir\\" + self.module_name + self.hcurmodule = None + self.module_filesize = None + self.getversion = 0x0A280105 + self.getforegroundwindow = 0x333333 + self.cryptcontext_hwnd = 0x44400 + self.cryptcontext_bnum = 0x44000 + self.cryptcontext_num = 0 + self.cryptcontext = {} + self.phhash_crypt_md5 = 0x55555 + self.files_hwnd = {} + self.windowlong_dw = 0x77700 + self.module_cur_hwnd = 0x88800 + self.module_file_nul = 0x999000 + self.runtime_dll = None + self.current_pe = None + self.tls_index = 0xf + self.tls_values = {} + self.handle_pool = handle_generator() + self.handle_mapped = {} + self.hkey_handles = {0x80000001: "hkey_current_user"} + + self.nt_mdl = {} + self.nt_mdl_ad = None + self.nt_mdl_cur = 0 + self.win_event_num = 0x13370 + self.cryptdll_md5_h = {} + + self.lastwin32error = 0 + self.mutex = {} + self.env_variables = {} + self.events_pool = {} + self.find_data = None +winobjs = c_winobjs() + + +process_list = [ + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_dummy1, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + "dummy1.exe" # TCHAR szExeFile[MAX_PATH]; + ], + [ + 
0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_explorer, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + 4, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + "explorer.exe" # TCHAR szExeFile[MAX_PATH]; + ], + + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_dummy2, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + "dummy2.exe" # TCHAR szExeFile[MAX_PATH]; + ], + + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_cur, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + winobjs.module_name # TCHAR szExeFile[MAX_PATH]; + ], + + +] + + +def whoami(): + return inspect.stack()[1][3] + + +class hobj: + pass + + +class mdl: + + def __init__(self, ad, l): + self.ad = ad + self.l = l + + def __str__(self): + return struct.pack('LL', self.ad, self.l) + + +def get_str_ansi(myjit, ad_str, max_char=None): + l = 0 + tmp = ad_str + while ((max_char is None or l < max_char) and + myjit.vm.vm_get_mem(tmp, 1) != "\x00"): + tmp += 1 + l += 1 + return myjit.vm.vm_get_mem(ad_str, l) + + +def get_str_unic(myjit, ad_str, max_char=None): + l = 0 + tmp = ad_str + while ((max_char is None or l < max_char) and + myjit.vm.vm_get_mem(tmp, 2) != "\x00\x00"): + tmp += 2 + l += 2 + s = myjit.vm.vm_get_mem(ad_str, l) + s = s[::2] # TODO: real unicode decoding + return s + + +def set_str_ansi(s): + return s + "\x00" + + +def set_str_unic(s): + return "\x00".join(list(s)) + '\x00' * 3 + + +def kernel32_HeapAlloc(myjit): + ret_ad, args = 
myjit.func_args_stdcall(3) + heap, flags, size = args + + alloc_addr = alloc_mem(myjit, size) + + myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_HeapFree(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + heap, flags, pmem = args + + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_GlobalAlloc(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + uflags, msize = args + alloc_addr = get_next_alloc_addr(msize) + myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_LocalFree(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + lpvoid, = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_LocalAlloc(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + uflags, msize = args + alloc_addr = alloc_mem(myjit, msize) + myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_GlobalFree(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + ad, = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_IsDebuggerPresent(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, winobjs.dbg_present) + + +def kernel32_CreateToolhelp32Snapshot(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + dwflags, th32processid = args + myjit.func_ret_stdcall(ret_ad, winobjs.handle_toolhelpsnapshot) + + +def kernel32_GetCurrentProcess(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, winobjs.handle_curprocess) + + +def kernel32_GetCurrentProcessId(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, winobjs.dw_pid_cur) + + +def kernel32_Process32First(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + s_handle, ad_pentry = args + + pentry = struct.pack( + 'IIIIIIIII', *process_list[0][:-1]) + process_list[0][-1] + myjit.vm.vm_set_mem(ad_pentry, pentry) + winobjs.toolhelpsnapshot_info[s_handle] = 0 + + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_Process32Next(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + s_handle, ad_pentry = args + + 
winobjs.toolhelpsnapshot_info[s_handle] += 1 + if winobjs.toolhelpsnapshot_info[s_handle] >= len(process_list): + ret = 0 + else: + ret = 1 + n = winobjs.toolhelpsnapshot_info[s_handle] + print whoami(), hex(ret_ad), '(', hex(s_handle), hex(ad_pentry), ')' + pentry = struct.pack( + 'IIIIIIIII', *process_list[n][:-1]) + process_list[n][-1] + myjit.vm.vm_set_mem(ad_pentry, pentry) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetTickCount(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + winobjs.tickcount += 1 + myjit.func_ret_stdcall(ret_ad, winobjs.tickcount) + + +def kernel32_GetVersion(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, winobjs.getversion) + + +def my_GetVersionEx(myjit, funcname, set_str): + ret_ad, args = myjit.func_args_stdcall(1) + ptr_struct, = args + + s = struct.pack("IIIII", + 0x114, # struct size + 0x5, # maj vers + 0x2, # min vers + 0xa28, # build nbr + 0x2, # platform id + ) + t = set_str("Service pack 4") + t = s + (t + '\x00' * 128 * 2)[:128 * 2] + t += struct.pack('HHHBB', 3, 0, 0x100, 1, 0) + s = t + myjit.vm.vm_set_mem(ptr_struct, s) + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetVersionExA(myjit): + my_GetVersionEx(myjit, whoami(), set_str_ansi) + + +def kernel32_GetVersionExW(myjit): + my_GetVersionEx(myjit, whoami(), set_str_unic) + + +def kernel32_GetPriorityClass(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + hwnd, = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_SetPriorityClass(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + hwnd, dwpclass = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_CloseHandle(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + hwnd, = args + myjit.func_ret_stdcall(ret_ad, 1) + + +def user32_GetForegroundWindow(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, winobjs.getforegroundwindow) + + +def user32_FindWindowA(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + 
pclassname, pwindowname = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def user32_GetTopWindow(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + hwnd, = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def user32_BlockInput(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + b, = args + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptAcquireContext(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(5) + phprov, pszcontainer, pszprovider, dwprovtype, dwflags = args + + if pszprovider: + prov = get_str(myjit, pszprovider) + else: + prov = "NONE" + print 'prov:', prov + myjit.vm.vm_set_mem(phprov, pck32(winobjs.cryptcontext_hwnd)) + + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptAcquireContextA(myjit): + advapi32_CryptAcquireContext(myjit, whoami(), get_str_ansi) + + +def advapi32_CryptAcquireContextW(myjit): + advapi32_CryptAcquireContext(myjit, whoami(), get_str_unic) + + +def advapi32_CryptCreateHash(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + hprov, algid, hkey, dwflags, phhash = args + + winobjs.cryptcontext_num += 1 + + if algid == 0x00008003: + print 'algo is MD5' + myjit.vm.vm_set_mem( + phhash, pck32(winobjs.cryptcontext_bnum + winobjs.cryptcontext_num)) + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num] = hobj() + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num].h = MD5.new() + elif algid == 0x00008004: + print 'algo is SHA1' + myjit.vm.vm_set_mem( + phhash, pck32(winobjs.cryptcontext_bnum + winobjs.cryptcontext_num)) + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num] = hobj() + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num].h = SHA.new() + else: + raise ValueError('un impl algo1') + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptHashData(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + hhash, pbdata, dwdatalen, dwflags = args + + if not hhash in winobjs.cryptcontext: + raise 
ValueError("unknown crypt context") + + data = myjit.vm.vm_get_mem(pbdata, dwdatalen) + print 'will hash %X' % dwdatalen + print repr(data[:10]) + "..." + winobjs.cryptcontext[hhash].h.update(data) + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptGetHashParam(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + hhash, param, pbdata, dwdatalen, dwflags = args + + if not hhash in winobjs.cryptcontext: + raise ValueError("unknown crypt context") + + if param == 2: + # XXX todo: save h state? + h = winobjs.cryptcontext[hhash].h.digest() + else: + raise ValueError('not impl', param) + myjit.vm.vm_set_mem(pbdata, h) + myjit.vm.vm_set_mem(dwdatalen, pck32(len(h))) + + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptReleaseContext(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + hhash, flags = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def advapi32_CryptDeriveKey(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + hprov, algid, hbasedata, dwflags, phkey = args + + if algid == 0x6801: + print 'using DES' + else: + raise ValueError('un impl algo2') + h = winobjs.cryptcontext[hbasedata].h.digest() + print 'hash', repr(h) + winobjs.cryptcontext[hbasedata].h_result = h + myjit.vm.vm_set_mem(phkey, pck32(hbasedata)) + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptDestroyHash(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + hhash, = args + myjit.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptDecrypt(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + hkey, hhash, final, dwflags, pbdata, pdwdatalen = args + raise NotImplementedError() + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_CreateFile(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(7) + (lpfilename, access, dwsharedmode, lpsecurityattr, + dwcreationdisposition, dwflagsandattr, htemplatefile) = args + + fname = get_str(myjit, lpfilename) + print 'fname', fname + fname_o = fname[:] + ret = 0xffffffff + + # test if file is original binary + f = 
fname_o + """ + if "\\" in fname_o: + f = fname_o[fname_o.rfind('\\')+1:] + else: + f = fname_o + """ + print f.lower(), winobjs.module_path.lower() + is_original_file = f.lower() == winobjs.module_path.lower() + + if fname.upper() in [r"\\.\SICE", r"\\.\NTICE", r"\\.\SIWVID"]: + pass + elif fname.upper() in ['NUL']: + ret = winobjs.module_cur_hwnd + else: + # nuxify path + fname = fname.replace('\\', "/").lower() + # go in sandbox files + f = os.path.join('file_sb', fname) + if access & 0x80000000: + # read + if dwcreationdisposition == 2: + # create_always + if os.access(f, os.R_OK): + # but file exist + pass + else: + raise NotImplementedError("Untested case") # to test + h = open(f, 'rb+') + elif dwcreationdisposition == 3: + # open_existing + if os.access(f, os.R_OK): + s = os.stat(f) + if stat.S_ISDIR(s.st_mode): + ret = winobjs.handle_pool.add(f, 0x1337) + else: + h = open(f, 'rb+') + ret = winobjs.handle_pool.add(f, h) + else: + print "FILE %r DOES NOT EXIST!" % fname + pass + elif dwcreationdisposition == 1: + # create new + if os.access(f, os.R_OK): + # file exist + # ret = 80 + winobjs.lastwin32error = 80 + pass + else: + open(f, 'w') + h = open(f, 'rb+') + ret = winobjs.handle_pool.add(f, h) + else: + raise NotImplementedError("Untested case") + elif access & 0x40000000: + # write + if dwcreationdisposition == 3: + # open existing + if is_original_file: + # cannot open self in write mode! 
+ pass + elif os.access(f, os.R_OK): + s = os.stat(f) + if stat.S_ISDIR(s.st_mode): + # open dir + ret = winobjs.handle_pool.add(f, 0x1337) + else: + h = open(f, 'rb+') + ret = winobjs.handle_pool.add(f, h) + else: + raise NotImplementedError("Untested case") # to test + elif dwcreationdisposition == 5: + # truncate_existing + if is_original_file: + pass + else: + raise NotImplementedError("Untested case") # to test + else: + # raise NotImplementedError("Untested case") # to test + h = open(f, 'w') + ret = winobjs.handle_pool.add(f, h) + else: + raise NotImplementedError("Untested case") + + # h = open(f, 'rb+') + # ret = winobjs.handle_pool.add(f, h) + print 'ret', hex(ret) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_CreateFileA(myjit): + kernel32_CreateFile(myjit, whoami(), get_str_ansi) + + +def kernel32_CreateFileW(myjit): + kernel32_CreateFile(myjit, whoami(), lambda x, y: get_str_unic(myjit, y)) + + +def kernel32_ReadFile(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + (hwnd, lpbuffer, nnumberofbytestoread, + lpnumberofbytesread, lpoverlapped) = args + + if hwnd == winobjs.module_cur_hwnd: + pass + elif hwnd in winobjs.handle_pool: + pass + else: + raise ValueError('unknown hwnd!') + + eax = 0xffffffff + data = None + if hwnd in winobjs.files_hwnd: + data = winobjs.files_hwnd[ + winobjs.module_cur_hwnd].read(nnumberofbytestoread) + elif hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[hwnd] + data = wh.info.read(nnumberofbytestoread) + else: + raise ValueError('unknown filename') + + if data is not None: + if (lpnumberofbytesread): + myjit.vm.vm_set_mem(lpnumberofbytesread, pck32(len(data))) + myjit.vm.vm_set_mem(lpbuffer, data) + + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetFileSize(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + hwnd, lpfilesizehight = args + + if hwnd == winobjs.module_cur_hwnd: + ret = len(open(winobjs.module_fname_nux).read()) + elif hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[hwnd] 
+ print wh + ret = len(open(wh.name).read()) + else: + raise ValueError('unknown hwnd!') + + if lpfilesizehight != 0: + myjit.vm.vm_set_mem(lpfilesizehight, pck32(ret)) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetFileSizeEx(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + hwnd, lpfilesizehight = args + + if hwnd == winobjs.module_cur_hwnd: + l = len(open(winobjs.module_fname_nux).read()) + elif hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[hwnd] + print wh + l = len(open(wh.name).read()) + else: + raise ValueError('unknown hwnd!') + + if lpfilesizehight == 0: + raise NotImplementedError("Untested case") + myjit.vm.vm_set_mem(lpfilesizehight, pck32( + l & 0xffffffff) + pck32((l >> 32) & 0xffffffff)) + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_FlushInstructionCache(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + hprocess, lpbasead, dwsize = args + myjit.func_ret_stdcall(ret_ad, 0x1337) + + +def kernel32_VirtualProtect(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + lpvoid, dwsize, flnewprotect, lpfloldprotect = args + + # XXX mask hpart + flnewprotect &= 0xFFF + + if not flnewprotect in access_dict: + raise ValueError('unknown access dw!') + myjit.vm.vm_set_mem_access(lpvoid, access_dict[flnewprotect]) + + # XXX todo real old protect + if lpfloldprotect: + myjit.vm.vm_set_mem(lpfloldprotect, pck32(0x40)) + + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_VirtualAlloc(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + lpvoid, dwsize, alloc_type, flprotect = args + + access_dict = {0x0: 0, + 0x1: 0, + 0x2: PAGE_READ, + 0x4: PAGE_READ | PAGE_WRITE, + 0x10: PAGE_EXEC, + 0x20: PAGE_EXEC | PAGE_READ, + 0x40: PAGE_EXEC | PAGE_READ | PAGE_WRITE, + 0x100: 0 + } + + access_dict_inv = dict([(x[1], x[0]) for x in access_dict.items()]) + + if not flprotect in access_dict: + raise ValueError('unknown access dw!') + + if lpvoid == 0: + alloc_addr = get_next_alloc_addr(dwsize) + myjit.vm.vm_add_memory_page( + alloc_addr, 
access_dict[flprotect], "\x00" * dwsize) + else: + all_mem = myjit.vm.vm_get_all_memory() + if lpvoid in all_mem: + alloc_addr = lpvoid + myjit.vm.vm_set_mem_access(lpvoid, access_dict[flprotect]) + else: + alloc_addr = get_next_alloc_addr(dwsize) + # alloc_addr = lpvoid + myjit.vm.vm_add_memory_page( + alloc_addr, access_dict[flprotect], "\x00" * dwsize) + + print 'Memory addr:', hex(alloc_addr) + myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_VirtualFree(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + lpvoid, dwsize, alloc_type = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def user32_GetWindowLongA(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + hwnd, nindex = args + myjit.func_ret_stdcall(ret_ad, winobjs.windowlong_dw) + + +def user32_SetWindowLongA(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + hwnd, nindex, newlong = args + myjit.func_ret_stdcall(ret_ad, winobjs.windowlong_dw) + + +def kernel32_GetModuleFileName(myjit, funcname, set_str): + ret_ad, args = myjit.func_args_stdcall(3) + hmodule, lpfilename, nsize = args + + if hmodule in [0, winobjs.hcurmodule]: + p = winobjs.module_path[:] + elif (winobjs.runtime_dll and + hmodule in winobjs.runtime_dll.name2off.values()): + name_inv = dict([(x[1], x[0]) + for x in winobjs.runtime_dll.name2off.items()]) + p = name_inv[hmodule] + else: + print ValueError('unknown module h', hex(hmodule)) + p = None + + if p is None: + l = 0 + elif nsize < len(p): + p = p[:nsize] + l = len(p) + else: + l = len(p) + + print repr(p) + if p: + myjit.vm.vm_set_mem(lpfilename, set_str(p)) + + myjit.func_ret_stdcall(ret_ad, l) + + +def kernel32_GetModuleFileNameA(myjit): + kernel32_GetModuleFileName(myjit, whoami(), set_str_ansi) + + +def kernel32_GetModuleFileNameW(myjit): + kernel32_GetModuleFileName(myjit, whoami(), set_str_unic) + + +def kernel32_CreateMutex(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(3) + print funcname + mutexattr, initowner, lpname = args + + if lpname: + 
name = get_str(myjit, lpname) + print repr(name) + else: + name = None + if initowner: + if name in winobjs.mutex: + raise NotImplementedError("Untested case") + ret = 0 + else: + winobjs.mutex[name] = id(name) + ret = winobjs.mutex[name] + else: + if name in winobjs.mutex: + raise NotImplementedError("Untested case") + ret = 0 + else: + winobjs.mutex[name] = id(name) + ret = winobjs.mutex[name] + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_CreateMutexA(myjit): + kernel32_CreateMutex(myjit, whoami(), get_str_ansi) + + +def kernel32_CreateMutexW(myjit): + kernel32_CreateMutex(myjit, whoami(), get_str_unic) + + +def shell32_SHGetSpecialFolderLocation(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + hwndowner, nfolder, ppidl = args + myjit.vm.vm_set_mem(ppidl, pck32(nfolder)) + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_SHGetPathFromIDList(myjit, funcname, set_str): + ret_ad, args = myjit.func_args_stdcall(2) + pidl, ppath = args + + if pidl == 7: # CSIDL_STARTUP: + s = "c:\\doc\\user\\startmenu\\programs\\startup" + s = set_str(s) + else: + raise ValueError('pidl not implemented', pidl) + myjit.vm.vm_set_mem(ppath, s) + myjit.func_ret_stdcall(ret_ad, 1) + + +def shell32_SHGetPathFromIDListW(myjit): + kernel32_SHGetPathFromIDList(myjit, whoami(), set_str_unic) + + +def shell32_SHGetPathFromIDListA(myjit): + kernel32_SHGetPathFromIDList(myjit, whoami(), set_str_ansi) + + +def kernel32_GetLastError(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, winobjs.lastwin32error) + + +def kernel32_SetLastError(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + e, = args + # lasterr addr + # ad = seh_helper.FS_0_AD + 0x34 + # myjit.vm.vm_set_mem(ad, pck32(e)) + winobjs.lastwin32error = e + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_RestoreLastError(myjit): + kernel32_SetLastError(myjit) + + +def kernel32_LoadLibraryA(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + dllname, = args + + libname = 
get_str_ansi(myjit, dllname, 0x100) + print repr(libname) + + ret = winobjs.runtime_dll.lib_get_add_base(libname) + print "ret", hex(ret) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_LoadLibraryExA(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + dllname, hfile, flags = args + + if hfile != 0: + raise NotImplementedError("Untested case") + libname = get_str_ansi(myjit, dllname, 0x100) + print repr(libname) + + ret = winobjs.runtime_dll.lib_get_add_base(libname) + print "ret", hex(ret) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetProcAddress(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + libbase, fname = args + fname = fname & 0xFFFFFFFF + if fname < 0x10000: + fname = fname + else: + fname = get_str_ansi(myjit, fname, 0x100) + if not fname: + fname = None + print repr(fname) + if fname is not None: + ad = winobjs.runtime_dll.lib_get_add_func(libbase, fname) + else: + ad = 0 + ad = winobjs.runtime_dll.lib_get_add_func(libbase, fname) + + myjit.func_ret_stdcall(ret_ad, ad) + + +def kernel32_LoadLibraryW(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + dllname, = args + + libname = get_str_unic(myjit, dllname, 0x100) + print repr(libname) + + ret = winobjs.runtime_dll.lib_get_add_base(libname) + print "ret", hex(ret) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetModuleHandle(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(1) + dllname, = args + + if dllname: + libname = get_str(myjit, dllname) + print repr(libname) + if libname: + ret = winobjs.runtime_dll.lib_get_add_base(libname) + else: + print 'unknown module!' 
+ ret = 0 + else: + ret = winobjs.current_pe.NThdr.ImageBase + print "default img base", hex(ret) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetModuleHandleA(myjit): + kernel32_GetModuleHandle(myjit, whoami(), get_str_ansi) + + +def kernel32_GetModuleHandleW(myjit): + kernel32_GetModuleHandle(myjit, whoami(), get_str_unic) + + +def kernel32_VirtualLock(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + lpaddress, dwsize = args + myjit.func_ret_stdcall(ret_ad, 1) + + +class systeminfo: + oemId = 0 + dwPageSize = 0x1000 + lpMinimumApplicationAddress = 0x10000 + lpMaximumApplicationAddress = 0x7ffeffff + dwActiveProcessorMask = 0x1 + numberOfProcessors = 0x1 + ProcessorsType = 586 + dwAllocationgranularity = 0x10000 + wProcessorLevel = 0x6 + ProcessorRevision = 0xf0b + + def pack(self): + return struct.pack('IIIIIIIIHH', + self.oemId, + self.dwPageSize, + self.lpMinimumApplicationAddress, + self.lpMaximumApplicationAddress, + self.dwActiveProcessorMask, + self.numberOfProcessors, + self.ProcessorsType, + self.dwAllocationgranularity, + self.wProcessorLevel, + self.ProcessorRevision) + + +def kernel32_GetSystemInfo(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + sys_ptr, = args + sysinfo = systeminfo() + myjit.vm.vm_set_mem(sys_ptr, sysinfo.pack()) + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_IsWow64Process(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + h, bool_ptr = args + + myjit.vm.vm_set_mem(bool_ptr, pck32(0)) + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetCommandLineA(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + s = winobjs.module_path + '\x00' + s = '"%s"' % s + alloc_addr = alloc_mem(myjit, 0x1000) + myjit.vm.vm_set_mem(alloc_addr, s) + myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_GetCommandLineW(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + s = winobjs.module_path + '\x00' + s = set_str_unic('"%s"' % s) + alloc_addr = alloc_mem(myjit, 0x1000) + myjit.vm.vm_set_mem(alloc_addr, s) + 
myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def shell32_CommandLineToArgvW(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + pcmd, pnumargs = args + cmd = get_str_unic(myjit, pcmd) + print repr(cmd) + tks = cmd.split(' ') + addr = alloc_mem(myjit, len(cmd) * 2 + 4 * len(tks)) + addr_ret = alloc_mem(myjit, 4 * (len(tks) + 1)) + o = 0 + for i, t in enumerate(tks): + x = set_str_unic(t) + "\x00\x00" + myjit.vm.vm_set_mem(addr_ret + 4 * i, pck32(addr + o)) + myjit.vm.vm_set_mem(addr + o, x) + o += len(x) + 2 + + myjit.vm.vm_set_mem(addr_ret + 4 * i, pck32(0)) + myjit.vm.vm_set_mem(pnumargs, pck32(len(tks))) + myjit.func_ret_stdcall(ret_ad, addr_ret) + + +def cryptdll_MD5Init(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + ad_ctx, = args + index = len(winobjs.cryptdll_md5_h) + h = MD5.new() + winobjs.cryptdll_md5_h[index] = h + + myjit.vm.vm_set_mem(ad_ctx, pck32(index)) + myjit.func_ret_stdcall(ret_ad, 0) + + +def cryptdll_MD5Update(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + ad_ctx, ad_input, inlen = args + + index = myjit.vm.vm_get_mem(ad_ctx, 4) + index = upck32(index) + if not index in winobjs.cryptdll_md5_h: + raise ValueError('unknown h context', index) + + data = myjit.vm.vm_get_mem(ad_input, inlen) + winobjs.cryptdll_md5_h[index].update(data) + print hexdump(data) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def cryptdll_MD5Final(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + ad_ctx, = args + + index = myjit.vm.vm_get_mem(ad_ctx, 4) + index = upck32(index) + if not index in winobjs.cryptdll_md5_h: + raise ValueError('unknown h context', index) + h = winobjs.cryptdll_md5_h[index].digest() + myjit.vm.vm_set_mem(ad_ctx + 88, h) + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlInitAnsiString(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + ad_ctx, ad_str = args + + s = get_str_ansi(myjit, ad_str) + l = len(s) + print "string", l, s + myjit.vm.vm_set_mem(ad_ctx, pw(l) + pw(l + 1) + pck32(ad_str)) + 
myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlHashUnicodeString(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + ad_ctxu, case_i, h_id, phout = args + + print hex(h_id) + if h_id != 1: + raise ValueError('unk hash unicode', h_id) + + l1, l2, ptra = struct.unpack('HHL', myjit.vm.vm_get_mem(ad_ctxu, 8)) + print hex(l1), hex(l2), hex(ptra) + s = myjit.vm.vm_get_mem(ptra, l1) + print repr(s) + s = s[:-1] + print repr(s) + hv = 0 + + if case_i: + s = s.lower() + for c in s: + hv = ((65599 * hv) + ord(c)) & 0xffffffff + print "unicode h", hex(hv) + myjit.vm.vm_set_mem(phout, pck32(hv)) + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_RtlMoveMemory(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + ad_dst, ad_src, m_len = args + data = myjit.vm.vm_get_mem(ad_src, m_len) + myjit.vm.vm_set_mem(ad_dst, data) + print hexdump(data) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlAnsiCharToUnicodeChar(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + ad_ad_ch, = args + ad_ch = upck32(myjit.vm.vm_get_mem(ad_ad_ch, 4)) + print hex(ad_ch) + ch = ord(myjit.vm.vm_get_mem(ad_ch, 1)) + myjit.vm.vm_set_mem(ad_ad_ch, pck32(ad_ch + 1)) + + print repr(ch), repr(chr(ch)) + myjit.func_ret_stdcall(ret_ad, ch) + + +def ntdll_RtlFindCharInUnicodeString(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + flags, main_str_ad, search_chars_ad, pos_ad = args + + print flags + if flags != 0: + raise ValueError('unk flags') + + ml1, ml2, mptra = struct.unpack('HHL', myjit.vm.vm_get_mem(main_str_ad, 8)) + print ml1, ml2, hex(mptra) + sl1, sl2, sptra = struct.unpack( + 'HHL', myjit.vm.vm_get_mem(search_chars_ad, 8)) + print sl1, sl2, hex(sptra) + main_data = myjit.vm.vm_get_mem(mptra, ml1)[:-1] + search_data = myjit.vm.vm_get_mem(sptra, sl1)[:-1] + + print repr(main_data) + print repr(search_data) + + pos = None + for i, c in enumerate(main_data): + for s in search_data: + if s == c: + pos = i + break + if pos: + break + print pos + if pos is None: + ret = 0xC0000225 + 
myjit.vm.vm_set_mem(pos_ad, pck32(0)) + else: + ret = 0 + myjit.vm.vm_set_mem(pos_ad, pck32(pos)) + + myjit.func_ret_stdcall(ret_ad, ret) + + +def ntdll_RtlComputeCrc32(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + dwinit, pdata, ilen = args + + data = myjit.vm.vm_get_mem(pdata, ilen) + print hex(dwinit) + print hexdump(data) + crc_r = crc32(data, dwinit) + print "crc32", hex(crc_r) + myjit.func_ret_stdcall(ret_ad, crc_r) + + +def ntdll_RtlExtendedIntegerMultiply(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + b2, b1, bm = args + a = (b1 << 32) + b2 + a = a * bm + print hex(a) + myjit.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlLargeIntegerAdd(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + a2, a1, b2, b1 = args + a = (a1 << 32) + a2 + (b1 << 32) + b2 + print hex(a) + myjit.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlLargeIntegerShiftRight(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + a2, a1, m = args + a = ((a1 << 32) + a2) >> m + print hex(a) + myjit.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlEnlargedUnsignedMultiply(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + a, b = args + a = a * b + print hex(a) + myjit.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlLargeIntegerSubtract(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + a2, a1, b2, b1 = args + a = (a1 << 32) + a2 - (b1 << 32) + b2 + print hex(a) + myjit.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlCompareMemory(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + ad1, ad2, m_len = args + data1 = myjit.vm.vm_get_mem(ad1, m_len) + data2 = myjit.vm.vm_get_mem(ad2, m_len) + + print hexdump(data1) + print hexdump(data2) + i = 0 + while data1[i] == data2[i]: + i += 1 + if i >= m_len: + break + + myjit.func_ret_stdcall(ret_ad, i) + + +def user32_GetMessagePos(myjit): + ret_ad, args = 
myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, 0x00110022) + + +def kernel32_Sleep(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + t, = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_ZwUnmapViewOfSection(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + h, ad = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def kernel32_IsBadReadPtr(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + lp, ucb = args + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_KeInitializeEvent(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + my_event, my_type, my_state = args + myjit.vm.vm_set_mem(my_event, pck32(winobjs.win_event_num)) + winobjs.win_event_num += 1 + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_RtlGetVersion(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + ptr_version, = args + + s = struct.pack("IIIII", + 0x114, # struct size + 0x5, # maj vers + 0x2, # min vers + 0x666, # build nbr + 0x2, # platform id + ) + set_str_unic("Service pack 4") + + myjit.vm.vm_set_mem(ptr_version, s) + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_RtlVerifyVersionInfo(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + ptr_version, = args + + s = myjit.vm.vm_get_mem(ptr_version, 0x5 * 4) + print repr(s) + s_size, s_majv, s_minv, s_buildn, s_platform = struct.unpack('IIIII', s) + print s_size, s_majv, s_minv, s_buildn, s_platform + raise NotImplementedError("Untested case") + myjit.vm.vm_set_mem(ptr_version, s) + myjit.func_ret_stdcall(ret_ad, 0) + + +def hal_ExAcquireFastMutex(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, 0) + + +def mdl2ad(n): + return winobjs.nt_mdl_ad + 0x10 * n + + +def ad2mdl(ad): + return ((ad - winobjs.nt_mdl_ad) & 0xFFFFFFFFL) / 0x10 + + +def ntoskrnl_IoAllocateMdl(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + v_addr, l, second_buf, chargequota, pirp = args + m = mdl(v_addr, l) + winobjs.nt_mdl[winobjs.nt_mdl_cur] = m + 
myjit.vm.vm_set_mem(mdl2ad(winobjs.nt_mdl_cur), str(m)) + myjit.func_ret_stdcall(ret_ad, mdl2ad(winobjs.nt_mdl_cur)) + winobjs.nt_mdl_cur += 1 + + +def ntoskrnl_MmProbeAndLockPages(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + p_mdl, access_mode, op = args + + if not ad2mdl(p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(p_mdl)) + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_MmMapLockedPagesSpecifyCache(myjit): + ret_ad, args = myjit.func_args_stdcall(6) + p_mdl, access_mode, cache_type, base_ad, bugcheckonfailure, priority = args + if not ad2mdl(p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(p_mdl)) + + myjit.func_ret_stdcall(ret_ad, winobjs.nt_mdl[ad2mdl(p_mdl)].ad) + + +def ntoskrnl_MmProtectMdlSystemAddress(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + p_mdl, prot = args + if not ad2mdl(p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(p_mdl)) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_MmUnlockPages(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + p_mdl, = args + if not ad2mdl(p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(p_mdl)) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_IoFreeMdl(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + p_mdl, = args + if not ad2mdl(p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(p_mdl)) + del(winobjs.nt_mdl[ad2mdl(p_mdl)]) + myjit.func_ret_stdcall(ret_ad, 0) + + +def hal_ExReleaseFastMutex(myjit): + ret_ad, args = myjit.func_args_stdcall(0) + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_RtlQueryRegistryValues(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + relativeto, path, querytable, context, environ = args + p = get_str_unic(myjit, path) + print repr(p) + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_ExAllocatePoolWithTagPriority(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + pool_type, nbr_of_bytes, tag, priority = args + + alloc_addr = get_next_alloc_addr(nbr_of_bytes) + 
myjit.vm.vm_add_memory_page( + alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * nbr_of_bytes) + + print "ad", hex(alloc_addr) + myjit.func_ret_stdcall(ret_ad, alloc_addr) + + +def my_lstrcmp(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(2) + ptr_str1, ptr_str2 = args + s1 = get_str(ptr_str1) + s2 = get_str(ptr_str2) + print '%s (%r, %r)' % (' ' * len(funcname), s1, s2) + myjit.func_ret_stdcall(ret_ad, cmp(s1, s2)) + + +def kernel32_lstrcmpA(myjit): + my_lstrcmp(myjit, whoami(), lambda x: get_str_ansi(myjit, x)) + + +def kernel32_lstrcmpiA(myjit): + my_lstrcmp(myjit, whoami(), lambda x: get_str_ansi(myjit, x).lower()) + + +def kernel32_lstrcmpW(myjit): + my_lstrcmp(myjit, whoami(), lambda x: get_str_unic(myjit, x)) + + +def kernel32_lstrcmpiW(myjit): + my_lstrcmp(myjit, whoami(), lambda x: get_str_unic(myjit, x).lower()) + + +def kernel32_lstrcmpi(myjit): + my_lstrcmp(myjit, whoami(), lambda x: get_str_ansi(myjit, x).lower()) + + +def my_strcpy(myjit, funcname, get_str, set_str): + ret_ad, args = myjit.func_args_stdcall(2) + ptr_str1, ptr_str2 = args + s2 = get_str(myjit, ptr_str2) + print '%s (%r)' % (funcname, s2) + myjit.vm.vm_set_mem(ptr_str1, set_str(s2)) + myjit.func_ret_stdcall(ret_ad, ptr_str1) + + +def kernel32_lstrcpyW(myjit): + my_strcpy(myjit, whoami(), get_str_unic, + lambda x: set_str_unic(x) + "\x00\x00") + + +def kernel32_lstrcpyA(myjit): + my_strcpy(myjit, whoami(), get_str_ansi, lambda x: x + "\x00") + + +def kernel32_lstrcpy(myjit): + my_strcpy(myjit, whoami(), get_str_ansi, lambda x: x + "\x00") + + +def kernel32_lstrcpyn(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + ptr_str1, ptr_str2, mlen = args + s2 = get_str_ansi(myjit, ptr_str2) + print repr(s2) + s2 = s2[:mlen] + myjit.vm.vm_set_mem(ptr_str1, s2) + + myjit.func_ret_stdcall(ret_ad, ptr_str1) + + +def my_strlen(myjit, funcname, get_str, mylen): + ret_ad, args = myjit.func_args_stdcall(1) + arg_src, = args + src = get_str(myjit, arg_src) + print funcname, repr(src) + 
myjit.func_ret_stdcall(ret_ad, mylen(src)) + + +def kernel32_lstrlenA(myjit): + my_strlen(myjit, whoami(), get_str_ansi, lambda x: len(x)) + + +def kernel32_lstrlenW(myjit): + my_strlen(myjit, whoami(), get_str_unic, lambda x: len(x)) + + +def kernel32_lstrlen(myjit): + my_strlen(myjit, whoami(), get_str_ansi, lambda x: len(x)) + + +def my_lstrcat(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(3) + ptr_str1, ptr_str2 = args + s1 = get_str(myjit, ptr_str1) + s2 = get_str(myjit, ptr_str2) + print '%s (%r, %r)' % (whoami(), s1, s2) + + s = s1 + s2 + print repr(s) + myjit.vm.vm_set_mem(ptr_str1, s1 + s2) + myjit.func_ret_stdcall(ret_ad, ptr_str1) + + +def kernel32_lstrcatA(myjit): + my_lstrcat(myjit, whoami(), get_str_ansi) + + +def kernel32_lstrcatW(myjit): + my_lstrcat(myjit, whoami(), get_str_unic) + + +def kernel32_GetUserGeoID(myjit): + ret_ad, args = myjit.func_args_stdcall(1) + geoclass, = args + if geoclass == 14: + ret = 12345678 + elif geoclass == 16: + ret = 55667788 + else: + raise ValueError('unknown geolcass') + + myjit.func_ret_stdcall(ret_ad, ret) + + +def my_GetVolumeInformation(myjit, funcname, get_str, set_str): + ret_ad, args = myjit.func_args_stdcall(8) + (lprootpathname, lpvolumenamebuffer, nvolumenamesize, + lpvolumeserialnumber, lpmaximumcomponentlength, lpfilesystemflags, + lpfilesystemnamebuffer, nfilesystemnamesize) = args + + print funcname, hex(lprootpathname), hex(lpvolumenamebuffer), \ + hex(nvolumenamesize), hex(lpvolumeserialnumber), \ + hex(lpmaximumcomponentlength), hex(lpfilesystemflags), \ + hex(lpfilesystemnamebuffer), hex(nfilesystemnamesize) + + if lprootpathname: + s = get_str(myjit, lprootpathname) + print repr(s) + + if lpvolumenamebuffer: + s = "volumename" + s = s[:nvolumenamesize] + myjit.vm.vm_set_mem(lpvolumenamebuffer, set_str(s)) + + if lpvolumeserialnumber: + myjit.vm.vm_set_mem(lpvolumeserialnumber, pck32(11111111)) + if lpmaximumcomponentlength: + myjit.vm.vm_set_mem(lpmaximumcomponentlength, 
pck32(0xff)) + if lpfilesystemflags: + myjit.vm.vm_set_mem(lpfilesystemflags, pck32(22222222)) + + if lpfilesystemnamebuffer: + s = "filesystemname" + s = s[:nfilesystemnamesize] + myjit.vm.vm_set_mem(lpfilesystemnamebuffer, set_str(s)) + + myjit.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetVolumeInformationA(myjit): + my_GetVolumeInformation( + myjit, whoami(), get_str_ansi, lambda x: x + "\x00") + + +def kernel32_GetVolumeInformationW(myjit): + my_GetVolumeInformation(myjit, whoami(), get_str_unic, set_str_unic) + + +def kernel32_MultiByteToWideChar(myjit): + ret_ad, args = myjit.func_args_stdcall(6) + (codepage, dwflags, lpmultibytestr, + cbmultibyte, lpwidecharstr, cchwidechar) = args + + print whoami(), hex(ret_ad), \ + hex(codepage), hex(dwflags), hex(lpmultibytestr), hex( + cbmultibyte), hex(lpwidecharstr), hex(cchwidechar) + src = get_str_ansi(myjit, lpmultibytestr) + '\x00' + l = len(src) + print repr(src) + + src = "\x00".join(list(src)) + print repr(src), hex(len(src)) + myjit.vm.vm_set_mem(lpwidecharstr, src) + myjit.func_ret_stdcall(ret_ad, l) + + +def my_GetEnvironmentVariable(myjit, funcname, get_str, set_str, mylen): + ret_ad, args = myjit.func_args_stdcall(3) + lpname, lpbuffer, nsize = args + + s = get_str(myjit, lpname) + if get_str == get_str_unic: + s = s + print 'variable', repr(s) + if s in winobjs.env_variables: + v = set_str(winobjs.env_variables[s]) + else: + print 'WARNING unknown env variable', repr(s) + v = "" + print 'return', repr(v) + myjit.vm.vm_set_mem(lpbuffer, v) + myjit.func_ret_stdcall(ret_ad, mylen(v)) + + +def my_GetSystemDirectory(myjit, funcname, set_str): + ret_ad, args = myjit.func_args_stdcall(2) + lpbuffer, usize = args + print funcname + + s = "c:\\windows\\system32" + l = len(s) + s = set_str(s) + myjit.vm.vm_set_mem(lpbuffer, s) + + myjit.func_ret_stdcall(ret_ad, l) + + +def kernel32_GetSystemDirectoryA(myjit): + my_GetSystemDirectory(myjit, whoami(), set_str_ansi) + + +def kernel32_GetSystemDirectoryW(myjit): + 
my_GetSystemDirectory(myjit, whoami(), set_str_unic) + + +def my_CreateDirectory(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(2) + lppath, secattrib = args + p = get_str(myjit, lppath) + myjit.func_ret_stdcall(ret_ad, 0x1337) + + +def kernel32_CreateDirectoryW(myjit): + my_CreateDirectory(myjit, whoami(), get_str_unic) + + +def kernel32_CreateDirectoryA(myjit): + my_CreateDirectory(myjit, whoami(), get_str_ansi) + + +def kernel32_GetEnvironmentVariableA(myjit): + my_GetEnvironmentVariable(myjit, whoami(), + get_str_ansi, + lambda x: x + "\x00", + lambda x: len(x)) + + +def kernel32_GetEnvironmentVariableW(myjit): + my_GetEnvironmentVariable(myjit, whoami(), + get_str_unic, + lambda x: "\x00".join(list(x + "\x00")), + lambda x: len(x)) + + +def my_CreateEvent(myjit, funcname, get_str): + ret_ad, args = myjit.func_args_stdcall(4) + lpeventattributes, bmanualreset, binitialstate, lpname = args + if lpname: + s = get_str(myjit, lpname) + else: + s = None + print repr(s) + if not s in winobjs.events_pool: + winobjs.events_pool[s] = (bmanualreset, binitialstate) + else: + print 'WARNING: known event' + + myjit.func_ret_stdcall(ret_ad, id(s)) + + +def kernel32_CreateEventA(myjit): + my_CreateEvent(myjit, whoami(), get_str_ansi) + + +def kernel32_CreateEventW(myjit): + my_CreateEvent(myjit, whoami(), get_str_unic) + + +def kernel32_WaitForSingleObject(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + handle, dwms = args + + print whoami(), hex(ret_ad), hex(handle), hex(dwms) + + t_start = time.time() * 1000 + found = False + while True: + if dwms and dwms + t_start > time.time() * 1000: + ret = 0x102 + break + for k, v in winobjs.events_pool.items(): + if k != handle: + continue + found = True + if winobjs.events_pool[k][1] == 1: + ret = 0 + break + if not found: + print 'unknown handle' + ret = 0xffffffff + break + time.sleep(0.1) + myjit.func_ret_stdcall(ret_ad, ret) + + +def kernel32_SetFileAttributesA(myjit): + ret_ad, args = 
myjit.func_args_stdcall(2) + lpfilename, dwfileattributes = args + print whoami(), hex(ret_ad), hex(lpfilename), hex(dwfileattributes) + + if lpfilename: + fname = get_str_ansi(myjit, lpfilename) + print "filename", repr(fname) + ret = 1 + else: + ret = 0 + myjit.vm.vm_set_mem(seh_helper.FS_0_AD + 0x34, pck32(3)) + + myjit.func_ret_stdcall(ret_ad, ret) + + +def ntdll_RtlMoveMemory(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + dst, src, l = args + s = myjit.vm.vm_get_mem(src, l) + myjit.vm.vm_set_mem(dst, s) + + myjit.func_ret_stdcall(ret_ad, 1) + + +def ntdll_ZwQuerySystemInformation(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + (systeminformationclass, systeminformation, + systeminformationl, returnl) = args + if systeminformationclass == 2: + # SYSTEM_PERFORMANCE_INFORMATION + o = struct.pack('II', 0x22222222, 0x33333333) + o += "\x00" * systeminformationl + o = o[:systeminformationl] + myjit.vm.vm_set_mem(systeminformation, o) + else: + raise ValueError('unknown sysinfo class', systeminformationclass) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_ZwProtectVirtualMemory(myjit): + ret_ad, args = myjit.func_args_stdcall(5) + handle, lppvoid, pdwsize, flnewprotect, lpfloldprotect = args + + ad = upck32(myjit.vm.vm_get_mem(lppvoid, 4)) + dwsize = upck32(myjit.vm.vm_get_mem(pdwsize, 4)) + print 'ad', hex(ad), 'size', hex(dwsize) + # XXX mask hpart + flnewprotect &= 0xFFF + + if not flnewprotect in access_dict: + raise ValueError('unknown access dw!') + myjit.vm.vm_set_mem_access(ad, access_dict[flnewprotect]) + + # XXX todo real old protect + myjit.vm.vm_set_mem(lpfloldprotect, pck32(0x40)) + + dump_memory_page_pool_py() + myjit.func_ret_stdcall(ret_ad, 1) + + +def ntdll_ZwAllocateVirtualMemory(myjit): + ret_ad, args = myjit.func_args_stdcall(6) + handle, lppvoid, zerobits, pdwsize, alloc_type, flprotect = args + + ad = upck32(myjit.vm.vm_get_mem(lppvoid, 4)) + dwsize = upck32(myjit.vm.vm_get_mem(pdwsize, 4)) + print 'ad', hex(ad), 'size', 
hex(dwsize) + + access_dict = {0x0: 0, + 0x1: 0, + 0x2: PAGE_READ, + 0x4: PAGE_READ | PAGE_WRITE, + 0x10: PAGE_EXEC, + 0x20: PAGE_EXEC | PAGE_READ, + 0x40: PAGE_EXEC | PAGE_READ | PAGE_WRITE, + 0x100: 0 + } + + access_dict_inv = dict([(x[1], x[0]) for x in access_dict.items()]) + + if not flprotect in access_dict: + raise ValueError('unknown access dw!') + + alloc_addr = get_next_alloc_addr(dwsize) + myjit.vm.vm_add_memory_page( + alloc_addr, access_dict[flprotect], "\x00" * dwsize) + myjit.vm.vm_set_mem(lppvoid, pck32(alloc_addr)) + + print 'ret', hex(alloc_addr) + dump_memory_page_pool_py() + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_ZwFreeVirtualMemory(myjit): + ret_ad, args = myjit.func_args_stdcall(4) + handle, lppvoid, pdwsize, alloc_type = args + ad = upck32(myjit.vm.vm_get_mem(lppvoid, 4)) + dwsize = upck32(myjit.vm.vm_get_mem(pdwsize, 4)) + print 'ad', hex(ad), 'size', hex(dwsize) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlInitString(myjit): + ret_ad, args = myjit.func_args_stdcall(2) + pstring, source = args + s = get_str_ansi(myjit, source) + print "str", repr(s) + + l = len(s) + 1 + + o = struct.pack('HHI', l, l, source) + myjit.vm.vm_set_mem(pstring, o) + + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlAnsiStringToUnicodeString(myjit): + ret_ad, args = myjit.func_args_stdcall(3) + dst, src, alloc_str = args + + l1, l2, p_src = struct.unpack('HHI', myjit.vm.vm_get_mem(src, 0x8)) + print hex(l1), hex(l2), hex(p_src) + s = get_str_ansi(myjit, p_src) + print "str", repr(s) + s = ("\x00".join(s + "\x00")) + l = len(s) + 1 + if alloc_str: + print 'alloc' + alloc_addr = get_next_alloc_addr(l) + myjit.vm.vm_add_memory_page( + alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * l) + else: + print 'use buf' + alloc_addr = p_src + myjit.vm.vm_set_mem(alloc_addr, s) + o = struct.pack('HHI', l, l, alloc_addr) + myjit.vm.vm_set_mem(dst, o) + myjit.func_ret_stdcall(ret_ad, 0) + + +def ntdll_LdrLoadDll(myjit): + ret_ad, args = 
def ntdll_memset(myjit):
    """Emulate ntdll!memset(dst, c, size): fill guest memory, return dst.

    Only the low byte of ``c`` is used, as in C where the fill value is
    converted to ``unsigned char``.  Without the mask ``chr()`` raised
    ValueError for any value above 255.
    """
    # NOTE(review): this handler uses the stdcall helpers although memset is
    # a C runtime routine -- confirm the intended calling convention.
    ret_ad, args = myjit.func_args_stdcall(3)
    arg_addr, arg_c, arg_size = args

    myjit.vm.vm_set_mem(arg_addr, chr(arg_c & 0xFF) * arg_size)
    myjit.func_ret_stdcall(ret_ad, arg_addr)


def msvcrt_memset(myjit):
    """Emulate msvcrt!memset(dst, c, size) (cdecl): fill memory, return dst.

    Only the low byte of ``c`` is used (C converts the fill value to
    ``unsigned char``), so wide register values no longer crash ``chr()``.
    """
    ret_ad, args = myjit.func_args_cdecl(3)
    arg_addr, arg_c, arg_size = args

    myjit.vm.vm_set_mem(arg_addr, chr(arg_c & 0xFF) * arg_size)
    myjit.func_ret_cdecl(ret_ad, arg_addr)
def shlwapi_PathIsDirectoryW(myjit):
    """Emulate shlwapi!PathIsDirectoryW against the ``file_sb`` sandbox.

    The guest path is lower-cased, its backslashes become slashes and it is
    resolved under ``file_sb``.  Returns 1 to the guest when that location
    is a directory, otherwise 0.

    Fixes: a missing path used to escape as OSError from ``os.stat``; the
    return now uses the stdcall helper, matching the stdcall argument fetch
    (it previously returned through the cdecl helper).
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    ppath, = args
    fname = get_str_unic(myjit, ppath)

    fname = fname.replace('\\', "/").lower()
    f = os.path.join('file_sb', fname)

    try:
        is_dir = stat.S_ISDIR(os.stat(f).st_mode)
    except OSError:
        # A path that cannot be stat'ed is simply "not a directory".
        is_dir = False

    myjit.func_ret_stdcall(ret_ad, 1 if is_dir else 0)


def shlwapi_PathIsFileSpec(myjit, funcname, get_str):
    """Shared body of PathIsFileSpecA/W.

    Returns 1 to the guest when the path contains no path-delimiting
    character (':' or '\\'), 0 as soon as either one is present.

    Fixes: ``myjit`` is now a parameter (the body referenced it without
    receiving it), and either delimiter alone is enough to return 0 (the
    old test required both).

    Args:
        myjit: the jitter instance driving the emulation.
        funcname: name of the emulated export (unused here).
        get_str: string reader matching the A/W flavour.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    path_ad, = args
    path = get_str(myjit, path_ad)
    print(repr(path))
    if ':' in path or '\\' in path:
        ret = 0
    else:
        ret = 1

    myjit.func_ret_stdcall(ret_ad, ret)


def shlwapi_PathGetDriveNumber(myjit, funcname, get_str):
    """Shared body of PathGetDriveNumberA/W.

    Returns the 0-25 index of the leading drive letter, or -1 when the
    path does not start with a letter A-Z (case-insensitive).  An empty
    path now yields -1 instead of raising IndexError.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    path_ad, = args
    path = get_str(myjit, path_ad)
    print(repr(path))

    ret = -1
    if path:
        l = ord(path[0].upper()) - ord('A')
        if 0 <= l <= 25:
            ret = l

    myjit.func_ret_stdcall(ret_ad, ret)


def shlwapi_PathGetDriveNumberA(myjit):
    """ANSI entry point for PathGetDriveNumber."""
    shlwapi_PathGetDriveNumber(myjit, whoami(), get_str_ansi)


def shlwapi_PathGetDriveNumberW(myjit):
    """Wide-character entry point for PathGetDriveNumber."""
    shlwapi_PathGetDriveNumber(myjit, whoami(), get_str_unic)


def shlwapi_PathIsFileSpecA(myjit):
    """ANSI entry point for PathIsFileSpec."""
    shlwapi_PathIsFileSpec(myjit, whoami(), get_str_ansi)


def shlwapi_PathIsFileSpecW(myjit):
    """Wide-character entry point for PathIsFileSpec."""
    shlwapi_PathIsFileSpec(myjit, whoami(), get_str_unic)


def shlwapi_StrToIntA(myjit):
    """Emulate shlwapi!StrToIntA: decimal string to int, 0 when unparsable."""
    ret_ad, args = myjit.func_args_stdcall(1)
    i_str_ad, = args
    i_str = get_str_ansi(myjit, i_str_ad)
    print(repr(i_str))
    try:
        i = int(i_str)
    except ValueError:
        # Only a conversion failure is expected here; anything else should
        # surface instead of being silently turned into 0.
        print('WARNING cannot convert int')
        i = 0

    myjit.func_ret_stdcall(ret_ad, i)


def shlwapi_StrToInt64Ex(myjit, funcname, get_str):
    """Shared body of StrToInt64ExA/W.

    Parses the guest string as decimal (flags == 0) or hexadecimal
    (flags == 1), stores the value as a 64-bit integer at ``pret`` and
    returns 1 to the guest.  The previous code returned the undefined
    name ``i`` and therefore always raised NameError.

    Raises:
        ValueError: for an unknown ``flags`` value or an unparsable string.
    """
    ret_ad, args = myjit.func_args_stdcall(3)
    pstr, flags, pret = args
    i_str = get_str(myjit, pstr)
    print(repr(i_str))

    if flags == 0:
        r = int(i_str)
    elif flags == 1:
        r = int(i_str, 16)
    else:
        raise ValueError('cannot decode int')

    myjit.vm.vm_set_mem(pret, struct.pack('q', r))

    myjit.func_ret_stdcall(ret_ad, 1)


def shlwapi_StrToInt64ExA(myjit):
    """ANSI entry point for StrToInt64Ex."""
    shlwapi_StrToInt64Ex(myjit, whoami(), get_str_ansi)


def shlwapi_StrToInt64ExW(myjit):
    """Wide-character entry point for StrToInt64Ex."""
    shlwapi_StrToInt64Ex(myjit, whoami(), get_str_unic)


def user32_IsCharAlpha(myjit, funcname, get_str):
    """Shared body of IsCharAlphaA/W: 1 when the character is alphabetic.

    A code that ``chr()`` cannot convert is reported and treated as NUL,
    which is not alphabetic.  ``isalpha`` was previously called with
    ``myjit`` as an argument, which raised TypeError on every call.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    c, = args
    try:
        c = chr(c)
    except ValueError:
        print('bad char %s' % c)
        c = "\x00"
    if c.isalpha():
        ret = 1
    else:
        ret = 0
    myjit.func_ret_stdcall(ret_ad, ret)


def user32_IsCharAlphaA(myjit):
    """ANSI entry point for IsCharAlpha."""
    user32_IsCharAlpha(myjit, whoami(), get_str_ansi)


def user32_IsCharAlphaW(myjit):
    """Wide-character entry point for IsCharAlpha."""
    user32_IsCharAlpha(myjit, whoami(), get_str_unic)


def user32_IsCharAlphaNumericA(myjit):
    """Emulate user32!IsCharAlphaNumericA: 1 for a letter or a digit.

    A code that ``chr()`` cannot convert is treated as NUL (result 0),
    mirroring IsCharAlpha.  ``isalnum`` was previously called with
    ``myjit`` as an argument, which raised TypeError on every call.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    c, = args
    try:
        c = chr(c)
    except ValueError:
        c = "\x00"
    if c.isalnum():
        ret = 1
    else:
        ret = 0
    myjit.func_ret_stdcall(ret_ad, ret)
def advapi32_RegSetValue(myjit, funcname, get_str):
    """Shared body of RegSetValueA/W: log the request and report success.

    Reads five stdcall arguments, prints the lower-cased sub-key name
    (empty when the pointer is NULL) and the raw value bytes (None when the
    pointer is NULL), stores nothing, and returns 0 to the guest.
    """
    ret_ad, args = myjit.func_args_stdcall(5)
    _hkey, subkey_ptr, _value_type, value_ptr, value_len = args

    subkey_name = get_str(myjit, subkey_ptr).lower() if subkey_ptr else ""
    print(repr(subkey_name))

    raw_value = myjit.vm.vm_get_mem(value_ptr, value_len) if value_ptr else None
    print(repr(raw_value))

    myjit.func_ret_stdcall(ret_ad, 0)
def user32_GetKeyboardType(myjit):
    """Emulate user32!GetKeyboardType for type flag 0 only.

    Returns 4 to the guest when the flag is 0.

    Raises:
        ValueError: for any other type flag.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    (type_flag,) = args

    # Guard clause: nothing but flag 0 is implemented.
    if type_flag != 0:
        raise ValueError('unimpl keyboard type')

    myjit.func_ret_stdcall(ret_ad, 4)
def _fake_systemtime():
    """Return the fixed, packed time structure handed to the guest.

    Eight 16-bit fields in order: year, month, day-of-week, day, hour,
    minute, second, millisecond.  The layout is forced to little-endian so
    the bytes written for the x86 guest do not depend on the host; on a
    little-endian host this is byte-identical to the former native format.
    """
    return struct.pack('<8H',
                       2011,  # year
                       10,    # month
                       5,     # dayofweek
                       7,     # day
                       13,    # hour
                       37,    # minutes
                       0,     # seconds
                       999,   # millisec
                       )


def kernel32_GetLocalTime(myjit):
    """Emulate kernel32!GetLocalTime with a constant timestamp.

    Writes the fixed time structure at the guest pointer and returns that
    pointer to the guest.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    lpsystemtime, = args

    myjit.vm.vm_set_mem(lpsystemtime, _fake_systemtime())
    myjit.func_ret_stdcall(ret_ad, lpsystemtime)


def kernel32_GetSystemTime(myjit):
    """Emulate kernel32!GetSystemTime with the same constant timestamp.

    Logs the call, writes the fixed time structure at the guest pointer and
    returns that pointer to the guest.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    lpsystemtime, = args

    print('%s %s %s' % (whoami(), hex(ret_ad), hex(lpsystemtime)))

    myjit.vm.vm_set_mem(lpsystemtime, _fake_systemtime())
    myjit.func_ret_stdcall(ret_ad, lpsystemtime)
def kernel32_UnmapViewOfFile(myjit):
    """Emulate kernel32!UnmapViewOfFile for views created by this emulation.

    Returns 1 to the guest when the address is a key of
    ``winobjs.handle_mapped``.  Nothing is written back to the backing file
    and the entry is left in place.

    Raises:
        NotImplementedError: when the address is not a known mapped view.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    (view_addr,) = args

    if view_addr not in winobjs.handle_mapped:
        raise NotImplementedError("Untested case")

    # Write-back of the view is intentionally disabled.  The dormant logic
    # looked up (file object, offset high, offset low, length) for the view,
    # sought to the combined 64-bit offset, wrote the view's bytes back and
    # closed the file.
    myjit.func_ret_stdcall(ret_ad, 1)
def kernel32_GetDiskFreeSpace(myjit, funcname, get_str):
    """Shared body of GetDiskFreeSpaceA/W: report a fixed disk geometry.

    Prints the root path (empty when the pointer is NULL), then writes four
    constant dwords through the guest's output pointers: sectors per
    cluster, bytes per sector, free clusters and total clusters.  Returns 1
    to the guest.
    """
    ret_ad, args = myjit.func_args_stdcall(5)
    root_ptr, p_sectors, p_bytes, p_free, p_total = args

    rootpath = get_str(myjit, root_ptr) if root_ptr else ""
    print(repr(rootpath))

    # (output pointer, constant) pairs, written in the original order.
    geometry = (
        (p_sectors, 8),
        (p_bytes, 0x200),
        (p_free, 0x222222),
        (p_total, 0x333333),
    )
    for out_ptr, value in geometry:
        myjit.vm.vm_set_mem(out_ptr, pck32(value))

    myjit.func_ret_stdcall(ret_ad, 1)
def msvcrt_rand(myjit):
    """Emulate msvcrt!rand with a constant value (0x666).

    The arguments are fetched with the cdecl helper, so the return now goes
    through the cdecl helper as well; it previously used the stdcall one,
    unlike every other msvcrt handler in this file.
    """
    ret_ad, args = myjit.func_args_cdecl(0)
    myjit.func_ret_cdecl(ret_ad, 0x666)
def user32_IsCharUpperA(myjit):
    """Emulate user32!IsCharUpperA: 1 when the character is upper case.

    Only the low byte of the argument is considered.  The former test
    (bit 0x20 clear) also answered 1 for non-letters such as '@' or NUL.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    c, = args

    ret = 1 if chr(c & 0xFF).isupper() else 0
    myjit.func_ret_stdcall(ret_ad, ret)


def user32_IsCharLowerA(myjit):
    """Emulate user32!IsCharLowerA: 1 when the character is lower case.

    Only the low byte of the argument is considered.  The former test
    (bit 0x20 set) also answered 1 for digits, space and most punctuation.
    """
    ret_ad, args = myjit.func_args_stdcall(1)
    c, = args

    ret = 1 if chr(c & 0xFF).islower() else 0
    myjit.func_ret_stdcall(ret_ad, ret)
def msvcrt_fread(myjit):
    """Emulate msvcrt!fread(buf, size, nmemb, stream).

    The emulated FILE structure keeps its handle-pool index at offset 0x10.
    Up to ``size * nmemb`` bytes are read from the backing file object and
    copied to ``buf``.

    Returns to the guest the number of complete items actually read, so a
    short read or end-of-file is visible to the caller.  The previous code
    always returned ``nmemb``.

    Raises:
        NotImplementedError: when the stream's handle is not in the pool.
    """
    ret_ad, args = myjit.func_args_cdecl(4)
    buf, size, nmemb, stream = args
    fd = upck32(myjit.vm.vm_get_mem(stream + 0x10, 4))
    print(hex(fd))
    if not fd in winobjs.handle_pool:
        raise NotImplementedError("Untested case")

    data = winobjs.handle_pool[fd].info.read(size * nmemb)
    myjit.vm.vm_set_mem(buf, data)

    # A zero item size reads nothing and must not divide by zero.
    items_read = len(data) // size if size else 0
    myjit.func_ret_cdecl(ret_ad, items_read)
def kernel32_myGetTempPath(myjit, func):
    """Shared body of GetTempPathA/W: always answer ``c:\\temp\\``.

    ``func`` encodes the NUL-terminated path for the A or W flavour before
    it is written at the guest buffer.  The buffer length passed by the
    guest is read but not checked.  Returns the path length, excluding the
    terminator, to the guest.
    """
    ret_ad, args = myjit.func_args_stdcall(2)
    _buf_len, buf = args

    temp_dir = 'c:\\temp\\'
    encoded = func(temp_dir + '\x00')
    myjit.vm.vm_set_mem(buf, encoded)

    myjit.func_ret_stdcall(ret_ad, len(temp_dir))
class find_data_mngr(object):
    """Bookkeeping behind the emulated FindFirstFile / FindNextFile pair.

    File lists are registered per search pattern; each ``findfirst`` call
    opens an independent cursor (a "handle") over the list of its pattern.
    Unknown patterns and handles raise ValueError: the former ``assert``
    checks disappear when Python runs with ``-O``.
    """

    def __init__(self):
        # pattern -> index into self.flist
        self.patterns = {}
        # registered file lists, in registration order
        self.flist = []
        # handle number -> [flist index, current index in list]
        self.handles = {}

    def add_list(self, pattern, flist):
        """Register ``flist`` as the search result for ``pattern``.

        Registering a pattern again makes it point at the new list.
        """
        self.patterns[pattern] = len(self.flist)
        self.flist.append(flist)

    def findfirst(self, pattern):
        """Open a cursor on the list of ``pattern`` and return its handle.

        Raises:
            ValueError: when no list was registered for ``pattern``.
        """
        if pattern not in self.patterns:
            raise ValueError('unknown find pattern', pattern)
        h = len(self.handles) + 1
        self.handles[h] = [self.patterns[pattern], 0]
        return h

    def findnext(self, h):
        """Return the next file name for handle ``h``; None when exhausted.

        Raises:
            ValueError: when ``h`` was not returned by ``findfirst``.
        """
        if h not in self.handles:
            raise ValueError('unknown find handle', h)
        cursor = self.handles[h]
        findex, index = cursor
        files = self.flist[findex]
        if index >= len(files):
            return None
        cursor[1] = index + 1
        return files[index]
def raw2guid(r):
    """Format a 16-byte little-endian GUID as ``{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}``.

    The first three groups are little-endian integers (32, 16 and 16 bits);
    the last eight bytes are printed in memory order.  The previous format
    read bytes 8-9 as a little-endian word, which printed the fourth group
    with its two bytes swapped.

    Raises:
        struct.error: when ``r`` is not exactly 16 bytes long.
    """
    fields = struct.unpack('<IHH8B', r)
    return ('{%.8X-%.4X-%.4X-%.2X%.2X-%.2X%.2X%.2X%.2X%.2X%.2X}'
            % fields)


# Digit alphabet for int2base: 0-9 then a-z, hence bases 2..36.
# ascii_lowercase is used because string.lowercase depends on the locale.
digs = string.digits + string.ascii_lowercase


def int2base(x, base):
    """Return integer ``x`` written in ``base`` using lower-case digits.

    Negative values get a leading '-'; zero is '0'.

    Raises:
        ValueError: when ``base`` is outside 2..36.  Base 1 used to loop
            forever and larger bases raised IndexError.
    """
    if not 2 <= base <= len(digs):
        raise ValueError('unsupported base', base)
    if x == 0:
        return '0'

    negative = x < 0
    x = abs(x)
    digits = []
    while x:
        # divmod keeps the division integral on every Python version.
        x, rem = divmod(x, base)
        digits.append(digs[rem])
    if negative:
        digits.append('-')
    digits.reverse()
    return ''.join(digits)
get_str_ansi) diff --git a/miasm2/jitter/os_dep/win_api_x86_32_seh.py b/miasm2/jitter/os_dep/win_api_x86_32_seh.py new file mode 100644 index 00000000..4136c592 --- /dev/null +++ b/miasm2/jitter/os_dep/win_api_x86_32_seh.py @@ -0,0 +1,896 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +import logging +from elfesteem import pe_init +from miasm2.jitter.vm_mngr import * +from miasm2.jitter.csts import * +from miasm2.core.utils import * +from miasm2.arch import x86 +import os + +# Constants Windows +EXCEPTION_BREAKPOINT = 0x80000003 +EXCEPTION_ACCESS_VIOLATION = 0xc0000005 +EXCEPTION_INT_DIVIDE_BY_ZERO = 0xc0000094 +EXCEPTION_PRIV_INSTRUCTION = 0xc0000096 +EXCEPTION_ILLEGAL_INSTRUCTION = 0xc000001d + + +log = logging.getLogger("seh_helper") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +FS_0_AD = 0x7ff70000 +PEB_AD = 0x7ffdf000 +LDR_AD = 0x340000 + +MAX_MODULES = 0x40 + +# fs:[0] Page (TIB) +tib_address = FS_0_AD +peb_address = PEB_AD +peb_ldr_data_offset = 0x1ea0 +peb_ldr_data_address = LDR_AD + peb_ldr_data_offset # PEB_AD + 0x1000 + + +modules_list_offset = 0x1f00 + +InInitializationOrderModuleList_offset = 0x1ee0 # 0x1f48 +InInitializationOrderModuleList_address = LDR_AD + \ + InInitializationOrderModuleList_offset # PEB_AD + 0x2000 + +InLoadOrderModuleList_offset = 0x1ee0 + \ + MAX_MODULES * 0x1000 # 0x1f48 + MAX_MODULES*0x1000 +InLoadOrderModuleList_address = LDR_AD + \ + InLoadOrderModuleList_offset # PEB_AD + 0x2000 + +# in_load_order_module_1 = LDR_AD + +# in_load_order_module_list_offset#PEB_AD + 0x3000 +default_seh = PEB_AD + 0x20000 + +process_environment_address = 0x10000 +process_parameters_address = 0x200000 + +context_address = 0x201000 +exception_record_address = context_address + 0x1000 +return_from_exception = 0x6eadbeef + +FAKE_SEH_B_AD = context_address + 0x2000 + +cur_seh_ad = FAKE_SEH_B_AD + +loaded_modules = ["ntdll.dll", "kernel32.dll"] +main_pe = None +main_pe_name = "c:\\xxx\\toto.exe" + + +def build_fake_teb(): + """ + +0x000 NtTib : _NT_TIB + +0x01c EnvironmentPointer : Ptr32 Void + +0x020 ClientId : _CLIENT_ID + +0x028 ActiveRpcHandle : Ptr32 Void + +0x02c 
def build_fake_peb():
    """Build the raw bytes of the fake Process Environment Block.

    Layout produced (offsets from the start of the returned string):

        +0x000  eight zero bytes (flags and Mutant)
        +0x008  ImageBaseAddress: image base of ``main_pe``, or "AAAA"
                when no main PE is set
        +0x00c  Ldr: ``peb_ldr_data_address``
        +0x010  process parameters: ``process_parameters_address``
        +0x054  pointer to the block at ``peb_address + 0x100``
        +0x064  number of CPUs (1)
        +0x100  marker 0x33333333, then pointer to ``peb_address + 0x108``
        +0x108  marker 0x44444444, then pointer to ``peb_address + 0x110``
        +0x110  marker 0x55555555, then 0x0077007C

    Gaps are filled with the letters "A", "E", "B", "C" and "D".
    """
    server_data_off = 0x100
    data1_off = 0x108
    data2_off = 0x110

    def pad_to(blob, offset, filler):
        # Extend blob with filler up to offset; a negative count adds nothing.
        return blob + (offset - len(blob)) * filler

    if main_pe:
        image_base = pck32(main_pe.NThdr.ImageBase)
    else:
        image_base = "AAAA"

    peb = "" + "\x00" * 0x8
    peb += image_base
    peb += pck32(peb_ldr_data_address)
    peb += pck32(process_parameters_address)

    peb = pad_to(peb, 0x54, "A")
    peb += pck32(peb_address + server_data_off)
    peb = pad_to(peb, 0x64, "E")
    peb += pck32(1)  # number of cpu

    peb = pad_to(peb, server_data_off, "B")
    peb += pck32(0x33333333)
    peb += pck32(peb_address + data1_off)

    peb = pad_to(peb, data1_off, "C")
    peb += pck32(0x44444444)
    peb += pck32(peb_address + data2_off)

    peb = pad_to(peb, data2_off, "D")
    peb += pck32(0x55555555)
    peb += pck32(0x0077007C)
    return peb
pck32(0) + + # get ntdll + ntdll_e = None + for bname, (addr, e) in modules_info.items(): + if bname[::2].lower() == "ntdll.dll": + ntdll_e = (e, bname, addr) + continue + if not ntdll_e: + log.warn('no ntdll, ldr data will be unconsistant') + else: + print 'ntdll', hex(ntdll_e[2]) + o += pck32(ntdll_e[2] + 0x8) + pck32(0) # XXX TODO + o += pck32(ntdll_e[2] + 0x10) + pck32(0) + + return o + +# def build_fake_InInitializationOrderModuleList(modules_name): +# """ +# +0x000 Flink : Ptr32 -+ This distance +# +0x004 Blink : Ptr32 | is eight bytes +# +0x018 DllBase : Ptr32 Void -+ DllBase +# +0x01c EntryPoint : Ptr32 Void +# +0x020 SizeOfImage : Uint4B +# +0x024 FullDllName : _UNICODE_STRING +# +0x02c BaseDllName : _UNICODE_STRING +# +0x034 Flags : Uint4B +# +0x038 LoadCount : Uint2B +# +0x03a TlsIndex : Uint2B +# +0x03c HashLinks : _LIST_ENTRY +# +0x03c SectionPointer : Ptr32 Void +# +0x040 CheckSum : Uint4B +# +0x044 TimeDateStamp : Uint4B +# +0x044 LoadedImports : Ptr32 Void +# +0x048 EntryPointActivationContext : Ptr32 Void +# +0x04c PatchInformation : Ptr32 Void +# """ +# +# o = "" +# offset_name = 0x700 +# for i, m in enumerate(modules_name): +# fname = os.path.join('win_dll', m) +# if isinstance(m, tuple): +# fname, e = m +# else: +# fname, e = m, None +# bname = os.path.split(fname)[1].lower() +# bname = "\x00".join(bname)+"\x00" +# print "add module", repr(bname) +# print hex(InInitializationOrderModuleList_address+i*0x1000) +# if e == None: +# e = pe_init.PE(open(fname, 'rb').read()) +# +# next_ad = InInitializationOrderModuleList_address + (i+1)*0x1000 +# if i == len(modules_name) -1: +# next_ad = InInitializationOrderModuleList_address +# m_o = "" +# m_o += pck32(next_ad ) +# m_o += pck32(InInitializationOrderModuleList_address + (i-1)*0x1000) +# m_o += pck32(next_ad + 8 ) +# m_o += pck32(InInitializationOrderModuleList_address +# + (i-1)*0x1000 + 8) +# m_o += pck32(next_ad + 0x10 ) +# m_o += pck32(InInitializationOrderModuleList_address +# + (i-1)*0x1000 + 
# Placeholder PE terminating the module lists: null base, entry point, size
dummy_e = pe_init.PE()
dummy_e.NThdr.ImageBase = 0
dummy_e.Opthdr.AddressOfEntryPoint = 0
dummy_e.NThdr.sizeofimage = 0


def create_modules_chain(myjit, modules_name):
    """Write one fake loader entry per module into the VM.

    The main binary and the dummy entry come first, followed by
    @modules_name. Entry i is written at
    LDR_AD + modules_list_offset + i * 0x1000; its three list links are
    left null here (the fix_*ModuleList functions fill them in).

    @myjit: jitter whose vm receives the entries (vm_set_mem)
    @modules_name: list of file names or (file name, PE instance) tuples;
        plain names other than the main binary are loaded from "win_dll"
    Return a dict mapping the wide-char lowercase base name to
    (entry address, PE instance). Unreadable modules are logged and skipped.
    """
    modules_info = {}
    base_addr = LDR_AD + modules_list_offset  # XXXX
    offset_name = 0x500
    offset_path = 0x600

    for i, m in enumerate([(main_pe_name, main_pe),
                           ("", dummy_e)] + modules_name):
        addr = base_addr + i * 0x1000
        if isinstance(m, tuple):
            fname, e = m
        else:
            fname, e = m, None
        bpath = fname.replace('/', '\\')
        bname = os.path.split(fname)[1].lower()
        # poor man's UTF-16LE encoding of both strings
        bname = "\x00".join(bname) + "\x00"
        wpath = "\x00".join(bpath) + "\x00"
        if e is None:
            if i == 0:
                full_name = fname
            else:
                full_name = os.path.join("win_dll", fname)
            try:
                with open(full_name, 'rb') as fdesc:
                    e = pe_init.PE(fdesc.read())
            except IOError:
                log.error('cannot read module %r, '
                          'ldr data will be unconsistant!!', full_name)
                e = None
        if e is None:
            continue
        print("add module %s %s" % (hex(e.NThdr.ImageBase), repr(bname)))

        modules_info[bname] = addr, e

        m_o = ""
        # three _LIST_ENTRY (Flink/Blink), linked later
        m_o += pck32(0) * 6
        m_o += pck32(e.NThdr.ImageBase)
        m_o += pck32(e.rva2virt(e.Opthdr.AddressOfEntryPoint))
        m_o += pck32(e.NThdr.sizeofimage)

        m_o += (0x24 - len(m_o)) * "A"
        print("%s %s" % (hex(len(bname)), repr(bname)))
        # FullDllName: the length must describe the path stored at
        # offset_path, not the base name
        m_o += struct.pack('HH', len(wpath), len(wpath) + 2)
        m_o += pck32(addr + offset_path)

        m_o += (0x2C - len(m_o)) * "A"
        # BaseDllName
        m_o += struct.pack('HH', len(bname), len(bname) + 2)
        m_o += pck32(addr + offset_name)

        m_o += (offset_name - len(m_o)) * "B"
        m_o += bname
        m_o += "\x00" * 3

        m_o += (offset_path - len(m_o)) * "B"
        m_o += wpath
        m_o += "\x00" * 3
        myjit.vm.vm_set_mem(addr, m_o)
    return modules_info
def _ldr_entries_main_first(module_info):
    """Return (pe, name, addr) entries: main PE first, dummy entry last."""
    entries = []
    main_entry = None
    dummy_entry = None
    for m in [main_pe_name, ""] + loaded_modules:
        fname = m[0] if isinstance(m, tuple) else m

        if "/" in fname:
            fname = fname[fname.rfind("/") + 1:]
        bname = '\x00'.join(fname) + '\x00'
        key = bname.lower()
        if key not in module_info:
            log.warn('module not found, ldr data will be unconsistant')
            continue

        addr, e = module_info[key]
        print(bname)
        if e == main_pe:
            main_entry = (e, bname, addr)
        elif e == dummy_e:
            dummy_entry = (e, bname, addr)
        else:
            entries.append((e, bname, addr))
    if not main_entry or not dummy_entry:
        log.warn('no main pe, ldr data will be unconsistant')
    else:
        entries.insert(0, main_entry)
        entries.append(dummy_entry)
    return entries


def _link_ldr_entries(myjit, entries, link_offset):
    """Write circular Flink/Blink pairs at @link_offset in every entry."""
    count = len(entries)
    for i, (_, _, addr) in enumerate(entries):
        prev_addr = entries[(i - 1) % count][2]
        next_addr = entries[(i + 1) % count][2]
        myjit.vm.vm_set_mem(
            addr + link_offset,
            pck32(next_addr + link_offset) + pck32(prev_addr + link_offset))


def fix_InLoadOrderModuleList(myjit, module_info):
    """Link the entries of @module_info through their list at offset 0x0.

    Order: main PE, the loaded modules, then the dummy entry.
    @module_info: dict as returned by create_modules_chain()
    """
    print("fix inloadorder")
    _link_ldr_entries(myjit, _ldr_entries_main_first(module_info), 0)


def fix_InMemoryOrderModuleList(myjit, module_info):
    """Link the entries of @module_info through their list at offset 0x8.

    Same ordering as fix_InLoadOrderModuleList.
    @module_info: dict as returned by create_modules_chain()
    """
    _link_ldr_entries(myjit, _ldr_entries_main_first(module_info), 0x8)
def fix_InInitializationOrderModuleList(myjit, module_info):
    """Link the entries of @module_info through their list at offset 0x10.

    Order: ntdll, kernel32, the remaining modules, then the dummy entry.
    The main PE is left out of this list.
    @module_info: dict as returned by create_modules_chain()
    """
    olist = []
    ntdll_e = None
    kernel_e = None
    # was never initialised: a missing dummy entry raised UnboundLocalError
    d_e = None
    for bname, (addr, e) in module_info.items():
        # keys are wide-char names: keep one byte out of two to compare
        if bname[::2].lower() == "ntdll.dll":
            ntdll_e = (e, bname, addr)
            continue
        elif bname[::2].lower() == "kernel32.dll":
            kernel_e = (e, bname, addr)
            continue
        elif e == dummy_e:
            d_e = (e, bname, addr)
            continue
        elif e == main_pe:
            continue
        olist.append((e, bname, addr))
    if not ntdll_e or not kernel_e or not d_e:
        log.warn('no kernel ntdll, ldr data will be unconsistant')
    else:
        olist[0:0] = [ntdll_e]
        olist[1:1] = [kernel_e]

    if d_e is not None:
        olist.append(d_e)

    count = len(olist)
    for i, (e, bname, addr) in enumerate(olist):
        p_addr = olist[(i - 1) % count][2]
        n_addr = olist[(i + 1) % count][2]
        myjit.vm.vm_set_mem(
            addr + 0x10, pck32(n_addr + 0x10) + pck32(p_addr + 0x10))


def add_process_env(myjit):
    """Map a wide-char environment block at process_environment_address."""
    # was spelled 'ALLUSEESPROFILE', a name no program would look up
    env_str = 'ALLUSERSPROFILE=C:\\Documents and Settings\\All Users\x00'
    env_str = '\x00'.join(env_str)
    env_str += "\x00" * 0x10
    # the page is created with its content; no second write is needed
    myjit.vm.vm_add_memory_page(process_environment_address,
                                PAGE_READ | PAGE_WRITE,
                                env_str)
def add_process_parameters(myjit):
    """Map a fake process-parameters block.

    It holds a size field (0x1000) and, at offset 0x48, a pointer to the
    environment block; the gap is filled with "E".
    """
    o = ""
    o += pck32(0x1000)  # size
    o += "E" * (0x48 - len(o))
    o += pck32(process_environment_address)
    myjit.vm.vm_add_memory_page(process_parameters_address,
                                PAGE_READ | PAGE_WRITE,
                                o)


def build_fake_InLoadOrderModuleList(modules_name):
    """Return the bytes of a chain of 0x1000-byte fake loader entries.

    +0x000 Flink : Ptr32                                 -+ This distance
    +0x004 Blink : Ptr32                                  | is eight bytes
    +0x018 DllBase                        : Ptr32 Void   -+ DllBase -> _IMAGE_DOS_HEADER
    +0x01c EntryPoint                     : Ptr32 Void
    +0x020 SizeOfImage                    : Uint4B
    +0x024 FullDllName                    : _UNICODE_STRING
    +0x02c BaseDllName                    : _UNICODE_STRING
    +0x034 Flags                          : Uint4B
    +0x038 LoadCount                      : Uint2B
    +0x03a TlsIndex                       : Uint2B
    +0x03c HashLinks                      : _LIST_ENTRY
    +0x03c SectionPointer                 : Ptr32 Void
    +0x040 CheckSum                       : Uint4B
    +0x044 TimeDateStamp                  : Uint4B
    +0x044 LoadedImports                  : Ptr32 Void
    +0x048 EntryPointActivationContext    : Ptr32 Void
    +0x04c PatchInformation               : Ptr32 Void

    @modules_name: list of file names or (file name, PE instance) tuples
    The first record describes the main binary, then one record follows per
    module.

    NOTE(review): the per-module addresses start again at
    InLoadOrderModuleList_address although the main-binary record already
    occupies the first slot of the returned buffer -- confirm the intended
    layout before reviving this function.
    """
    offset_name = 0x700
    first_name = "\x00".join(main_pe_name + "\x00\x00")

    o = ""
    o += pck32(InLoadOrderModuleList_address)
    o += pck32(InLoadOrderModuleList_address +
               (len(modules_name) - 1) * 0x1000)
    o += pck32(InLoadOrderModuleList_address + 8)
    o += pck32(InLoadOrderModuleList_address +
               (len(modules_name) - 1) * 0x1000 + 8)
    o += pck32(InLoadOrderModuleList_address + 0x10)
    o += pck32(InLoadOrderModuleList_address +
               (len(modules_name) - 1) * 0x1000 + 0x10)

    if main_pe:
        o += pck32(main_pe.NThdr.ImageBase)
        o += pck32(main_pe.rva2virt(main_pe.Opthdr.AddressOfEntryPoint))
    # else: no fixed values, the padding below fills the gap

    o += (0x24 - len(o)) * "A"
    o += struct.pack('HH', len(first_name), len(first_name))
    o += pck32(InLoadOrderModuleList_address + offset_name)

    o += (0x2C - len(o)) * "A"
    o += struct.pack('HH', len(first_name), len(first_name))
    o += pck32(InLoadOrderModuleList_address + offset_name)

    o += (offset_name - len(o)) * "B"
    o += first_name
    o += (0x1000 - len(o)) * "C"
    for i, m in enumerate(modules_name):
        if isinstance(m, tuple):
            fname, e = m
        else:
            fname, e = m, None
        bname = os.path.split(fname)[1].lower()
        bname = "\x00".join(bname) + "\x00"
        print(hex(InLoadOrderModuleList_address + i * 0x1000))
        if e is None:
            # close the file instead of leaking the handle
            with open(fname, 'rb') as fdesc:
                e = pe_init.PE(fdesc.read())

        print("add module %s %s" % (hex(e.NThdr.ImageBase), repr(bname)))

        next_ad = InLoadOrderModuleList_address + (i + 1) * 0x1000
        if i == len(modules_name) - 1:
            # last entry loops back to the head
            next_ad = InLoadOrderModuleList_address
        m_o = ""
        m_o += pck32(next_ad)
        m_o += pck32(InLoadOrderModuleList_address + (i - 1) * 0x1000)
        m_o += pck32(next_ad + 8)
        m_o += pck32(InLoadOrderModuleList_address + (i - 1) * 0x1000 + 8)
        m_o += pck32(next_ad + 0x10)
        m_o += pck32(InLoadOrderModuleList_address + (i - 1) * 0x1000 + 0x10)
        m_o += pck32(e.NThdr.ImageBase)
        m_o += pck32(e.rva2virt(e.Opthdr.AddressOfEntryPoint))
        m_o += pck32(e.NThdr.sizeofimage)

        m_o += (0x24 - len(m_o)) * "A"
        print("%s %s" % (hex(len(bname)), repr(bname)))
        m_o += struct.pack('HH', len(bname), len(bname) + 2)
        m_o += pck32(InLoadOrderModuleList_address + i * 0x1000 + offset_name)

        m_o += (0x2C - len(m_o)) * "A"
        m_o += struct.pack('HH', len(bname), len(bname) + 2)
        m_o += pck32(InLoadOrderModuleList_address + i * 0x1000 + offset_name)

        m_o += (offset_name - len(m_o)) * "B"
        m_o += bname
        m_o += "\x00" * 3

        m_o += (0x1000 - len(m_o)) * "J"

        print("module %s %s" % ("%.8X" % e.NThdr.ImageBase, fname))

        o += m_o
    return o
# Slot table for the fake SEH records: address -> None (free) / True (taken)
all_seh_ad = dict.fromkeys(
    xrange(FAKE_SEH_B_AD, FAKE_SEH_B_AD + 0x1000, 0x20))
# Number of exceptions dispatched so far; defined here so that
# fake_seh_handler does not raise NameError when init_seh was not called.
seh_count = 0
# http://blog.fireeye.com/research/2010/08/download_exec_notes.html


def init_seh(myjit):
    """Map every fake Windows structure needed for SEH emulation.

    Creates the TEB, PEB and loader-data pages, the module chain and its
    three lists, the process environment/parameters, the default SEH
    record, and the CONTEXT / EXCEPTION_RECORD / fake SEH areas.

    @myjit: jitter whose vm receives the pages
    """
    global seh_count
    seh_count = 0
    myjit.vm.vm_add_memory_page(
        FS_0_AD, PAGE_READ | PAGE_WRITE, build_fake_teb())
    myjit.vm.vm_add_memory_page(
        peb_address, PAGE_READ | PAGE_WRITE, build_fake_peb())

    # One zeroed area for the loader data and every module entry
    myjit.vm.vm_add_memory_page(
        LDR_AD, PAGE_READ | PAGE_WRITE, "\x00" * MAX_MODULES * 0x1000)
    module_info = create_modules_chain(myjit, loaded_modules)
    fix_InLoadOrderModuleList(myjit, module_info)
    fix_InMemoryOrderModuleList(myjit, module_info)
    fix_InInitializationOrderModuleList(myjit, module_info)

    ldr_data = build_fake_ldr_data(module_info)
    myjit.vm.vm_set_mem(LDR_AD, ldr_data)
    add_process_env(myjit)
    add_process_parameters(myjit)

    # Default SEH record: end-of-chain marker followed by two marker dwords
    myjit.vm.vm_add_memory_page(default_seh, PAGE_READ | PAGE_WRITE, pck32(
        0xffffffff) + pck32(0x41414141) + pck32(0x42424242))

    myjit.vm.vm_add_memory_page(
        context_address, PAGE_READ | PAGE_WRITE, '\x00' * 0x2cc)
    myjit.vm.vm_add_memory_page(
        exception_record_address, PAGE_READ | PAGE_WRITE, '\x00' * 200)

    myjit.vm.vm_add_memory_page(
        FAKE_SEH_B_AD, PAGE_READ | PAGE_WRITE, 0x10000 * "\x00")
# http://www.codeproject.com/KB/system/inject2exe.aspx#RestorethefirstRegistersContext5_1


def regs2ctxt(regs):
    """Serialise the general purpose registers of @regs as a CONTEXT blob.

    @regs: dict giving EDI, ESI, EBX, EDX, ECX, EAX, EBP, EIP and ESP
    Return the packed string; flags, debug registers and the float area
    are zeroed and the segment selectors are constants.
    """
    zero_dword = '\x00\x00\x00\x00'
    sel_23 = '\x23\x00\x00\x00'
    fields = [
        zero_dword,              # ContextFlags
        zero_dword * 6,          # drX
        '\x00' * 112,            # float context
        zero_dword,              # segment selectors
        '\x3b\x00\x00\x00',
        sel_23,
        sel_23,
    ]
    # gpregs, in CONTEXT order
    for name in ('EDI', 'ESI', 'EBX', 'EDX', 'ECX', 'EAX', 'EBP', 'EIP'):
        fields.append(pck32(regs[name]))
    fields.append(sel_23)                # cs
    fields.append(zero_dword)            # eflags
    fields.append(pck32(regs['ESP']))    # esp
    fields.append(sel_23)                # ss segment selector
    return "".join(fields)
def ctxt2regs(ctxt):
    """Extract the general purpose registers from a CONTEXT blob.

    @ctxt: raw CONTEXT bytes, laid out as produced by regs2ctxt
    Return a dict with EDI, ESI, EBX, EDX, ECX, EAX, EBP, EIP and ESP.
    Each register is also printed.
    """
    # ContextFlags (4) + six debug registers (24) + float area (112)
    # + gs/fs/es/ds selectors (16)
    gp_off = 4 + 6 * 4 + 112 + 4 * 4
    names = ('EDI', 'ESI', 'EBX', 'EDX', 'ECX', 'EAX', 'EBP', 'EIP')
    values = struct.unpack('I' * 8, ctxt[gp_off:gp_off + 4 * 8])
    regs = dict(zip(names, values))

    # cs and eflags sit between EIP and ESP
    esp_off = gp_off + 4 * 8 + 2 * 4
    regs['ESP'] = upck32(ctxt[esp_off:esp_off + 4])

    for name, value in regs.items():
        print("%s %s" % (name, hex(value)))
    # skip extended
    return regs


def get_free_seh_place():
    """Reserve and return the lowest free fake SEH slot address.

    Raise ValueError when every slot of all_seh_ad is taken.
    """
    for ad in sorted(all_seh_ad):
        if all_seh_ad[ad] is None:
            print('TAKING SEH %s' % hex(ad))
            all_seh_ad[ad] = True
            return ad
    raise ValueError('too many stacked seh ')


def free_seh_place(ad):
    """Release the fake SEH slot at address @ad.

    Raise ValueError if @ad is not a slot address or is not currently taken.
    """
    print('RELEASING SEH %s' % hex(ad))

    if ad not in all_seh_ad:
        raise ValueError('zarb seh ad!', hex(ad))
    if all_seh_ad[ad] is not True:
        # @wisk typolol
        raise ValueError('seh alreaedy remouvede?!!', hex(ad))
    all_seh_ad[ad] = None
def fake_seh_handler(myjit, except_code):
    """Set the VM up to run the current SEH handler for @except_code.

    Writes a CONTEXT and an EXCEPTION_RECORD, pushes the handler arguments
    and the fake return address, installs a temporary SEH record in
    fs:[0] and clears the pending exception flags.

    @myjit: jitter being run
    @except_code: value stored as ExceptionCode
    Return the address of the handler to jump to.
    """
    global seh_count
    myjit.cpu.vm_get_gpreg()
    print('-> exception at %s %s' % (hex(myjit.cpu.EIP), seh_count))
    seh_count += 1

    # Forge a CONTEXT
    zero_dword = '\x00\x00\x00\x00'
    sel_23 = '\x23\x00\x00\x00'
    pieces = [
        zero_dword, zero_dword * 6, '\x00' * 112,
        zero_dword, '\x3b\x00\x00\x00', sel_23,
        sel_23,
        pck32(myjit.cpu.EDI), pck32(myjit.cpu.ESI),
        pck32(myjit.cpu.EBX), pck32(myjit.cpu.EDX),
        pck32(myjit.cpu.ECX), pck32(myjit.cpu.EAX),
        pck32(myjit.cpu.EBP), pck32(myjit.cpu.EIP),
        sel_23, zero_dword, pck32(myjit.cpu.ESP),
        sel_23,
    ]
    ctxt = ''.join(pieces)

    # Get current seh (fs:[0])
    seh_ptr = upck32(myjit.vm.vm_get_mem(tib_address, 4))

    # Retrieve seh fields
    seh_raw = myjit.vm.vm_get_mem(seh_ptr, 0xc)
    old_seh, eh, safe_place = struct.unpack('III', seh_raw)

    print('-> seh_ptr %s -> { old_seh %s eh %s safe_place %s }' % (
        hex(seh_ptr), hex(old_seh), hex(eh), hex(safe_place)))

    # Write context
    myjit.vm.vm_set_mem(context_address, ctxt)

    # Write exception_record
    # http://msdn.microsoft.com/en-us/library/aa363082(v=vs.85).aspx
    #
    # typedef struct _EXCEPTION_RECORD {
    #   DWORD                    ExceptionCode;
    #   DWORD                    ExceptionFlags;
    #   struct _EXCEPTION_RECORD *ExceptionRecord;
    #   PVOID                    ExceptionAddress;
    #   DWORD                    NumberParameters;
    #   ULONG_PTR ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS];
    # } EXCEPTION_RECORD, *PEXCEPTION_RECORD;
    record = (pck32(except_code) +
              pck32(0) + pck32(0) + pck32(myjit.cpu.EIP) +
              pck32(0) + pck32(0))
    myjit.vm.vm_set_mem(exception_record_address, record)

    # Prepare the stack
    for value in (context_address,             # Context
                  seh_ptr,                     # SEH
                  exception_record_address,    # ExceptRecords
                  return_from_exception):      # Ret address
        myjit.vm_push_uint32_t(value)

    # Set fake new current seh for exception
    fake_seh_ad = get_free_seh_place()
    print(hex(fake_seh_ad))
    fake_record = (pck32(seh_ptr) + pck32(0xaaaaaaaa) +
                   pck32(0xaaaaaabb) + pck32(0xaaaaaacc))
    myjit.vm.vm_set_mem(fake_seh_ad, fake_record)
    myjit.vm.vm_set_mem(tib_address, pck32(fake_seh_ad))

    dump_seh(myjit)

    print('-> jumping at %s' % hex(eh))
    myjit.vm.vm_set_exception(0)
    myjit.cpu.vm_set_exception(0)

    # XXX set ebx to nul?
    myjit.cpu.EBX = 0

    return eh

fake_seh_handler.base = FAKE_SEH_B_AD
def dump_seh(myjit):
    """Print the SEH chain starting at fs:[0] (at most six records)."""
    print('dump_seh:')
    print('-> tib_address: %s' % hex(tib_address))
    cur_seh_ptr = upck32(myjit.vm.vm_get_mem(tib_address, 4))
    indent = 1
    loop = 0
    while True:
        if loop > 5:
            print("too many seh, quit")
            return
        prev_seh, eh = struct.unpack('II', myjit.vm.vm_get_mem(cur_seh_ptr, 8))
        print('%sseh_ptr: %s  -> { prev_seh: %s eh: %s }' % (
            '\t' * indent, hex(cur_seh_ptr), hex(prev_seh), hex(eh)))
        # both values are treated as end of chain
        if prev_seh in [0xFFFFFFFF, 0]:
            break
        cur_seh_ptr = prev_seh
        indent += 1
        loop += 1


def set_win_fs_0(myjit, fs=4):
    """Load FS with selector @fs and set its segment base to FS_0_AD.

    @myjit: jitter to configure
    @fs: selector value stored in FS (the argument used to be ignored and
        0x4 was always used; 4 remains the default)
    Return the set of segment registers concerned.
    """
    regs = myjit.cpu.vm_get_gpreg()
    regs['FS'] = fs
    myjit.cpu.vm_set_gpreg(regs)
    myjit.cpu.vm_set_segm_base(regs['FS'], FS_0_AD)
    segm_to_do = set([x86.regs.FS])
    return segm_to_do


def add_modules_info(pe_in, pe_in_name="toto.exe", all_pe=None):
    """Register the main binary and the modules used by init_seh.

    @pe_in: PE instance of the main binary
    @pe_in_name: its file name
    @all_pe: list of modules (names or (name, PE) tuples); empty if None
    """
    global main_pe, main_pe_name, loaded_modules
    if all_pe is None:
        all_pe = []
    main_pe = pe_in
    main_pe_name = pe_in_name
    loaded_modules = all_pe


def return_from_seh(myjit):
    "Handle return after a call to fake seh handler"

    # Get current context
    myjit.cpu.ESP = upck32(myjit.vm.vm_get_mem(context_address + 0xc4, 4))
    logging.info('-> new esp: %x' % myjit.cpu.ESP)

    # Rebuild SEH
    old_seh = upck32(myjit.vm.vm_get_mem(tib_address, 4))
    new_seh = upck32(myjit.vm.vm_get_mem(old_seh, 4))
    logging.info('-> old seh: %x' % old_seh)
    logging.info('-> new seh: %x' % new_seh)
    myjit.vm.vm_set_mem(tib_address, pck32(new_seh))

    dump_seh(myjit)

    # Release SEH
    free_seh_place(old_seh)

    # A register value compared with -1 could never match if it is read
    # back unsigned, so compare the 32-bit pattern instead.
    eax = myjit.cpu.EAX & 0xffffffff
    if eax == 0x0:
        # ExceptionContinueExecution
        print('-> seh continues')
        ctxt_ptr = context_address
        print('-> context: %s' % hex(ctxt_ptr))

        # Get registers changes
        ctxt_str = myjit.vm.vm_get_mem(ctxt_ptr, 0x2cc)
        regs = ctxt2regs(ctxt_str)
        myjit.pc = regs["EIP"]
        for reg_name, reg_value in regs.items():
            setattr(myjit.cpu, reg_name, reg_value)

        logging.info('-> context::Eip: %x' % myjit.pc)

    elif eax == 0xffffffff:
        raise NotImplementedError("-> seh try to go to the next handler")

    elif eax == 1:
        # ExceptionContinueSearch
        raise NotImplementedError("-> seh, gameover")
Rebuild SEH + old_seh = upck32(myjit.vm.vm_get_mem(tib_address, 4)) + new_seh = upck32(myjit.vm.vm_get_mem(old_seh, 4)) + logging.info('-> old seh: %x' % old_seh) + logging.info('-> new seh: %x' % new_seh) + myjit.vm.vm_set_mem(tib_address, pck32(new_seh)) + + dump_seh(myjit) + + # Release SEH + free_seh_place(old_seh) + + if myjit.cpu.EAX == 0x0: + # ExceptionContinueExecution + print '-> seh continues' + ctxt_ptr = context_address + print '-> context:', hex(ctxt_ptr) + + # Get registers changes + ctxt_str = myjit.vm.vm_get_mem(ctxt_ptr, 0x2cc) + regs = ctxt2regs(ctxt_str) + myjit.pc = regs["EIP"] + for reg_name, reg_value in regs.items(): + setattr(myjit.cpu, reg_name, reg_value) + + logging.info('-> context::Eip: %x' % myjit.pc) + + elif myjit.cpu.EAX == -1: + raise NotImplementedError("-> seh try to go to the next handler") + + elif myjit.cpu.EAX == 1: + # ExceptionContinueSearch + raise NotImplementedError("-> seh, gameover") diff --git a/miasm2/jitter/queue.h b/miasm2/jitter/queue.h new file mode 100644 index 00000000..0caf72fb --- /dev/null +++ b/miasm2/jitter/queue.h @@ -0,0 +1,553 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD$ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +//#include <sys/cdefs.h> + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. 
New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
/*
 * Compile-time switch for the tracing support below. It is fixed to 0 here,
 * so the empty definitions of the #else branch are the ones in effect.
 */
#define	QUEUE_MACRO_DEBUG 0
#if QUEUE_MACRO_DEBUG
/* Store the last 2 places the queue element or head was altered */
struct qm_trace {
	char * lastfile;
	int lastline;
	char * prevfile;
	int prevline;
};

/* Extra member added to the TAILQ head and entry structures. */
#define	TRACEBUF	struct qm_trace trace;
/* Overwrite a pointer with an invalid value. */
#define	TRASHIT(x)	do {(x) = (void *)-1;} while (0)

/* Shift "last" into "prev", then record the current file and line. */
#define	QMD_TRACE_HEAD(head) do {					\
	(head)->trace.prevline = (head)->trace.lastline;		\
	(head)->trace.prevfile = (head)->trace.lastfile;		\
	(head)->trace.lastline = __LINE__;				\
	(head)->trace.lastfile = __FILE__;				\
} while (0)

/* Same bookkeeping as QMD_TRACE_HEAD, applied to an element. */
#define	QMD_TRACE_ELEM(elem) do {					\
	(elem)->trace.prevline = (elem)->trace.lastline;		\
	(elem)->trace.prevfile = (elem)->trace.lastfile;		\
	(elem)->trace.lastline = __LINE__;				\
	(elem)->trace.lastfile = __FILE__;				\
} while (0)

#else
/* Tracing disabled: every hook expands to nothing. */
#define	QMD_TRACE_ELEM(elem)
#define	QMD_TRACE_HEAD(head)
#define	TRACEBUF
#define	TRASHIT(x)
#endif	/* QUEUE_MACRO_DEBUG */

/*
 * Singly-linked List declarations.
 */
/* Declare "struct name", the list head holding the first element. */
#define	SLIST_HEAD(name, type)						\
struct name {								\
	struct type *slh_first;	/* first element */			\
}

/* Static initializer for an empty list head. */
#define	SLIST_HEAD_INITIALIZER(head)					\
	{ NULL }

/* Link member to embed in each "struct type" element. */
#define	SLIST_ENTRY(type)						\
struct {								\
	struct type *sle_next;	/* next element */			\
}
/*
 * Singly-linked List functions.
 */
#define	SLIST_EMPTY(head)	((head)->slh_first == NULL)

#define	SLIST_FIRST(head)	((head)->slh_first)

/* Forward traversal; (var) is NULL once the loop has run to its end. */
#define	SLIST_FOREACH(var, head, field)					\
	for ((var) = SLIST_FIRST((head));				\
	    (var);							\
	    (var) = SLIST_NEXT((var), field))

/* Traversal that reads the successor into (tvar) before the body runs. */
#define	SLIST_FOREACH_SAFE(var, head, field, tvar)			\
	for ((var) = SLIST_FIRST((head));				\
	    (var) && ((tvar) = SLIST_NEXT((var), field), 1);		\
	    (var) = (tvar))

/* Traversal that also tracks, in (varp), the pointer leading to (var). */
#define	SLIST_FOREACH_PREVPTR(var, varp, head, field)			\
	for ((varp) = &SLIST_FIRST((head));				\
	    ((var) = *(varp)) != NULL;					\
	    (varp) = &SLIST_NEXT((var), field))

#define	SLIST_INIT(head) do {						\
	SLIST_FIRST((head)) = NULL;					\
} while (0)

#define	SLIST_INSERT_AFTER(slistelm, elm, field) do {			\
	SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field);	\
	SLIST_NEXT((slistelm), field) = (elm);				\
} while (0)

#define	SLIST_INSERT_HEAD(head, elm, field) do {			\
	SLIST_NEXT((elm), field) = SLIST_FIRST((head));			\
	SLIST_FIRST((head)) = (elm);					\
} while (0)

#define	SLIST_NEXT(elm, field)	((elm)->field.sle_next)

/*
 * Unlink (elm). Unless it is the first element, the list is walked from
 * the head to find its predecessor; the walk has no NULL check, so (elm)
 * must be on the list.
 */
#define	SLIST_REMOVE(head, elm, type, field) do {			\
	if (SLIST_FIRST((head)) == (elm)) {				\
		SLIST_REMOVE_HEAD((head), field);			\
	}								\
	else {								\
		struct type *curelm = SLIST_FIRST((head));		\
		while (SLIST_NEXT(curelm, field) != (elm))		\
			curelm = SLIST_NEXT(curelm, field);		\
		SLIST_NEXT(curelm, field) =				\
		    SLIST_NEXT(SLIST_NEXT(curelm, field), field);	\
	}								\
} while (0)

/* Dereferences the first element without a check: list must be non-empty. */
#define	SLIST_REMOVE_HEAD(head, field) do {				\
	SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field);	\
} while (0)

/*
 * Singly-linked Tail queue declarations.
 */
#define	STAILQ_HEAD(name, type)						\
struct name {								\
	struct type *stqh_first;/* first element */			\
	struct type **stqh_last;/* addr of last next element */		\
}

/* Empty queue: stqh_last points back at the head's own first pointer. */
#define	STAILQ_HEAD_INITIALIZER(head)					\
	{ NULL, &(head).stqh_first }

#define	STAILQ_ENTRY(type)						\
struct {								\
	struct type *stqe_next;	/* next element */			\
}
/*
 * Singly-linked Tail queue functions.
 */
/* Append every element of head2 to head1 and leave head2 empty. */
#define	STAILQ_CONCAT(head1, head2) do {				\
	if (!STAILQ_EMPTY((head2))) {					\
		*(head1)->stqh_last = (head2)->stqh_first;		\
		(head1)->stqh_last = (head2)->stqh_last;		\
		STAILQ_INIT((head2));					\
	}								\
} while (0)

#define	STAILQ_EMPTY(head)	((head)->stqh_first == NULL)

#define	STAILQ_FIRST(head)	((head)->stqh_first)

#define	STAILQ_FOREACH(var, head, field)				\
	for((var) = STAILQ_FIRST((head));				\
	   (var);							\
	   (var) = STAILQ_NEXT((var), field))


/* Traversal that reads the successor into (tvar) before the body runs. */
#define	STAILQ_FOREACH_SAFE(var, head, field, tvar)			\
	for ((var) = STAILQ_FIRST((head));				\
	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\
	    (var) = (tvar))

#define	STAILQ_INIT(head) do {						\
	STAILQ_FIRST((head)) = NULL;					\
	(head)->stqh_last = &STAILQ_FIRST((head));			\
} while (0)

/* stqh_last is updated when the new element becomes the last one. */
#define	STAILQ_INSERT_AFTER(head, tqelm, elm, field) do {		\
	if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\
		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
	STAILQ_NEXT((tqelm), field) = (elm);				\
} while (0)

#define	STAILQ_INSERT_HEAD(head, elm, field) do {			\
	if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL)	\
		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
	STAILQ_FIRST((head)) = (elm);					\
} while (0)

#define	STAILQ_INSERT_TAIL(head, elm, field) do {			\
	STAILQ_NEXT((elm), field) = NULL;				\
	*(head)->stqh_last = (elm);					\
	(head)->stqh_last = &STAILQ_NEXT((elm), field);			\
} while (0)

/*
 * STAILQ_LAST needs __offsetof, which this header does not get from
 * anywhere since the <sys/cdefs.h> include is commented out. Provide a
 * fallback when nothing else defined it.
 */
#ifndef __offsetof
#if defined(__GNUC__)
#define	__offsetof(type, field)	__builtin_offsetof(type, field)
#else
#define	__offsetof(type, field)	((unsigned long)(&((type *)0)->field))
#endif
#endif

/*
 * Last element, or NULL when the queue is empty: stqh_last points at the
 * "next" member of the last element, so step back by that member's offset.
 */
#define	STAILQ_LAST(head, type, field)					\
	(STAILQ_EMPTY((head)) ?						\
		NULL :							\
	        ((struct type *)					\
		((char *)((head)->stqh_last) - __offsetof(struct type, field))))
#define	STAILQ_NEXT(elm, field)	((elm)->field.stqe_next)

/*
 * Unlink (elm). Unless it is the first element, the queue is walked from
 * the head to find its predecessor; the walk has no NULL check, so (elm)
 * must be on the queue. stqh_last is fixed up when the last element goes.
 */
#define	STAILQ_REMOVE(head, elm, type, field) do {			\
	if (STAILQ_FIRST((head)) == (elm)) {				\
		STAILQ_REMOVE_HEAD((head), field);			\
	}								\
	else {								\
		struct type *curelm = STAILQ_FIRST((head));		\
		while (STAILQ_NEXT(curelm, field) != (elm))		\
			curelm = STAILQ_NEXT(curelm, field);		\
		if ((STAILQ_NEXT(curelm, field) =			\
		     STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\
			(head)->stqh_last = &STAILQ_NEXT((curelm), field);\
	}								\
} while (0)

/* Dereferences the first element without a check: queue must be non-empty. */
#define	STAILQ_REMOVE_HEAD(head, field) do {				\
	if ((STAILQ_FIRST((head)) =					\
	     STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL)		\
		(head)->stqh_last = &STAILQ_FIRST((head));		\
} while (0)

/* Make the successor of (elm) the new first element. */
#define	STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do {			\
	if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL)	\
		(head)->stqh_last = &STAILQ_FIRST((head));		\
} while (0)

/*
 * List declarations.
 */
#define	LIST_HEAD(name, type)						\
struct name {								\
	struct type *lh_first;	/* first element */			\
}

#define	LIST_HEAD_INITIALIZER(head)					\
	{ NULL }

#define	LIST_ENTRY(type)						\
struct {								\
	struct type *le_next;	/* next element */			\
	struct type **le_prev;	/* address of previous next element */	\
}
/*
 * List functions.
 */

#define	LIST_EMPTY(head)	((head)->lh_first == NULL)

#define	LIST_FIRST(head)	((head)->lh_first)

#define	LIST_FOREACH(var, head, field)					\
	for ((var) = LIST_FIRST((head));				\
	    (var);							\
	    (var) = LIST_NEXT((var), field))

/* Traversal that reads the successor into (tvar) before the body runs. */
#define	LIST_FOREACH_SAFE(var, head, field, tvar)			\
	for ((var) = LIST_FIRST((head));				\
	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\
	    (var) = (tvar))

#define	LIST_INIT(head) do {						\
	LIST_FIRST((head)) = NULL;					\
} while (0)

/* The old successor of (listelm), if any, gets its le_prev redirected. */
#define	LIST_INSERT_AFTER(listelm, elm, field) do {			\
	if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\
		LIST_NEXT((listelm), field)->field.le_prev =		\
		    &LIST_NEXT((elm), field);				\
	LIST_NEXT((listelm), field) = (elm);				\
	(elm)->field.le_prev = &LIST_NEXT((listelm), field);		\
} while (0)

/* Writes through (listelm)'s le_prev, so the head itself is not needed. */
#define	LIST_INSERT_BEFORE(listelm, elm, field) do {			\
	(elm)->field.le_prev = (listelm)->field.le_prev;		\
	LIST_NEXT((elm), field) = (listelm);				\
	*(listelm)->field.le_prev = (elm);				\
	(listelm)->field.le_prev = &LIST_NEXT((elm), field);		\
} while (0)

#define	LIST_INSERT_HEAD(head, elm, field) do {				\
	if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL)	\
		LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\
	LIST_FIRST((head)) = (elm);					\
	(elm)->field.le_prev = &LIST_FIRST((head));			\
} while (0)

#define	LIST_NEXT(elm, field)	((elm)->field.le_next)

/* Unlink (elm) using only its own links; the links are not cleared. */
#define	LIST_REMOVE(elm, field) do {					\
	if (LIST_NEXT((elm), field) != NULL)				\
		LIST_NEXT((elm), field)->field.le_prev = 		\
		    (elm)->field.le_prev;				\
	*(elm)->field.le_prev = LIST_NEXT((elm), field);		\
} while (0)
+ */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. + */ +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + 
QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT((elm)->field.tqe_next); \ + TRASHIT((elm)->field.tqe_prev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + + +#ifdef _KERNEL + +/* + * XXX insque() and 
remque() are an old way of handling certain queues. + * They bogusly assumes that all queue heads look alike. + */ + +struct quehead { + struct quehead *qh_link; + struct quehead *qh_rlink; +}; + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +static __inline void +insque(void *a, void *b) +{ + struct quehead *element = (struct quehead *)a, + *head = (struct quehead *)b; + + element->qh_link = head->qh_link; + element->qh_rlink = head; + head->qh_link = element; + element->qh_link->qh_rlink = element; +} + +static __inline void +remque(void *a) +{ + struct quehead *element = (struct quehead *)a; + + element->qh_link->qh_rlink = element->qh_rlink; + element->qh_rlink->qh_link = element->qh_link; + element->qh_rlink = 0; +} + +#else /* !(__GNUC__ || __INTEL_COMPILER) */ + +void insque(void *a, void *b); +void remque(void *a); + +#endif /* __GNUC__ || __INTEL_COMPILER */ + +#endif /* _KERNEL */ + +#endif /* !_SYS_QUEUE_H_ */ diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c new file mode 100644 index 00000000..7d47fadc --- /dev/null +++ b/miasm2/jitter/vm_mngr.c @@ -0,0 +1,1640 @@ +/* +** Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ +#include <Python.h> + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <string.h> + +#include <stdint.h> +#include <inttypes.h> +#include <math.h> + +#include "queue.h" +#include "vm_mngr.h" + + + + +/* +struct memory_page_list_head memory_page_pool; +struct code_bloc_list_head code_bloc_pool; + +struct memory_breakpoint_info_head memory_breakpoint_pool; +*/ + +/****************memory manager**************/ + + + + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + + +//#define DEBUG_MIASM_AUTOMOD_CODE + + + + + +void print_val(uint64_t base, uint64_t addr) +{ + uint64_t *ptr = (uint64_t *) addr; + fprintf(stderr, "addr 0x%"PRIX64" val 0x%"PRIX64"\n", addr-base, *ptr); +} + + +int is_mem_mapped(vm_mngr_t* vm_mngr, uint64_t ad) +{ + struct memory_page_node * mpn; + /* + mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; + if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) + return 1; + */ + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + if ((mpn->ad <= ad) && (ad <mpn->ad + mpn->size)) + return 1; + } + + return 0; +} + + +/* return the address base of the memory page + containing addr +*/ +uint64_t get_mem_base_addr(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t *addr_base) +{ + struct memory_page_node * mpn; + /* + mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; + if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)){ + *addr_base = mpn->ad; + return 1; + } + */ + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + if ((mpn->ad <= ad) && (ad <mpn->ad + mpn->size)) { + *addr_base = mpn->ad; + return 1; + } + } + return 0; +} + +struct memory_page_node * get_memory_page_from_address(vm_mngr_t* vm_mngr, uint64_t ad) +{ + struct memory_page_node * mpn; +#if 0 + mpn = memory_page_pool_tab[ad>>MEMORY_PAGE_POOL_MASK_BIT]; + if ( mpn && (mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) + return mpn; + + fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", 
ad); + //dump_memory_page_pool(); + //dump_gpregs(); + //exit(-1); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + + return NULL; +#else + + //printf("search for page ad: %X\n", ad); + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + if ((mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) + return mpn; + } + fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); + //dump_memory_page_pool(); + //dump_gpregs(); + //exit(-1); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + return NULL; +#endif +} + + + + +static inline uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t ad) +{ + struct memory_page_node * mpn; + unsigned char * addr; + uint64_t ret = 0; + struct memory_breakpoint_info * b; + + + mpn = get_memory_page_from_address(vm_mngr, ad); + if (!mpn) + return 0; + + if ((mpn->access & PAGE_READ) == 0){ + fprintf(stderr, "access to non readable page!! %"PRIX64"\n", ad); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + return 0; + } + + /* check read breakpoint*/ + LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ + if ((b->access & BREAKPOINT_READ) == 0) + continue; + if ((b->ad <= ad) && (ad < b->ad + b->size)) + vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_INTERN; + } + + + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + + /* read fits in a page */ + if (ad - mpn->ad + my_size/8 <= mpn->size){ + switch(my_size){ + case 8: + ret = *((unsigned char*)addr)&0xFF; + break; + case 16: + ret = *((unsigned short*)addr)&0xFFFF; + ret = Endian16_Swap(ret); + break; + case 32: + ret = *((unsigned int*)addr)&0xFFFFFFFF; + ret = Endian32_Swap(ret); + break; + case 64: + ret = *((uint64_t*)addr)&0xFFFFFFFFFFFFFFFFULL; + ret = Endian64_Swap(ret); + break; + default: + exit(0); + break; + } + } + /* read is multiple page wide */ + else{ + unsigned int new_size = my_size; + int index = 0; + fprintf(stderr, "read multiple page! 
%"PRIX64" %d\n", ad, new_size); + dump_memory_page_pool(vm_mngr); + while (new_size){ + mpn = get_memory_page_from_address(vm_mngr, ad); + if (!mpn) + return 0; + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + ret |= (*((unsigned char*)addr)&0xFF)<<(index); + index +=8; + new_size -= 8; + ad ++; + } + switch(my_size){ + case 8: + ret = ret; + break; + case 16: + ret = Endian16_Swap(ret); + break; + case 32: + ret = Endian32_Swap(ret); + break; + case 64: + ret = Endian64_Swap(ret); + break; + default: + exit(0); + break; + } + } + return ret; +} + +static inline void memory_page_write(vm_mngr_t* vm_mngr, unsigned int my_size, + uint64_t ad, uint64_t src) +{ + struct memory_page_node * mpn; + unsigned char * addr; + struct memory_breakpoint_info * b; + + mpn = get_memory_page_from_address(vm_mngr, ad); + if (!mpn) + return; + + if ((mpn->access & PAGE_WRITE) == 0){ + fprintf(stderr, "access to non writable page!! %"PRIX64"\n", ad); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + return ; + } + + /* check read breakpoint*/ + LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ + if ((b->access & BREAKPOINT_WRITE) == 0) + continue; + if ((b->ad <= ad) && (ad < b->ad + b->size)) + vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_INTERN; + } + + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + + /* write fits in a page */ + if (ad - mpn->ad + my_size/8 <= mpn->size){ + switch(my_size){ + case 8: + *((unsigned char*)addr) = src&0xFF; + break; + case 16: + src = Endian16_Swap(src); + *((unsigned short*)addr) = src&0xFFFF; + break; + case 32: + src = Endian32_Swap(src); + *((unsigned int*)addr) = src&0xFFFFFFFF; + break; + case 64: + src = Endian64_Swap(src); + *((uint64_t*)addr) = src&0xFFFFFFFFFFFFFFFFULL; + break; + default: + exit(0); + break; + } + } + /* write is multiple page wide */ + else{ + fprintf(stderr, "write multiple page! 
%"PRIX64" %d\n", ad, my_size); + dump_memory_page_pool(vm_mngr); + switch(my_size){ + + case 8: + src = src; + break; + case 16: + src = Endian16_Swap(src); + break; + case 32: + src = Endian32_Swap(src); + break; + case 64: + src = Endian64_Swap(src); + break; + default: + exit(0); + break; + } + while (my_size){ + mpn = get_memory_page_from_address(vm_mngr, ad); + if (!mpn) + return; + + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + *((unsigned char*)addr) = src&0xFF; + my_size -= 8; + src >>=8; + ad ++; + } + } +} + +/* TODO: Those functions have to be moved to a common operations file, with + * parity, ... + */ + +inline uint16_t bcdadd_16(uint16_t a, uint16_t b) +{ + int carry = 0; + int i,j = 0; + uint16_t res = 0; + int nib_a, nib_b; + for (i = 0; i < 16; i += 4) { + nib_a = (a >> i) & (0xF); + nib_b = (b >> i) & (0xF); + + j = (carry + nib_a + nib_b); + if (j >= 10) { + carry = 1; + j -= 10; + j &=0xf; + } + else { + carry = 0; + } + res += j << i; + } + return res; +} + +inline uint16_t bcdadd_cf_16(uint16_t a, uint16_t b) +{ + int carry = 0; + int i,j = 0; + int nib_a, nib_b; + for (i = 0; i < 16; i += 4) { + nib_a = (a >> i) & (0xF); + nib_b = (b >> i) & (0xF); + + j = (carry + nib_a + nib_b); + if (j >= 10) { + carry = 1; + j -= 10; + j &=0xf; + } + else { + carry = 0; + } + } + return carry; +} +// ################## + +void dump_code_bloc(vm_mngr_t* vm_mngr) +{ + struct code_bloc_node * cbp; + LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ + fprintf(stderr, "%"PRIX64"%"PRIX64"\n", cbp->ad_start, cbp->ad_stop); + } + +} + +inline void check_write_code_bloc(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t addr) +{ + struct code_bloc_node * cbp; + vm_mngr->last_write_ad = addr; + vm_mngr->last_write_size = my_size; + + + + //if(vmmngr.my_tick> my_tick) + // printf("M_WRITE %2d %.8X %.8X\n", my_size, addr, src); + if (!(addr + my_size/8 <= vm_mngr->code_bloc_pool_ad_min || + addr >=vm_mngr->code_bloc_pool_ad_max)){ + LIST_FOREACH(cbp, 
&vm_mngr->code_bloc_pool, next){ + if ((cbp->ad_start < addr + my_size/8) && + (addr < cbp->ad_stop)){ +#ifdef DEBUG_MIASM_AUTOMOD_CODE + fprintf(stderr, "**********************************\n"); + fprintf(stderr, "self modifying code %"PRIX64" %.8X\n", + addr, my_size); + fprintf(stderr, "**********************************\n"); + //dump_code_bloc(vm_mngr); +#endif + vm_mngr->exception_flags |= EXCEPT_CODE_AUTOMOD; + + break; + } + } + } +} + +PyObject* addr2BlocObj(vm_mngr_t* vm_mngr, uint64_t addr) +{ + PyObject* pyaddr; + PyObject* b; + + //printf("addr2blocobj %"PRIx64"\n", addr); + pyaddr = PyLong_FromUnsignedLongLong(addr); + /* + Py_INCREF(pyaddr); + return pyaddr; + */ + b = PyDict_GetItem(vm_mngr->addr2obj, pyaddr); + if (b == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + + //printf("addr2blocobj OBJ %p\n", b); + Py_INCREF(b); + return b; +} + +/* +PyObject* add_code_resolver(vm_mngr_t* vm_mngr, uint64_t addr) +{ + pyaddr = PyLong_FromUnsignedLongLong(addr); + + func_resolver* f = malloc(sizeof(func_resolver)); + f->func = addr2blocobj; + fsdfsd + return f; +} +*/ +/* +void MEM_WRITE(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t addr, unsigned int src) +{ + struct code_bloc_node * cbp; + + vm_mngr->last_write_ad = addr; + vm_mngr->last_write_size = my_size; + + //if(vmmngr.my_tick> my_tick) + // printf("M_WRITE %2d %.8X %.8X\n", my_size, addr, src); + if (!(addr + my_size/8 <= vm_mngr->code_bloc_pool_ad_min || + addr >= vm_mngr->code_bloc_pool_ad_max)){ + LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ + if ((cbp->ad_start <= addr + my_size/8) && + (addr < cbp->ad_stop)){ +#ifdef DEBUG_MIASM_AUTOMOD_CODE + fprintf(stderr, "self modifying code %"PRIX64" %.8X\n", + addr, my_size); +#endif + vm_mngr->exception_flags |= EXCEPT_CODE_AUTOMOD; + break; + } + } + } + + memory_page_write(vm_mngr, my_size, addr, src); +} +*/ +void MEM_WRITE_08(vm_mngr_t* vm_mngr, uint64_t addr, unsigned char src) +{ + check_write_code_bloc(vm_mngr, 8, addr); + 
memory_page_write(vm_mngr, 8, addr, src); +} + +void MEM_WRITE_16(vm_mngr_t* vm_mngr, uint64_t addr, unsigned short src) +{ + check_write_code_bloc(vm_mngr, 16, addr); + memory_page_write(vm_mngr, 16, addr, src); +} +void MEM_WRITE_32(vm_mngr_t* vm_mngr, uint64_t addr, unsigned int src) +{ + check_write_code_bloc(vm_mngr, 32, addr); + memory_page_write(vm_mngr, 32, addr, src); +} +void MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src) +{ + check_write_code_bloc(vm_mngr, 64, addr); + memory_page_write(vm_mngr, 64, addr, src); +} + +unsigned int MEM_LOOKUP(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t addr) +{ + unsigned int ret; + ret = memory_page_read(vm_mngr, my_size, addr); + return ret; +} + +unsigned char MEM_LOOKUP_08(vm_mngr_t* vm_mngr, uint64_t addr) +{ + unsigned char ret; + ret = memory_page_read(vm_mngr, 8, addr); + return ret; +} +unsigned short MEM_LOOKUP_16(vm_mngr_t* vm_mngr, uint64_t addr) +{ + unsigned short ret; + ret = memory_page_read(vm_mngr, 16, addr); + return ret; +} +unsigned int MEM_LOOKUP_32(vm_mngr_t* vm_mngr, uint64_t addr) +{ + unsigned int ret; + ret = memory_page_read(vm_mngr, 32, addr); + return ret; +} +uint64_t MEM_LOOKUP_64(vm_mngr_t* vm_mngr, uint64_t addr) +{ + uint64_t ret; + ret = memory_page_read(vm_mngr, 64, addr); + return ret; +} + +inline unsigned int parity(unsigned int a) +{ +#if defined(__builtin_parity) + return __builtin_parity(a); +#else + unsigned int tmp, cpt; + + tmp = a&0xFF; + cpt = 1; + while (tmp!=0){ + cpt^=tmp&1; + tmp>>=1; + } + return cpt; +#endif +} + + +int shift_right_arith(unsigned int size, int a, unsigned int b) +{ + int i32_a; + short i16_a; + char i8_a; + switch(size){ + case 8: + i8_a = a; + return (i8_a >> b)&0xff; + case 16: + i16_a = a; + return (i16_a >> b)&0xffff; + case 32: + i32_a = a; + return (i32_a >> b)&0xffffffff; + default: + fprintf(stderr, "inv size in shift %d\n", size); + exit(0); + } +} +/* +int shift_right_arith_08(int a, unsigned int b) +{ + char i8_a; + i8_a 
= a; + return (i8_a >> b)&0xff; +} + +int shift_right_arith_16(int a, unsigned int b) +{ + short i16_a; + i16_a = a; + return (i16_a >> b)&0xffff; +} + +int shift_right_arith_32(int a, unsigned int b) +{ + int i32_a; + i32_a = a; + return (i32_a >> b)&0xffffffff; +} +*/ +unsigned int shift_right_logic(unsigned int size, + unsigned int a, unsigned int b) +{ + unsigned int u32_a; + unsigned short u16_a; + unsigned char u8_a; + switch(size){ + case 8: + u8_a = a; + return (u8_a >> b)&0xff; + case 16: + u16_a = a; + return (u16_a >> b)&0xffff; + case 32: + u32_a = a; + return (u32_a >> b)&0xffffffff; + default: + fprintf(stderr, "inv size in shift %d\n", size); + exit(0); + } +} +/* +int shift_right_logic_08(unsigned int a, unsigned int b) +{ + unsigned char u8_a; + u8_a = a; + return (u8_a >> b)&0xff; +} + +int shift_right_logic_16(unsigned int a, unsigned int b) +{ + unsigned short u16_a; + u16_a = a; + return (u16_a >> b)&0xffff; +} + +int shift_right_logic_32(unsigned int a, unsigned int b) +{ + unsigned int u32_a; + u32_a = a; + return (u32_a >> b)&0xffffffff; +} +*/ +int shift_left_logic(unsigned int size, unsigned int a, unsigned int b) +{ + switch(size){ + case 8: + return (a<<b)&0xff; + case 16: + return (a<<b)&0xffff; + case 32: + return (a<<b)&0xffffffff; + default: + fprintf(stderr, "inv size in shift %d\n", size); + exit(0); + } +} +/* +int shift_left_logic_O8(unsigned int a, unsigned int b) +{ + return (a<<b)&0xff; +} + +int shift_left_logic_16(unsigned int a, unsigned int b) +{ + return (a<<b)&0xffff; +} + +int shift_left_logic_32(unsigned int a, unsigned int b) +{ + return (a<<b)&0xffffffff; +} +*/ + +unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b) +{ + unsigned int mask; + + switch (size) { + case 8: mask = 0xff; break; + case 16: mask = 0xffff; break; + case 32: mask = 0xffffffff; break; + default: fprintf(stderr, "inv size in mul %d\n", size); exit(0); + } + + a &= mask; + b &= mask; + return ((int64_t)a * (int64_t) b) & 
mask; +} + +unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b) +{ + uint64_t res = 0; + unsigned int mask; + + switch (size) { + case 8: mask = 0xff; break; + case 16: mask = 0xffff; break; + case 32: mask = 0xffffffff; break; + default: fprintf(stderr, "inv size in mul %d\n", size); exit(0); + } + + a &= mask; + b &= mask; + res = ((uint64_t)a * (uint64_t)b); + return (res >> 32) & mask; +} + + +unsigned int imul_lo_op_08(char a, char b) +{ + return a*b; +} + +unsigned int imul_lo_op_16(short a, short b) +{ + return a*b; +} + +unsigned int imul_lo_op_32(int a, int b) +{ + return a*b; +} + +int imul_hi_op_08(char a, char b) +{ + int64_t res = 0; + res = a*b; + return res>>8; +} + +int imul_hi_op_16(short a, short b) +{ + int64_t res = 0; + res = a*b; + return res>>16; +} + +int imul_hi_op_32(int a, int b) +{ + int64_t res = 0; + res = (int64_t)a*(int64_t)b; + //printf("%x %x dd %"PRIx64"\n", a, b, res); + return res>>32ULL; +} + +unsigned int umul16_lo(unsigned short a, unsigned short b) +{ + return (a*b) & 0xffff; +} + +unsigned int umul16_hi(unsigned short a, unsigned short b) +{ + uint32_t c; + c = a*b; + return (c>>16) & 0xffff; +} + + + + +unsigned int div_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c) +{ + int64_t num; + if (c == 0) + { + //vmmngr.exception_flags |= EXCEPT_INT_DIV_BY_ZERO; + return 0; + } + num = ((int64_t)a << size) + b; + num/=(int64_t)c; + return num; +} + + +unsigned int rem_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c) +{ + int64_t num; + + if (c == 0) + { + //vmmngr.exception_flags |= EXCEPT_INT_DIV_BY_ZERO; + return 0; + } + + num = ((int64_t)a << size) + b; + num = (int64_t)num-c*(num/c); + return num; +} + + +unsigned int rot_left(unsigned int size, unsigned int a, unsigned int b) +{ + unsigned int tmp; + + b = b&0x1F; + b %= size; + switch(size){ + case 8: + tmp = (a << b) | ((a&0xFF) >> (size-b)); + return tmp&0xff; + case 16: + tmp = (a << b) | ((a&0xFFFF) >> 
(size-b)); + return tmp&0xffff; + case 32: + tmp = (a << b) | ((a&0xFFFFFFFF) >> (size-b)); + return tmp&0xffffffff; + default: + fprintf(stderr, "inv size in rotleft %d\n", size); + exit(0); + } +} + +unsigned int rot_right(unsigned int size, unsigned int a, unsigned int b) +{ + unsigned int tmp; + + b = b&0x1F; + b %= size; + switch(size){ + case 8: + tmp = ((a&0xFF) >> b) | (a << (size-b)); + return tmp&0xff; + case 16: + tmp = ((a&0xFFFF) >> b) | (a << (size-b)); + return tmp&0xffff; + case 32: + tmp = ((a&0xFFFFFFFF) >> b) | (a << (size-b)); + return tmp&0xffffffff; + default: + fprintf(stderr, "inv size in rotleft %d\n", size); + exit(0); + } +} + + +int rcl_rez_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf) +{ + uint64_t tmp; + + + size++; + b %= size; + + if (b == 0) { + switch(size){ + case 8+1: + return a&0xff; + case 16+1: + return a&0xffff; + case 32+1: + return a&0xffffffff; + default: + fprintf(stderr, "inv size in rclleft %d\n", size); + exit(0); + } + } + + tmp = (a<<1) | cf; + b -=1; + switch(size){ + case 8+1: + tmp = (tmp << b) | ((tmp&0x1FF) >> (size-b)); + return tmp&0xff; + case 16+1: + tmp = (tmp << b) | ((tmp&0x1FFFF) >> (size-b)); + return tmp&0xffff; + case 32+1: + tmp = (tmp << b) | ((tmp&0x1FFFFFFFFULL) >> (size-b)); + return tmp&0xffffffff; + default: + fprintf(stderr, "inv size in rclleft %d\n", size); + exit(0); + } +} + +int rcr_rez_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf) +{ + return rcl_rez_op(size, a, size+1-b, cf); + +} + + +int rcl_cf_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf) +{ + uint64_t tmp; + + tmp = (cf<< size) | a; + + size++; + b %= size; + + switch(size){ + case 8+1: + tmp = (tmp << b) | ((tmp&0x1FF) >> (size-b)); + return (tmp>>8)&1; + case 16+1: + tmp = (tmp << b) | ((tmp&0x1FFFF) >> (size-b)); + return (tmp>>16)&1; + case 32+1: + tmp = (tmp << b) | ((tmp&0x1FFFFFFFFULL) >> (size-b)); + return (tmp>>32)&1; + default: + fprintf(stderr, 
"inv size in rclleft %d\n", size); + exit(0); + } +} + +int rcr_cf_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf) +{ + return rcl_cf_op(size, a, size+1-b, cf); +} +unsigned int my_bsr(unsigned int a, unsigned int b) +{ + int i; + + for (i=31; i>=0; i--){ + if (b & (1<<i)) + return i; + } + return a; +} + +unsigned int my_bsf(unsigned int a, unsigned int b) +{ + int i; + + for (i=0; i<32; i++){ + if (b & (1<<i)) + return i; + } + return a; +} + + +unsigned int my_imul08(unsigned int a, unsigned int b) +{ + char a08, b08; + short a16; + + a08 = a&0xFF; + b08 = b&0xFF; + a16 = a08*b08; + return (int)a16; +} + + + +unsigned int cpuid(unsigned int a, unsigned int reg_num) +{ + if (reg_num >3){ + fprintf(stderr, "not implemented cpuid reg %x\n", reg_num); + exit(-1); + } + + if (a == 0){ + switch(reg_num){ + case 0: + return 0xa; + case 1: + return 0x756E6547; + case 2: + return 0x6C65746E; + case 3: + return 0x49656E69; + } + } + + else if (a == 1){ + switch(reg_num){ + case 0: + //return 0x000006FB; + return 0x00020652; + case 1: + //return 0x02040800; + return 0x00000800; + case 2: + //return 0x0004E3BD; + return 0x00000209; + case 3: + //return 0xBFEBFBFF; + return 0x078bf9ff; + } + } + else{ + fprintf(stderr, "WARNING not implemented cpuid index %X!\n", a); + //exit(-1); + } + return 0; +} + +#define DEBUG_MIASM_DOUBLE + +void dump_float(void) +{ + /* + printf("%e\n", vmmngr.float_st0); + printf("%e\n", vmmngr.float_st1); + printf("%e\n", vmmngr.float_st2); + printf("%e\n", vmmngr.float_st3); + printf("%e\n", vmmngr.float_st4); + printf("%e\n", vmmngr.float_st5); + printf("%e\n", vmmngr.float_st6); + printf("%e\n", vmmngr.float_st7); + */ +} + +double mem_32_to_double(unsigned int m) +{ + float f; + double d; + + f = *((float*)&m); + d = f; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%d float %e\n", m, d); +#endif + return d; +} + + +double mem_64_to_double(uint64_t m) +{ + double d; + d = *((double*)&m); +#ifdef DEBUG_MIASM_DOUBLE + 
dump_float(); + printf("%"PRId64" double %e\n", m, d); +#endif + return d; +} + +double int_16_to_double(unsigned int m) +{ + double d; + + d = (double)(m&0xffff); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%d double %e\n", m, d); +#endif + return d; +} + +double int_32_to_double(unsigned int m) +{ + double d; + + d = (double)m; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%d double %e\n", m, d); +#endif + return d; +} + +double int_64_to_double(uint64_t m) +{ + double d; + + d = (double)m; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%"PRId64" double %e\n", m, d); +#endif + return d; +} + +int32_t double_to_int_32(double d) +{ + int32_t i; + + i = (int32_t)d; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e int %d\n", d, i); +#endif + return i; +} + +int64_t double_to_int_64(double d) +{ + int64_t i; + + i = (int64_t)d; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e int %"PRId64"\n", d, i); +#endif + return i; +} + + +double fadd(double a, double b) +{ + double c; + c = a + b; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e + %e -> %e\n", a, b, c); +#endif + return c; +} + +double fsub(double a, double b) +{ + double c; + c = a - b; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e - %e -> %e\n", a, b, c); +#endif + return c; +} + +double fmul(double a, double b) +{ + double c; + c = a * b; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e * %e -> %e\n", a, b, c); +#endif + return c; +} + +double fdiv(double a, double b) +{ + double c; + c = a / b; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e / %e -> %e\n", a, b, c); +#endif + return c; +} + +double ftan(double a) +{ + double b; + b = tan(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e tan %e\n", a, b); +#endif + return b; +} + +double frndint(double a) +{ + int64_t b; + double c; + b = (int64_t)a; + c = (double)b; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e double %e\n", a, c); +#endif + return c; +} + +double 
fsin(double a) +{ + double b; + b = sin(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e sin %e\n", a, b); +#endif + return b; +} + +double fcos(double a) +{ + double b; + b = cos(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e cos %e\n", a, b); +#endif + return b; +} + + +double fscale(double a, double b) +{ + double c; + c = a * exp2(trunc(b)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e *exp2 %e -> %e\n", a, b, c); +#endif + return c; +} + +double f2xm1(double a) +{ + double b; + b = exp2(a)-1; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e exp2 -1 %e\n", a, b); +#endif + return b; +} + +double fsqrt(double a) +{ + double b; + b = sqrt(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e sqrt %e\n", a, b); +#endif + return b; +} + +double fabs(double a) +{ + double b; + b = abs(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e abs %e\n", a, b); +#endif + return b; +} + + + +unsigned int fcom_c0(double a, double b) +{ + if (a>=b) + return 0; + return 1; +} +unsigned int fcom_c1(double a, double b) +{ + //XXX + return 0; +} +unsigned int fcom_c2(double a, double b) +{ + return 0; +} +unsigned int fcom_c3(double a, double b) +{ + if (a==b) + return 1; + return 0; +} + + +unsigned int double_to_mem_32(double d) +{ + unsigned int m; + float f; + f = d; + m = *((unsigned int*)&f); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%d %e\n", m, d); +#endif + return m; +} + +uint64_t double_to_mem_64(double d) +{ + uint64_t m; + m = *((uint64_t*)&d); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%"PRId64" %e\n", m, d); +#endif + return m; +} + +struct memory_page_node * create_memory_page_node(uint64_t ad, unsigned int size, unsigned int access) +{ + struct memory_page_node * mpn; + void* p; + + mpn = malloc(sizeof(*mpn)); + if (!mpn){ + fprintf(stderr, "cannot alloc mpn\n"); + return NULL; + } + p = malloc(size); + if (!p){ + fprintf(stderr, "cannot alloc %d\n", size); + return NULL; + } + mpn->ad = 
ad; + mpn->size = size; + mpn->access = access; + mpn->ad_hp = p; + + return mpn; +} + + +struct code_bloc_node * create_code_bloc_node(uint64_t ad_start, uint64_t ad_stop) +{ + struct code_bloc_node * cbp; + + cbp = malloc(sizeof(*cbp)); + if (!cbp){ + fprintf(stderr, "cannot alloc cbp\n"); + exit(-1); + } + + cbp->ad_start = ad_start; + cbp->ad_stop = ad_stop; + + return cbp; +} + + +void add_code_bloc(vm_mngr_t* vm_mngr, struct code_bloc_node* cbp) +{ + LIST_INSERT_HEAD(&vm_mngr->code_bloc_pool, cbp, next); + if (vm_mngr->code_bloc_pool_ad_min> cbp->ad_start) + vm_mngr->code_bloc_pool_ad_min = cbp->ad_start; + if (vm_mngr->code_bloc_pool_ad_max< cbp->ad_stop) + vm_mngr->code_bloc_pool_ad_max = cbp->ad_stop; +} + +void dump_code_bloc_pool(vm_mngr_t* vm_mngr) +{ + struct code_bloc_node * cbp; + + LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ + printf("ad start %"PRIX64" ad_stop %"PRIX64"\n", + cbp->ad_start, + cbp->ad_stop); + } +} + + +void init_memory_page_pool(vm_mngr_t* vm_mngr) +{ + unsigned int i; + LIST_INIT(&vm_mngr->memory_page_pool); + for (i=0;i<MAX_MEMORY_PAGE_POOL_TAB; i++) + vm_mngr->memory_page_pool_tab[i] = NULL; +} + +void init_code_bloc_pool(vm_mngr_t* vm_mngr) +{ + LIST_INIT(&vm_mngr->code_bloc_pool); + vm_mngr->code_bloc_pool_ad_min = 0xffffffff; + vm_mngr->code_bloc_pool_ad_max = 0; +} + +void init_memory_breakpoint(vm_mngr_t* vm_mngr) +{ + LIST_INIT(&vm_mngr->memory_breakpoint_pool); +} + + +void reset_memory_page_pool(vm_mngr_t* vm_mngr) +{ + struct memory_page_node * mpn; + unsigned int i; + + while (!LIST_EMPTY(&vm_mngr->memory_page_pool)) { + mpn = LIST_FIRST(&vm_mngr->memory_page_pool); + LIST_REMOVE(mpn, next); + free(mpn->ad_hp); + free(mpn); + } + for (i=0;i<MAX_MEMORY_PAGE_POOL_TAB; i++) + vm_mngr->memory_page_pool_tab[i] = NULL; + +} + + +void reset_code_bloc_pool(vm_mngr_t* vm_mngr) +{ + struct code_bloc_node * cbp; + + + while (!LIST_EMPTY(&vm_mngr->code_bloc_pool)) { + cbp = LIST_FIRST(&vm_mngr->code_bloc_pool); + 
LIST_REMOVE(cbp, next); + free(cbp); + } + vm_mngr->code_bloc_pool_ad_min = 0xffffffff; + vm_mngr->code_bloc_pool_ad_max = 0; +} + + +void reset_memory_breakpoint(vm_mngr_t* vm_mngr) +{ + struct memory_breakpoint_info * mpn; + + while (!LIST_EMPTY(&vm_mngr->memory_breakpoint_pool)) { + mpn = LIST_FIRST(&vm_mngr->memory_breakpoint_pool); + LIST_REMOVE(mpn, next); + free(mpn); + } + +} + + +int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) +{ + struct memory_page_node * mpn; + + /* + for (i=mpn_a->ad >> MEMORY_PAGE_POOL_MASK_BIT; + i<(mpn_a->ad + mpn_a->size + PAGE_SIZE - 1)>>MEMORY_PAGE_POOL_MASK_BIT; + i++){ + if (memory_page_pool_tab[i] !=NULL){ + return 1; + } + } + */ + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + if (mpn->ad >= mpn_a->ad + mpn_a->size) + continue; + if (mpn->ad + mpn->size <= mpn_a->ad) + continue; + printf("is mpn in! %"PRIX64" %"PRIX64" \n", mpn_a->ad, mpn_a->size); + printf("known:! %"PRIX64" %"PRIX64" \n", mpn->ad, mpn->size); + + return 1; + } + + return 0; +} + +void insert_mpn_in_tab(struct memory_page_node* mpn_a) +{ + /* + for (i=mpn_a->ad >> MEMORY_PAGE_POOL_MASK_BIT; + i<(mpn_a->ad + mpn_a->size + PAGE_SIZE - 1)>>MEMORY_PAGE_POOL_MASK_BIT; + i++){ + if (memory_page_pool_tab[i] !=NULL){ + fprintf(stderr, "known page in tab\n"); + exit(1); + } + memory_page_pool_tab[i] = mpn_a; + } + */ + +} + +void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) +{ + struct memory_page_node * mpn; + struct memory_page_node * lmpn; + + if (LIST_EMPTY(&vm_mngr->memory_page_pool)){ + LIST_INSERT_HEAD(&vm_mngr->memory_page_pool, mpn_a, next); + insert_mpn_in_tab(mpn_a); + return; + } + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + lmpn = mpn; + if (mpn->ad < mpn_a->ad) + continue; + LIST_INSERT_BEFORE(mpn, mpn_a, next); + insert_mpn_in_tab(mpn_a); + return; + } + LIST_INSERT_AFTER(lmpn, mpn_a, next); + insert_mpn_in_tab(mpn_a); + +} + +void dump_memory_page_pool(vm_mngr_t* vm_mngr) +{ + struct 
memory_page_node * mpn; + + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + printf("ad %"PRIX64" size %"PRIX64" %c%c%c hpad %p\n", + mpn->ad, + mpn->size, + mpn->access & PAGE_READ? 'R':'_', + mpn->access & PAGE_WRITE? 'W':'_', + mpn->access & PAGE_EXEC? 'X':'_', + mpn->ad_hp + ); + } +} + +void dump_memory_breakpoint_pool(vm_mngr_t* vm_mngr) +{ + struct memory_breakpoint_info * mpn; + + LIST_FOREACH(mpn, &vm_mngr->memory_breakpoint_pool, next){ + printf("ad %"PRIX64" size %"PRIX64" access %"PRIX64"\n", + mpn->ad, + mpn->size, + mpn->access + ); + } +} + + +void add_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t size, unsigned int access) +{ + struct memory_breakpoint_info * mpn_a; + mpn_a = malloc(sizeof(*mpn_a)); + if (!mpn_a) { + printf("cannot alloc\n"); + exit(0); + } + mpn_a->ad = ad; + mpn_a->size = size; + mpn_a->access = access; + + LIST_INSERT_HEAD(&vm_mngr->memory_breakpoint_pool, mpn_a, next); + +} + +void remove_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, unsigned int access) +{ + struct memory_breakpoint_info * mpn; + + LIST_FOREACH(mpn, &vm_mngr->memory_breakpoint_pool, next){ + if (mpn->ad == ad && mpn->access == access) + LIST_REMOVE(mpn, next); + } + +} + + + + + + + +unsigned int get_memory_page_next(vm_mngr_t* vm_mngr, unsigned int n_ad) +{ + struct memory_page_node * mpn; + uint64_t ad = 0; + + LIST_FOREACH(mpn, &vm_mngr->memory_page_pool, next){ + if (mpn->ad < n_ad) + continue; + + if (ad == 0 || mpn->ad <ad) + ad = mpn->ad; + } + return ad; +} + + +#if 0 +unsigned int get_memory_page_from_min_ad(unsigned int size) +{ + struct memory_page_node * mpn; + unsigned int c_ad ; + unsigned int min_ad = min_page_ad; + int end = 0; + /* first, find free min ad */ + while (!end){ + end = 1; + LIST_FOREACH(mpn, &memory_page_pool, next){ + c_ad = (mpn->ad + mpn->size+0x1000)&0xfffff000; + if (c_ad <= min_ad) + continue; + if (mpn->ad <= min_ad){ + min_ad = c_ad; + end = 0; + break; + } + if (mpn->ad - min_ad < size){ + min_ad = 
c_ad; + end = 0; + break; + } + } + } + return min_ad; + } +#endif + + + +/********************************************/ + +void hexdump(char* m, unsigned int l) +{ + int i, j, last; + last = 0; + for (i=0;i<l;i++){ + if (!(i%0x10) && i){ + last = i; + printf(" "); + for (j=-0x10;j<0;j++){ + if (isprint(m[i+j])){ + printf("%c", m[i+j]); + } + else{ + printf("."); + } + } + printf("\n"); + } + printf("%.2X ", m[i]&0xFF); + } + l-=last; + if (l){ + for (j=i;j<last+0x10;j++) + printf(" "); + printf(" "); + for (j = 0;l;j++){ + if (isprint(m[last+j])){ + printf("%c", m[last+j]); + } + else{ + printf("."); + } + l--; + } + } + printf("\n"); + +} + + + + +unsigned int access_segment(unsigned int d) +{ + // XXX TODO + printf("access segment %X\n", d); + return 0; +} +unsigned int access_segment_ok(unsigned int d) +{ + // XXX TODO + printf("access segment ok %X\n", d); + return 0; +} + +unsigned int load_segment_limit(unsigned int d) +{ + // XXX TODO + printf("load segment limit %X\n", d); + return 0; +} +unsigned int load_segment_limit_ok(unsigned int d) +{ + // XXX TODO + printf("load segment limit ok %X\n", d); + return 0; +} + +unsigned int load_tr_segment_selector(unsigned int d) +{ + // XXX TODO + return 0; +} + +// Return vm_mngr's exception flag value +uint64_t get_exception_flag(vm_mngr_t* vm_mngr) +{ + return vm_mngr->exception_flags; +} diff --git a/miasm2/jitter/vm_mngr.h b/miasm2/jitter/vm_mngr.h new file mode 100644 index 00000000..1e032200 --- /dev/null +++ b/miasm2/jitter/vm_mngr.h @@ -0,0 +1,363 @@ +/* +** Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. 
+** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ +#ifndef CODENAT_H +#define CODENAT_H + + +#if __BYTE_ORDER == __BIG_ENDIAN +#define Endian16_Swap(value) \ + ((((uint16_t)((value) & 0x00FF)) << 8) | \ + (((uint16_t)((value) & 0xFF00)) >> 8)) + +#define Endian32_Swap(value) \ + ((((uint32_t)((value) & 0x000000FF)) << 24) | \ + (((uint32_t)((value) & 0x0000FF00)) << 8) | \ + (((uint32_t)((value) & 0x00FF0000)) >> 8) | \ + (((uint32_t)((value) & 0xFF000000)) >> 24)) + +#define Endian64_Swap(value) \ + (((((uint64_t)value)<<56) & 0xFF00000000000000ULL) | \ + ((((uint64_t)value)<<40) & 0x00FF000000000000ULL) | \ + ((((uint64_t)value)<<24) & 0x0000FF0000000000ULL) | \ + ((((uint64_t)value)<< 8) & 0x000000FF00000000ULL) | \ + ((((uint64_t)value)>> 8) & 0x00000000FF000000ULL) | \ + ((((uint64_t)value)>>24) & 0x0000000000FF0000ULL) | \ + ((((uint64_t)value)>>40) & 0x000000000000FF00ULL) | \ + ((((uint64_t)value)>>56) & 0x00000000000000FFULL)) +#else +#define Endian16_Swap(value) (value) + +#define Endian32_Swap(value) (value) + +#define Endian64_Swap(value) (value) +#endif + + + + +LIST_HEAD(memory_page_list_head, memory_page_node); +LIST_HEAD(code_bloc_list_head, code_bloc_node); +LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); + + +#define BREAKPOINT_READ 1 +#define BREAKPOINT_WRITE 2 + + + +#define MAX_MEMORY_PAGE_POOL_TAB 0x100000 +#define MEMORY_PAGE_POOL_MASK_BIT 12 +#define PAGE_SIZE (1<<MEMORY_PAGE_POOL_MASK_BIT) + +typedef struct { + + struct memory_page_list_head memory_page_pool; + struct code_bloc_list_head code_bloc_pool; + 
struct memory_breakpoint_info_head memory_breakpoint_pool; + + struct memory_page_node *memory_page_pool_tab[MAX_MEMORY_PAGE_POOL_TAB]; + + unsigned int *code_addr_tab; + unsigned int code_bloc_pool_ad_min; + unsigned int code_bloc_pool_ad_max; + + uint64_t exception_flags; + uint64_t exception_flags_new; + uint64_t last_write_ad; + uint64_t last_write_size ; + PyObject *cb_automod; + PyObject *addr2obj; +}vm_mngr_t; + + + +typedef struct { + PyObject *func; +}func_resolver; + + + + +//extern vm_mngr_t vmmngr; + +struct memory_page_node { + uint64_t ad; + uint64_t size; + uint64_t access; + void* ad_hp; + LIST_ENTRY(memory_page_node) next; +}; + + + +struct code_bloc_node { + uint64_t ad_start; + uint64_t ad_stop; + uint64_t ad_code; + LIST_ENTRY(code_bloc_node) next; +}; + + +struct memory_breakpoint_info { + uint64_t ad; + uint64_t size; + uint64_t access; + LIST_ENTRY(memory_breakpoint_info) next; +}; + + +#define PAGE_READ 1 +#define PAGE_WRITE 2 +#define PAGE_EXEC 4 + +#define EXCEPT_DO_NOT_UPDATE_PC (1<<25) + +// interrupt with eip update after instr +#define EXCEPT_CODE_AUTOMOD (1<<0) +#define EXCEPT_SOFT_BP (1<<1) +#define EXCEPT_INT_XX (1<<2) + +#define EXCEPT_BREAKPOINT_INTERN (1<<10) + +#define EXCEPT_NUM_UPDT_EIP (1<<11) +// interrupt with eip at instr +#define EXCEPT_UNK_MEM_AD ((1<<12) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_THROW_SEH ((1<<13) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_UNK_EIP ((1<<14) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_ACCESS_VIOL ((1<<14) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_INT_DIV_BY_ZERO ((1<<16) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_PRIV_INSN ((1<<17) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_ILLEGAL_INSN ((1<<18) | EXCEPT_DO_NOT_UPDATE_PC) +#define EXCEPT_UNK_MNEMO ((1<<19) | EXCEPT_DO_NOT_UPDATE_PC) + + +int is_mem_mapped(vm_mngr_t* vm_mngr, uint64_t ad); +uint64_t get_mem_base_addr(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t *addr_base); +void MEM_WRITE(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t 
addr, unsigned int src); +unsigned int MEM_LOOKUP(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t addr); + + +void MEM_WRITE_08(vm_mngr_t* vm_mngr, uint64_t addr, unsigned char src); +void MEM_WRITE_16(vm_mngr_t* vm_mngr, uint64_t addr, unsigned short src); +void MEM_WRITE_32(vm_mngr_t* vm_mngr, uint64_t addr, unsigned int src); +void MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src); + + +unsigned char MEM_LOOKUP_08(vm_mngr_t* vm_mngr, uint64_t addr); +unsigned short MEM_LOOKUP_16(vm_mngr_t* vm_mngr, uint64_t addr); +unsigned int MEM_LOOKUP_32(vm_mngr_t* vm_mngr, uint64_t addr); +uint64_t MEM_LOOKUP_64(vm_mngr_t* vm_mngr, uint64_t addr); + + +void MEM_WRITE_08_PASSTHROUGH(uint64_t addr, unsigned char src); +void MEM_WRITE_16_PASSTHROUGH(uint64_t addr, unsigned short src); +void MEM_WRITE_32_PASSTHROUGH(uint64_t addr, unsigned int src); +void MEM_WRITE_64_PASSTHROUGH(uint64_t addr, uint64_t src); +unsigned char MEM_LOOKUP_08_PASSTHROUGH(uint64_t addr); +unsigned short MEM_LOOKUP_16_PASSTHROUGH(uint64_t addr); +unsigned int MEM_LOOKUP_32_PASSTHROUGH(uint64_t addr); +uint64_t MEM_LOOKUP_64_PASSTHROUGH(uint64_t addr); + + +inline unsigned int parity(unsigned int a); +unsigned int my_imul08(unsigned int a, unsigned int b); + +void vm_throw(vm_mngr_t* vm_mngr, unsigned long flags); +int shift_right_arith(unsigned int size, int a, unsigned int b); +unsigned int shift_right_logic(unsigned int size, unsigned int a, unsigned int b); +int shift_left_logic(unsigned int size, unsigned int a, unsigned int b); +/* +int shift_left_logic_08(unsigned int a, unsigned int b); +int shift_left_logic_16(unsigned int a, unsigned int b); +int shift_left_logic_32(unsigned int a, unsigned int b); +*/ +unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b); +unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b); +unsigned int imul_lo_op_08(char a, char b); +unsigned int imul_lo_op_16(short a, short b); +unsigned int imul_lo_op_32(int a, 
int b); +int imul_hi_op_08(char a, char b); +int imul_hi_op_16(short a, short b); +int imul_hi_op_32(int a, int b); + + +unsigned int umul16_lo(unsigned short a, unsigned short b); +unsigned int umul16_hi(unsigned short a, unsigned short b); + + +unsigned int div_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c); +unsigned int rem_op(unsigned int size, unsigned int a, unsigned int b, unsigned int c); +unsigned int rot_left(unsigned int size, unsigned int a, unsigned int b); +unsigned int rot_right(unsigned int size, unsigned int a, unsigned int b); +int rcl_rez_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf); +int rcl_cf_op(unsigned int size, unsigned int a, unsigned int b, unsigned int cf); + +//PyObject* _vm_push_uint32_t(PyObject *item); +//PyObject* _vm_pop_uint32_t(void); +////PyObject* _vm_put_str(PyObject *item); +//PyObject* _vm_set_mem(PyObject *item, PyObject *item_str); +//PyObject* _vm_set_mem_access(PyObject *addr, PyObject *access); +//PyObject* _vm_get_str(PyObject *item, PyObject *item_len); +//PyObject* _vm_add_memory_page(PyObject *item, PyObject *access, PyObject *item_str); +//PyObject* _vm_add_code_bloc(PyObject *item1, PyObject *item2);//, PyObject *item3); +//PyObject* _call_pyfunc_from_globals(char* funcname); +//PyObject* _call_pyfunc_from_eip(void); +// +//PyObject* call_pyfunc_from_globals(char* funcname); +// +//PyObject* _vm_get_gpreg(void); + +void hexdump(char* m, unsigned int l); + +struct code_bloc_node * create_code_bloc_node(uint64_t ad_start, uint64_t ad_stop); +void add_code_bloc(vm_mngr_t* vm_mngr, struct code_bloc_node* cbp); + +struct memory_page_node * create_memory_page_node(uint64_t ad, unsigned int size, unsigned int access);//memory_page* mp); +void init_memory_page_pool(vm_mngr_t* vm_mngr); +void init_code_bloc_pool(vm_mngr_t* vm_mngr); +void reset_memory_page_pool(vm_mngr_t* vm_mngr); +void reset_code_bloc_pool(vm_mngr_t* vm_mngr); +void dump_code_bloc_pool(vm_mngr_t* 
vm_mngr); +void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a); + + +void init_memory_breakpoint(vm_mngr_t* vm_mngr); +void reset_memory_breakpoint(vm_mngr_t* vm_mngr); +void add_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t size, unsigned int access); +void remove_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, unsigned int access); + +void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn); + +void dump_memory_page_pool(vm_mngr_t* vm_mngr); +void dump_memory_breakpoint_pool(vm_mngr_t* vm_mngr); +//PyObject* _vm_get_all_memory(void); +PyObject* addr2BlocObj(vm_mngr_t* vm_mngr, uint64_t addr); + + + + +/********************************************/ + +//PyObject* _vm_get_cpu_state(void); +//PyObject* _vm_set_cpu_state(PyObject * s_cpustate); + + +//void memory_page_write(unsigned int my_size, uint64_t ad, unsigned int src); +//unsigned int memory_page_read(unsigned int my_size, uint64_t ad); +unsigned int get_memory_page_max_address(void); +unsigned int get_memory_page_max_user_address(void); + + +int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a); + + +void _func_free(void); +void _func_alloc(void); +unsigned int _get_memory_page_max_address_py(void); +unsigned int _get_memory_page_max_user_address_py(void); +unsigned int _get_memory_page_from_min_ad_py(unsigned int size); + +void _func_malloc_memory_page(void); +void _func_free_memory_page(void); +void _func_virtualalloc_memory_page(void); +void _func_virtualfree_memory_page(void); +void _func_loadlib_fake(void); +void _func_getproc_fake(void); + + +void func_free(void); +void func_alloc(void); +unsigned int get_memory_page_max_address_py(void); +unsigned int get_memory_page_max_user_address_py(void); +unsigned int get_memory_page_from_min_ad_py(unsigned int size); +struct memory_page_node * get_memory_page_from_address(vm_mngr_t*, uint64_t ad); +void func_malloc_memory_page(void); +void func_free_memory_page(void); +void 
func_virtualalloc_memory_page(void); +void func_virtualfree_memory_page(void); +void func_loadlib_fake(void); +void func_getproc_fake(void); + + +//PyObject* _vm_exec_bloc(PyObject* my_eip, PyObject* known_blocs); + +unsigned int cpuid(unsigned int a, unsigned int reg_num); +double int2double(unsigned int m); +//PyObject* _vm_exec_blocs(PyObject* my_eip); + +double fadd(double a, double b); +double fsub(double a, double b); +double fmul(double a, double b); +double fdiv(double a, double b); +double ftan(double a); +double frndint(double a); +double fsin(double a); +double fcos(double a); +double fscale(double a, double b); +double f2xm1(double a); +double fsqrt(double a); +double fabs(double a); +unsigned int fcom_c0(double a, double b); +unsigned int fcom_c1(double a, double b); +unsigned int fcom_c2(double a, double b); +unsigned int fcom_c3(double a, double b); + + + +double mem_32_to_double(unsigned int m); +double mem_64_to_double(uint64_t m); +double int_16_to_double(unsigned int m); +double int_32_to_double(unsigned int m); +double int_64_to_double(uint64_t m); +int32_t double_to_int_32(double d); +int64_t double_to_int_64(double d); +double fadd(double a, double b); +unsigned int double_to_mem_32(double d); +uint64_t double_to_mem_64(double d); + +unsigned int access_segment(unsigned int d); +unsigned int access_segment_ok(unsigned int d); + +unsigned int load_segment_limit(unsigned int d); +unsigned int load_segment_limit_ok(unsigned int d); + +unsigned int load_tr_segment_selector(unsigned int d); + +#define shift_right_arith_08(a, b)\ + ((((char)(a)) >> ((int)(b)&0x1f))&0xff) +#define shift_right_arith_16(a, b)\ + ((((short)(a)) >> ((int)(b)&0x1f))&0xffff) +#define shift_right_arith_32(a, b)\ + ((((int)(a)) >> ((int)(b)&0x1f))&0xffffffff) + + +#define shift_right_logic_08(a, b)\ + ((((unsigned char)(a)) >> ((unsigned int)(b)&0x1f))&0xff) +#define shift_right_logic_16(a, b)\ + ((((unsigned short)(a)) >> ((unsigned int)(b)&0x1f))&0xffff) +#define 
shift_right_logic_32(a, b)\ + ((((unsigned int)(a)) >> ((unsigned int)(b)&0x1f))&0xffffffff) + +#define shift_left_logic_08(a, b)\ + (((a)<<((b)&0x1f))&0xff) +#define shift_left_logic_16(a, b)\ + (((a)<<((b)&0x1f))&0xffff) +#define shift_left_logic_32(a, b)\ + (((a)<<((b)&0x1f))&0xffffffff) + +#endif diff --git a/miasm2/jitter/vm_mngr_py.c b/miasm2/jitter/vm_mngr_py.c new file mode 100644 index 00000000..a8085d92 --- /dev/null +++ b/miasm2/jitter/vm_mngr_py.c @@ -0,0 +1,939 @@ +/* +** Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ +#include <Python.h> +#include "structmember.h" +#include <stdint.h> +#include <inttypes.h> +#include "queue.h" +#include "vm_mngr.h" + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +extern struct memory_page_list_head memory_page_pool; +extern struct code_bloc_list_head code_bloc_pool; + +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} + + + +typedef struct { + PyObject_HEAD + PyObject *vmmngr; + vm_mngr_t vm_mngr; +} VmMngr; + + +PyObject* _vm_get_exception(unsigned int xcpt) +{ + PyObject*p; + + if (!xcpt) + p = NULL; + else if (xcpt & EXCEPT_CODE_AUTOMOD) + p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_CODE_AUTOMOD" ); + else if (xcpt & EXCEPT_UNK_EIP) + p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNK_EIP" ); + else if (xcpt & EXCEPT_UNK_MEM_AD) + p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNK_MEM_AD" ); + + else p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNKNOWN" ); + return p; +} + + +#define PyGetInt(item, value) \ + if (PyInt_Check(item)){ \ + value = (uint64_t)PyInt_AsLong(item); \ + } \ + else if (PyLong_Check(item)){ \ + value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ + } \ + else{ \ + RAISE(PyExc_TypeError,"arg must be int"); \ + } \ + + +PyObject* vm_is_mem_mapped(VmMngr* self, PyObject* item) +{ + PyObject *addr; + uint64_t page_addr; + uint32_t ret; + if (!PyArg_ParseTuple(item, "O", &addr)) + return NULL; + + PyGetInt(addr, page_addr); + + ret = is_mem_mapped(&self->vm_mngr, page_addr); + return PyInt_FromLong((long)ret); +} + + + +PyObject* vm_get_mem_base_addr(VmMngr* self, PyObject* item) +{ + PyObject *addr; + + uint64_t page_addr; + uint64_t addr_base; + unsigned int ret; + + if (!PyArg_ParseTuple(item, "O", &addr)) + return NULL; + + PyGetInt(addr, page_addr); + + ret = get_mem_base_addr(&self->vm_mngr, page_addr, &addr_base); + if (ret == 0){ + Py_INCREF(Py_None); + return Py_None; + } + return PyLong_FromUnsignedLongLong((uint64_t)addr_base); +} + + 
+PyObject* vm_add_memory_page(VmMngr* self, PyObject* args) +{ + PyObject *addr; + PyObject *access; + PyObject *item_str; + uint64_t buf_size; + char* buf_data; + Py_ssize_t length; + uint64_t ret = 0x1337beef; + uint64_t page_addr; + uint64_t page_access; + + struct memory_page_node * mpn; + + if (!PyArg_ParseTuple(args, "OOO", &addr, &access, &item_str)) + return NULL; + + PyGetInt(addr, page_addr); + PyGetInt(access, page_access); + + if(!PyString_Check(item_str)) + RAISE(PyExc_TypeError,"arg must be str"); + + buf_size = PyString_Size(item_str); + PyString_AsStringAndSize(item_str, &buf_data, &length); + + fprintf(stderr, "add page %"PRIX64" %"PRIX64" %"PRIX64"\n", page_addr, buf_size, page_access); + mpn = create_memory_page_node(page_addr, buf_size, page_access); + if (mpn == NULL) + RAISE(PyExc_TypeError,"cannot create page"); + if (is_mpn_in_tab(&self->vm_mngr, mpn)) + RAISE(PyExc_TypeError,"known page in memory"); + + memcpy(mpn->ad_hp, buf_data, buf_size); + add_memory_page(&self->vm_mngr, mpn); + + return PyLong_FromUnsignedLongLong((uint64_t)ret); + +} + + + + +PyObject* vm_set_mem(VmMngr* self, PyObject* args) +{ + PyObject *addr; + PyObject *item_str; + + uint64_t buf_size; + char* buf_data; + Py_ssize_t length; + int ret = 0x1337; + uint64_t val; + uint64_t l; + + struct memory_page_node * mpn; + + if (!PyArg_ParseTuple(args, "OO", &addr, &item_str)) + return NULL; + + PyGetInt(addr, val); + + if(!PyString_Check(item_str)) + RAISE(PyExc_TypeError,"arg must be str"); + + buf_size = PyString_Size(item_str); + PyString_AsStringAndSize(item_str, &buf_data, &length); + + /* read is multiple page wide */ + while (buf_size){ + mpn = get_memory_page_from_address(&self->vm_mngr, val); + if (!mpn){ + PyErr_SetString(PyExc_RuntimeError, "cannot find address"); + return 0; + } + l = MIN(buf_size, mpn->size - (val-mpn->ad)); + memcpy(mpn->ad_hp + (val-mpn->ad), buf_data, l); + buf_data += l; + val += l; + buf_size -= l; + } + + return 
PyLong_FromUnsignedLongLong((uint64_t)ret); +} + +PyObject* vm_set_mem_access(VmMngr* self, PyObject* args) +{ + PyObject *addr; + PyObject *access; + + uint64_t ret = 0x1337beef; + uint64_t page_addr; + uint64_t page_access; + struct memory_page_node * mpn; + + if (!PyArg_ParseTuple(args, "OO", &addr, &access)) + return NULL; + + PyGetInt(addr, page_addr); + PyGetInt(access, page_access); + + mpn = get_memory_page_from_address(&self->vm_mngr, page_addr); + mpn->access = page_access; + return PyLong_FromUnsignedLongLong((uint64_t)ret); +} + + + + +PyObject* vm_get_mem(VmMngr* self, PyObject* args) +{ + PyObject *item; + PyObject *item_len; + + uint64_t buf_addr; + uint64_t buf_len; + PyObject *obj_out; + struct memory_page_node * mpn; + char * buf_out; + char * addr_tmp; + char * addr_out; + uint64_t off; + uint64_t l; + uint64_t my_size; + + if (!PyArg_ParseTuple(args, "OO", &item, &item_len)) + return NULL; + + PyGetInt(item, buf_addr); + PyGetInt(item_len, buf_len); + + my_size = buf_len; + buf_out = malloc(buf_len); + if (!buf_out){ + fprintf(stderr, "cannot alloc read\n"); + exit(-1); + } + + addr_out = buf_out; + + /* read is multiple page wide */ + while (my_size){ + mpn = get_memory_page_from_address(&self->vm_mngr, buf_addr); + if (!mpn){ + PyErr_SetString(PyExc_RuntimeError, "cannot find address"); + return 0; + } + + off = buf_addr - mpn->ad; + addr_tmp = &((char*)mpn->ad_hp)[off]; + + l = MIN(my_size, mpn->size - off); + memcpy(addr_out, addr_tmp, l); + my_size -= l; + addr_out +=l; + buf_addr +=l; + } + + obj_out = PyString_FromStringAndSize(buf_out, buf_len); + free(buf_out); + return obj_out; +} + +PyObject* vm_add_memory_breakpoint(VmMngr* self, PyObject* args) +{ + PyObject *ad; + PyObject *size; + PyObject *access; + + uint64_t b_ad; + uint64_t b_size; + uint64_t b_access; + + if (!PyArg_ParseTuple(args, "OOO", &ad, &size, &access)) + return NULL; + + PyGetInt(ad, b_ad); + PyGetInt(size, b_size); + PyGetInt(access, b_access); + + 
add_memory_breakpoint(&self->vm_mngr, b_ad, b_size, b_access); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_remove_memory_breakpoint(VmMngr* self, PyObject* args) +{ + PyObject *ad; + PyObject *access; + uint64_t b_ad; + uint64_t b_access; + + if (!PyArg_ParseTuple(args, "OO", &ad, &access)) + return NULL; + + PyGetInt(ad, b_ad); + PyGetInt(access, b_access); + remove_memory_breakpoint(&self->vm_mngr, b_ad, b_access); + + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_get_last_write_ad(VmMngr* self, PyObject* args) +{ + return PyInt_FromLong((uint64_t)self->vm_mngr.last_write_ad); +} + +PyObject* vm_get_last_write_size(VmMngr* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)self->vm_mngr.last_write_size); +} + +PyObject* vm_set_exception(VmMngr* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + return NULL; + + PyGetInt(item1, i); + + self->vm_mngr.exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_get_exception(VmMngr* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)self->vm_mngr.exception_flags); +} + + + + +PyObject* vm_init_memory_page_pool(VmMngr* self, PyObject* args) +{ + init_memory_page_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_init_code_bloc_pool(VmMngr* self, PyObject* args) +{ + init_code_bloc_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_init_memory_breakpoint(VmMngr* self, PyObject* args) +{ + init_memory_breakpoint(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_reset_memory_breakpoint(VmMngr* self, PyObject* args) +{ + reset_memory_breakpoint(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + + + + + + +PyObject* vm_dump_memory_page_pool(VmMngr* self, PyObject* args) +{ + dump_memory_page_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* 
vm_dump_memory_breakpoint(VmMngr* self, PyObject* args) +{ + dump_memory_breakpoint_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_get_all_memory(VmMngr* self, PyObject* args) +{ + PyObject *o; + struct memory_page_node * mpn; + PyObject *dict; + PyObject *dict2; + + + dict = PyDict_New(); + + LIST_FOREACH(mpn, &self->vm_mngr.memory_page_pool, next){ + + dict2 = PyDict_New(); + + o = PyString_FromStringAndSize(mpn->ad_hp, mpn->size); + PyDict_SetItemString(dict2, "data", o); + Py_DECREF(o); + + o = PyInt_FromLong((long)mpn->size); + PyDict_SetItemString(dict2, "size", o); + Py_DECREF(o); + + o = PyInt_FromLong((long)mpn->access); + PyDict_SetItemString(dict2, "access", o); + Py_DECREF(o); + + o = PyInt_FromLong((long)mpn->ad); + PyDict_SetItem(dict, o, dict2); + Py_DECREF(o); + Py_DECREF(dict2); + } + return dict; +} + + +PyObject* vm_reset_memory_page_pool(VmMngr* self, PyObject* args) +{ + reset_memory_page_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_reset_code_bloc_pool(VmMngr* self, PyObject* args) +{ + reset_code_bloc_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + + + + + + +PyObject* vm_call_pyfunc_from_globals(VmMngr* self, PyObject* args) +{ + char* funcname; + PyObject *mod, *func, *rslt, *globals, *func_globals; + + + if (!PyArg_ParseTuple(args, "s", &funcname)) + return NULL; + + + fprintf(stderr, "getting pyfunc %s\n", funcname); + mod = PyEval_GetBuiltins(); + + if (!mod) { + fprintf(stderr, "cannot find module\n"); + exit(0); + } + + func_globals = PyDict_GetItemString(mod, "globals"); + if (!func_globals) { + fprintf(stderr, "cannot find function globals\n"); + exit(0); + } + + if (!PyCallable_Check (func_globals)) { + fprintf(stderr, "function not callable\n"); + exit(0); + } + + globals = PyObject_CallObject (func_globals, NULL); + if (!globals) { + fprintf(stderr, "cannot get globals\n"); + exit(0); + } + + func = PyDict_GetItemString (globals, 
funcname); + if (!func) { + fprintf(stderr, "cannot find function %s\n", funcname); + exit(0); + } + + if (!PyCallable_Check (func)) { + fprintf(stderr, "function not callable\n"); + exit(0); + } + + rslt = PyObject_CallObject (func, NULL); + return rslt; +} + +PyObject* vm_add_code_bloc(VmMngr *self, PyObject *args) +{ + PyObject *item1; + PyObject *item2; + uint64_t ret = 0x1337beef; + uint64_t ad_start, ad_stop, ad_code = 0; + + struct code_bloc_node * cbp; + + if (!PyArg_ParseTuple(args, "OO", &item1, &item2)) + return NULL; + + PyGetInt(item1, ad_start); + PyGetInt(item2, ad_stop); + + cbp = create_code_bloc_node(ad_start, ad_stop); + cbp->ad_start = ad_start; + cbp->ad_stop = ad_stop; + cbp->ad_code = ad_code; + add_code_bloc(&self->vm_mngr, cbp); + return PyLong_FromUnsignedLongLong((uint64_t)ret); +} + +PyObject* vm_dump_code_bloc_pool(VmMngr* self) +{ + dump_code_bloc_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + + +PyObject* vm_exec_blocs(VmMngr* self, PyObject* args) +{ + PyObject* my_eip; + PyObject* b; + PyObject* module; + PyObject* func; + PyObject* meip; + uint64_t tmp; + + PyObject* known_blocs; + PyObject* e; + + if (!PyArg_ParseTuple(args, "OO", &my_eip, &known_blocs)) + return NULL; + + if(!PyDict_Check(known_blocs)) + RAISE(PyExc_TypeError, "arg must be dict"); + + PyGetInt(my_eip, tmp); + meip = PyLong_FromUnsignedLongLong((uint64_t)tmp); + while (1){ + b = PyDict_GetItem(known_blocs, meip); + if (b == NULL) + return meip; + + module = PyObject_GetAttrString(b, "module_c"); + if (module == NULL){ + fprintf(stderr, "assert eip module_c in pyobject\n"); + exit(0); + } + func = PyObject_GetAttrString(module, "func"); + if (func == NULL){ + fprintf(stderr, "assert func module_c in pyobject\n"); + exit(0); + } + + Py_DECREF(module); + if (!PyCallable_Check (func)) { + fprintf(stderr, "function not callable\n"); + exit(0); + } + Py_DECREF(meip); + //printf("exec bloc %"PRIX64"\n", tmp); + meip = PyObject_CallObject (func, 
NULL); + + Py_DECREF(func); + e = PyErr_Occurred (); + if (e){ + fprintf(stderr, "exception\n"); + return meip; + } + + if (self->vm_mngr.exception_flags) + return meip; + + } +} + + + +PyObject* vm_exec_bloc(PyObject* self, PyObject* args) +{ + PyObject* b; + PyObject* module; + PyObject* func; + PyObject* meip; + uint64_t tmp; + + PyObject* my_eip; + PyObject* known_blocs; + PyObject* e; + + if (!PyArg_ParseTuple(args, "OO", &my_eip, &known_blocs)) + return NULL; + + + if (PyInt_Check(my_eip)){ + tmp = (uint64_t)PyInt_AsLong(my_eip); + } + else if (PyLong_Check(my_eip)){ + tmp = (uint64_t)PyLong_AsUnsignedLongLong(my_eip); + } + else{ + RAISE(PyExc_TypeError,"arg1 must be int"); + } + + meip = PyInt_FromLong((long)tmp); + b = PyDict_GetItem(known_blocs, my_eip); + if (b == NULL) + return meip; + module = PyObject_GetAttrString(b, "module_c"); + if (module == NULL) + return meip; + func = PyObject_GetAttrString(module, "func"); + if (func == NULL) + return meip; + Py_DECREF(module); + if (!PyCallable_Check (func)) { + fprintf(stderr, "function not callable\n"); + exit(0); + } + Py_DECREF(meip); + meip = PyObject_CallObject (func, NULL); + + Py_DECREF(func); + e = PyErr_Occurred (); + if (e){ + fprintf(stderr, "exception\n"); + return meip; + } + + return meip; +} + + +PyObject* vm_set_automod_cb(VmMngr* self, PyObject* args) +{ + PyObject* cb_automod; + + if (!PyArg_ParseTuple(args, "O", &cb_automod)) + return NULL; + + if (self->vm_mngr.cb_automod != NULL){ + Py_DECREF(self->vm_mngr.cb_automod); + } + + Py_INCREF(cb_automod); + self->vm_mngr.cb_automod = cb_automod; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_set_addr2obj(VmMngr* self, PyObject* args) +{ + PyObject* addr2obj; + + if (!PyArg_ParseTuple(args, "O", &addr2obj)) + return NULL; + + if (self->vm_mngr.addr2obj != NULL){ + Py_DECREF(self->vm_mngr.addr2obj); + } + + Py_INCREF(addr2obj); + self->vm_mngr.addr2obj = addr2obj; + Py_INCREF(Py_None); + return Py_None; +} + + + +/* +PyObject* 
add_jitbloc(VmMngr* self, PyObject* args) +{ + PyObject* jitobj; + + if (!PyArg_ParseTuple(args, "O", &addr2obj)) + return NULL; + + Py_INCREF(Py_None); + return Py_None; + +} +*/ + + + + +static void +VmMngr_dealloc(VmMngr* self) +{ + self->ob_type->tp_free((PyObject*)self); +} + + +static PyObject * +VmMngr_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + VmMngr *self; + + self = (VmMngr *)type->tp_alloc(type, 0); + return (PyObject *)self; +} + +static PyObject * +VmMngr_get_vmmngr(VmMngr *self, void *closure) +{ + return PyLong_FromUnsignedLongLong((uint64_t)&(self->vm_mngr)); +} + +static int +VmMngr_set_vmmngr(VmMngr *self, PyObject *value, void *closure) +{ + PyErr_SetString(PyExc_TypeError, "immutable vmmngr"); + return -1; +} + +static PyMemberDef VmMngr_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef VmMngr_methods[] = { + {"vm_init_memory_page_pool", (PyCFunction)vm_init_memory_page_pool, METH_VARARGS, + "X"}, + {"vm_init_memory_breakpoint", (PyCFunction)vm_init_memory_breakpoint, METH_VARARGS, + "X"}, + {"vm_init_code_bloc_pool",(PyCFunction)vm_init_code_bloc_pool, METH_VARARGS, + "X"}, + {"vm_set_mem_access", (PyCFunction)vm_set_mem_access, METH_VARARGS, + "X"}, + {"vm_set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, + "X"}, + {"vm_set_automod_cb", (PyCFunction)vm_set_automod_cb, METH_VARARGS, + "X"}, + {"vm_set_addr2obj", (PyCFunction)vm_set_addr2obj, METH_VARARGS, + "X"}, + {"vm_add_code_bloc",(PyCFunction)vm_add_code_bloc, METH_VARARGS, + "X"}, + {"vm_exec_bloc",(PyCFunction)vm_exec_bloc, METH_VARARGS, + "X"}, + {"vm_exec_blocs",(PyCFunction)vm_exec_blocs, METH_VARARGS, + "X"}, + {"vm_get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, + "X"}, + {"vm_add_memory_page",(PyCFunction)vm_add_memory_page, METH_VARARGS, + "X"}, + {"vm_add_memory_breakpoint",(PyCFunction)vm_add_memory_breakpoint, METH_VARARGS, + "X"}, + {"vm_remove_memory_breakpoint",(PyCFunction)vm_remove_memory_breakpoint, METH_VARARGS, + "X"}, + 
{"vm_set_exception", (PyCFunction)vm_set_exception, METH_VARARGS, + "X"}, + {"vm_dump_memory_page_pool", (PyCFunction)vm_dump_memory_page_pool, METH_VARARGS, + "X"}, + {"vm_dump_memory_breakpoint", (PyCFunction)vm_dump_memory_breakpoint, METH_VARARGS, + "X"}, + {"vm_get_all_memory",(PyCFunction)vm_get_all_memory, METH_VARARGS, + "X"}, + {"vm_reset_memory_page_pool", (PyCFunction)vm_reset_memory_page_pool, METH_VARARGS, + "X"}, + {"vm_reset_memory_breakpoint", (PyCFunction)vm_reset_memory_breakpoint, METH_VARARGS, + "X"}, + {"vm_reset_code_bloc_pool", (PyCFunction)vm_reset_code_bloc_pool, METH_VARARGS, + "X"}, + {"vm_call_pyfunc_from_globals",(PyCFunction)vm_call_pyfunc_from_globals, METH_VARARGS, + "X"}, + + {"vm_get_exception",(PyCFunction)vm_get_exception, METH_VARARGS, + "X"}, + {"vm_get_exception",(PyCFunction)vm_get_exception, METH_VARARGS, + "X"}, + {"vm_get_last_write_ad", (PyCFunction)vm_get_last_write_ad, METH_VARARGS, + "X"}, + {"vm_get_last_write_size",(PyCFunction)vm_get_last_write_size, METH_VARARGS, + "X"}, + + {NULL} /* Sentinel */ +}; + +static int +VmMngr_init(VmMngr *self, PyObject *args, PyObject *kwds) +{ + + + fprintf(stderr, "ad cpu: %p\n", &(self->vm_mngr)); + memset(&(self->vm_mngr), 0, sizeof(self->vm_mngr)); + return 0; +} + +static PyGetSetDef VmMngr_getseters[] = { + {"vmmngr", + (getter)VmMngr_get_vmmngr, (setter)VmMngr_set_vmmngr, + "first name", + NULL}, + {NULL} /* Sentinel */ +}; + + +static PyTypeObject VmMngrType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "JitCore_x86_32.VmMngr", /*tp_name*/ + sizeof(VmMngr), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)VmMngr_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + 
"VmMngr objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + VmMngr_methods, /* tp_methods */ + VmMngr_members, /* tp_members */ + VmMngr_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)VmMngr_init, /* tp_init */ + 0, /* tp_alloc */ + VmMngr_new, /* tp_new */ +}; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +static PyObject *Vm_Mngr_Error; + + +static PyMethodDef Vm_Mngr_Methods[] = { + + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + + +PyMODINIT_FUNC +initvm_mngr(void) +{ + PyObject *m; + + if (PyType_Ready(&VmMngrType) < 0) + return; + + m = Py_InitModule("vm_mngr", Vm_Mngr_Methods); + if (m == NULL) + return; + + Vm_Mngr_Error = PyErr_NewException("vm_mngr_.error", NULL, NULL); + Py_INCREF(Vm_Mngr_Error); + PyModule_AddObject(m, "error", Vm_Mngr_Error); + + Py_INCREF(&VmMngrType); + PyModule_AddObject(m, "VmMngr", (PyObject *)&VmMngrType); + +} + diff --git a/setup.py b/setup.py new file mode 100755 index 00000000..5292a849 --- /dev/null +++ b/setup.py @@ -0,0 +1,135 @@ +#! 
/usr/bin/env python + +from distutils.core import setup, Extension +from distutils.util import get_platform +import shutil +import os, sys + +def buil_all(): + packages=['miasm2', + 'miasm2/arch', + 'miasm2/arch/x86', + 'miasm2/arch/arm', + 'miasm2/arch/msp430', + 'miasm2/arch/sh4', + 'miasm2/core', + 'miasm2/expression', + 'miasm2/ir', + 'miasm2/analysis', + 'miasm2/jitter', + 'miasm2/jitter/arch', + 'miasm2/jitter/os_dep', + ] + + ext_modules_no_tcc = [ + Extension("miasm2.jitter.vm_mngr", + ["miasm2/jitter/vm_mngr.c", + "miasm2/jitter/vm_mngr_py.c"]), + Extension("miasm2.jitter.arch.JitCore_x86", + ["miasm2/jitter/arch/JitCore_x86.c"]), + Extension("miasm2.jitter.arch.JitCore_arm", + ["miasm2/jitter/arch/JitCore_arm.c"]), + Extension("miasm2.jitter.arch.JitCore_msp430", + ["miasm2/jitter/arch/JitCore_msp430.c"]), + Extension("miasm2.jitter.Jitllvm", + ["miasm2/jitter/Jitllvm.c"]), + ] + + ext_modules_all = [ + Extension("miasm2.jitter.vm_mngr", + ["miasm2/jitter/vm_mngr.c", + "miasm2/jitter/vm_mngr_py.c"]), + Extension("miasm2.jitter.arch.JitCore_x86", + ["miasm2/jitter/arch/JitCore_x86.c"]), + Extension("miasm2.jitter.arch.JitCore_arm", + ["miasm2/jitter/arch/JitCore_arm.c"]), + Extension("miasm2.jitter.arch.JitCore_msp430", + ["miasm2/jitter/arch/JitCore_msp430.c"]), + Extension("miasm2.jitter.Jitllvm", + ["miasm2/jitter/Jitllvm.c"]), + Extension("miasm2.jitter.Jittcc", + ["miasm2/jitter/Jittcc.c"], + libraries=["tcc"]) + ] + + + print 'building' + build_ok = False + for name, ext_modules in [('all', ext_modules_all), + ('notcc', ext_modules_no_tcc)]: + print 'build with', repr(name) + try: + s = setup( + name = 'Miasm', + version = '2.0', + packages = packages, + package_data = {'miasm2':['jitter/*.h', + 'jitter/arch/*.h',]}, + ext_modules = ext_modules, + # Metadata + author = 'Fabrice Desclaux', + author_email = 'serpilliere@droid-corp.org', + description = 'Machine code manipulation library', + license = 'GPLv2', + # keywords = '', + # url = '', + ) + 
except SystemExit, e: + print repr(e) + continue + build_ok = True + break + if not build_ok: + raise ValueError('Unable to build Miasm!') + print 'build', name + if name == 'notcc': + print + print "*"*80 + print "Warning: TCC is not properly installed," + print "Miasm will be installed without TCC Jitter" + print "Etheir install TCC or use LLVM jitter" + print "*"*80 + print + # we copy libraries from build dir to current miasm directory + build_base = None + if 'build' in s.command_options: + if 'build_base' in s.command_options['build']: + build_base = s.command_options['build']['build_base'] + if build_base is None: + build_base = "build" + plat_specifier = ".%s-%s" % (get_platform(), sys.version[0:3]) + build_base = os.path.join('build','lib' + plat_specifier) + print build_base + +def buil_no_tcc(): + setup( + name = 'Miasm', + version = '2.0', + packages=['miasm2', 'miasm2/tools', + 'miasm2/expression', 'miasm2/graph', 'miasm2/arch', + 'miasm2/core', 'miasm2/tools/emul_lib' ], + package_data = {'miasm2':['tools/emul_lib/*.h']}, + # data_files = [('toto', ['miasm2/tools/emul_lib/queue.h'])], + # Metadata + author = 'Fabrice Desclaux', + author_email = 'serpilliere@droid-corp.org', + description = 'Machine code manipulation library', + license = 'GPLv2', + # keywords = '', + # url = '', + ) + + +def try_build(): + buil_all() + """ + try: + buil_all() + return + except: + print "WARNING cannot build with libtcc!, trying without it" + print "Miasm will not be able to emulate code" + buil_no_tcc() + """ + +try_build() diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py new file mode 100644 index 00000000..e6b3fdda --- /dev/null +++ b/test/arch/arm/arch.py @@ -0,0 +1,517 @@ +import os +import time +from miasm2.arch.arm.arch import * + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + +if 0: + a = bs('00') + b = bs('01') + c = bs(l=2) + d = bs(l=4, fname='rd') + e = bs_name(l=1, name={'ADD': 0, 
'SUB': 1}) + assert(isinstance(e, bs_divert)) + scc = bs_mod_name(l=1, mn_mod=['', 'S']) + f = bs(l=1, cls=(arm_reg,)) + + class arm_mov(mn_arm): + fields = [bs('0000'), bs('0000'), bs('0000')] + + class arm_DATA(mn_arm): + fields = [bs('1111'), e, scc, f, bs('0')] + mn = mn_arm.dis(0xF000000) + + +if 0: + import cProfile + cProfile.run('mn_arm.dis("\xe1\xa0\xa0\x06", mode_arm)') + # l = mn_arm.dis(bin_stream("\xe1\xa0\xa0\x06"), mode_arm) + # print l + """ + mode = 64 + l = mn_x86.fromstring("ADC DWORD PTR [RAX], 0x11223344", mode) + print 'xx' + #t= time.time() + import cProfile + def f(): + x = l.asm(mode) + print x + cProfile.run('f()') + """ + + +def h2i(s): + return s.replace(' ', '').decode('hex') + + +def u16swap(i): + return struct.unpack('<H', struct.pack('>H', i))[0] + +reg_tests_arm = [ + ("001504F4 MOV R1, LR", + "0e10a0e1"), + ("00150500 ADD R2, R8, R0", + "002088e0"), + ("001504E8 MOV LR, 0x3E8", + "faefa0e3"), + ("001504F0 RSB R0, R0, R3", + "030060e0"), + ("000E6F50 MUL R2, LR, R6", + "9e0602e0"), + ("000620D8 MLA R12, R0, R5, R3", + "90352ce0"), + ("00026798 ADDS R2, R4, R0", + "002094e0"), + ("0003EA9C MVN R7, R2", + "0270e0e1"), + ("C00CD4DC BL 0x7C", + "1F0000EB"), + ("C00CF110 BL 0xFFFFFDEC", + "7BFFFFEB"), + + + ("000829b0 BLNE 0xFFF87110", + "441cfe1b"), + + ("C00EC608 TEQ R4, R5", + "050034e1"), + ("C00CD53C CMP R9, R8", + "080059e1"), + ("C00CD5D8 MOV R1, 0x60000000", + "0612a0e3"), + ("C00CEC18 MOV R2, R1 LSL 0x14", + "012aa0e1"), + ("C00CF828 ORR R0, R2, R1 LSL R0", + "110082e1"), + ("C00D8A14 EOR R7, R2, R7 LSR 0x8", + "277422e0"), + ("C00CD2E4 MRS R1, CPSR_cxsf", + "00100fe1"), + ("C019BE2C MRS R5, SPSR_cxsf", + "00504fe1"), + ("C00CD2F0 MSR CPSR_cf, R1", + "01f029e1"), + ("C00D8A24 LDRB R2, [R3, 0xFFFFFFFF]", # LDRB R2, [R3, #-1] + "012053e5"), + ("C01E59F8 LDREQ R0, [R1, R0 LSL 0x2]", # LDREQ R0, [R1, R0, LSL 2] + "00019107"), + ("C046855C LDR R0, [R9, R0 LSL 0x4]", # + "000299e7"), + ('c012a8d8 LDREQ R0, [R0]', + '00009005'), + 
("C00D8AA8 LDR R0, [R2], 0x4", # LDR R0, [R2], 4 + "040092e4"), + ("C00D8A9C LDR R0, [PC, 0x514]", + "14059fe5"), + ("C03C7A38 LDR R5, [R0, 0xD4]!", + "d450b0e5"), + ("C00EA214 LDMIA R0, {R0, R1}", # LDMIA R0, {R0, R1} + "030090e8"), + ("C0121D70 LDMGEIA R1, {R0, R1}", + "030091a8"), + ("C0124E68 LDMIB R1, {R4, R12}", + "101091e9"), + ("C012D2A0 LDMDA R7, {R0, R2}", + "050017e8"), + ("C0130A64 LDMFD SP, {R0, R1}", + "03009de8"), + ("C016AAD0 LDMFD SP!, {R8}", + "0001bde8"), + ("C00E0F98 LDMED SP, {R4, R6}", + "50009de9"), + ("C0161AC0 STMFD SP!, {R8}", # stmfd + "00012de9"), + ("C00E0710 STMIA R5, {R8, R9}", + "000385e8"), + ("C0460580 STMFA SP, {R8, R10}", + "00058de9"), + ("C04FFBD0 STMEA SP, {R9, R10}", + "00068de8"), + ("C00CEB10 STMDB R8, {SP, LR}^", + "006048e9"), + ("C0129534 STMIB R6, {R0, R9}", + "010286e9"), + ("C01293BC STMFD SP!, {R4-R11, LR}", + "F04F2DE9"), + ("C02FA8B4 SVCEQ 0x196A0B", + "0B6a190f"), + ("C00EF814 SVCMI 0x495020", + "2050494F"), + ("C00ED5CC CDPCS p3, 0x2, c7, c14, c5, 0x3", + "65732e2e"), + ("C00EFE88 CDPVS p13, 0x2, c6, c0, c15, 0x3", + "6F6D206e"), + ("C0148ED0 LDCVS p11, c5, [R4], 0xFFFFFF94!", # -0x6C TODO XXX no wb ! 
+ "1B5B346C"), + ("C00ED374 MRCHI p15, 0x5, LR, c14, c9, 0x7", + "f9efbe8e"), + ("C00F3D24 MCRVS p0, 0x3, R2, c9, c4, 0x3", + "7420696e"), + ("xxxxxxxx UND 0x0, 0x0", + "100000e6"), + ("xxxxxxxx BKPT 0x0, 0x0", + "700020e1"), + ("c00d153c LDRH R2, [R4, 0xCA]", + "ba2cd4e1"), + ("c00d18a8 LDRH R6, [R12]", + "b060dce1"), + ("c00d8134 STRH R3, [R6, 0x2]", + "b230c6e1"), + ("c00d80c4 STRH R3, [R6]", + "b030c6e1"), + + ("00031F40 LDRD R8, [R7]", + "D080C7E1"), + + ("c0104a34 LDRD R0, [SP, 0x8]", + "D800CDE1"), + ("C013DC68 LDRD R6, [R0, 0xFFFFFFF8]", + "D86040E1"), + + ("C0120CC0 LDRSB R1, [SP, 0x8]", + "D810DDE1"), + + ("C0105C28 LDRSH R0, [R8, 0xA]", + "FA00D8E1"), + + ("C00D8FF4 LDRH R3, [R12, R3]", + "B3309CE1"), + ("C012D1A4 LDRSB R2, [R2, R1]", + "D12092E1"), + + ("c0115a84 STRD R0, [SP, 0x18]", + "F801CDE1"), + ("c0124a18 STRD R2, [R0, 0xFFFFFFF8]", + "F82040E1"), + + ("0002F5A8 MOV R2, 0x2710", + "102702E3"), + + ("0002F5B0 UMULL R2, R3, R3, R2", + "932283E0"), + ("C045D260 SMULL R3, R2, LR, R2", + "9E32C2E0"), + ("C03E6440 SMLAL R2, R0, R1, R0", + "9120E0E0"), + + ("C00CFA40 BLX R12", + "3CFF2FE1"), + ("C010DE1C BLX 0x1ECCEA", + "3AB307FB"), + + ("00013028 MOV R9, 0x6E75", + "759E06E3"), + + ("0001302C MOVT R9, 0x64", + "649040E3"), + + ("0004A38C CLZ R3, R2", + "123F6FE1"), + + ("C0132564 BLX 0xFFFCF06C", + "1B3CFFFA"), + + ("C0297028 QADD R7, R6, R6", + "567006E1"), + + ("6330A0E1 MOV R3, R3 RRX", + "6330A0E1"), + +] +ts = time.time() + +for s, l in reg_tests_arm: + print "-" * 80 + s = s[12:] + b = h2i((l)) + mn = mn_arm.dis(b, mode_arm) + print [str(x) for x in mn.args] + print s + print mn + assert(str(mn) == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + l = mn_arm.fromstring(s, mode_arm) + # print l + assert(str(l) == s) + a = mn_arm.asm(l) + print [x for x in a] + print repr(b) + # print mn.args + assert(b in a) + +reg_tests_armt = [ + ("0006ff5c LSLS R2, R0, 0x1A", + "8206"), + ("0006fe06 LSRS R3, R3, 0x7", + "db09"), + ("0006af9c ASRS 
R0, R2, 0x1", + "5010"), + ("0006b1ea ADDS R1, R4, R5", + "6119"), + ("0006b304 ADDS R2, R0, 0x1", + "421c"), + ("0006bc80 SUBS R3, R1, 0x1", + "4b1e"), + ("0006f1d0 SUBS R2, R6, R3", + "f21a"), + ("0006af30 MOVS R3, 0x1", + "0123"), + ("0006b0ee CMP R3, 0x1", + "012b"), + ("C0100242 CMP R2, 0x0", + "002A"), + ("0006b0f2 SUBS R3, 0x1", + "013b"), + ("0006b12c ADDS R0, 0x4", + "0430"), + + ("0006b944 ANDS R2, R5", + "2a40"), + ("0014df06 EORS R2, R0", + "4240"), + ("0008b66e LSLS R7, R1", + "8f40"), + ("002e7e0c LSRS R4, R0", + "c440"), + ("003258b6 ASRS R2, R3", + "1a41"), + # adcs + # sbcs + # rors + ("0017b754 TST R0, R2", + "1042"), + ("0006e3fc NEGS R5, R5", + "6d42"), + ("0006b1fc CMP R6, R7", + "be42"), + ("001845ea CMN R3, R0", + "c342"), + ("001845ea ORRS R0, R4", + "2043"), + # muls + # bic + ("0006b90e MVNS R0, R3", + "d843"), + + ("0006bcac CMP R6, R9", + "4e45"), + ("0006bcf0 CMP R3, R1", + "8b42"), + ("0006c26c CMP R12, LR", + "f445"), + ("0006c8e4 CMP R8, R2", + "9045"), + ("0006af70 MOV R1, R0", + "0146"), + ("0006b3d0 MOV R1, SP", + "6946"), + ("0006b47c MOV R8, R0", + "8046"), + ("0006bc8e MOV R8, SP", + "e846"), + ("0006aee0 BX LR", + "7047"), + ("000a9d30 BX R2", + "1047"), + + ("0006b2dc LDR R0, [PC]", + "0048"), + ("00078798 LDR R3, [PC, 0x1]", + "014b"), + + ("00072dc2 LDR R3, [R3, R0]", + "1b58"), + ("0008e5d4 LDR R2, [R4, R0]", + "2258"), + ("0018e8ce LDRB R3, [R0, R4]", + "035d"), + ("0007b976 STR R6, [R5, R4]", + "2e51"), + ("000b5b42 STRB R7, [R1, R4]", + "0f55"), + + ("002b02ae STRH R1, [R0, R3]", + "c152"), + ("002ea7de LDRH R5, [R6, R4]", + "355b"), + # ldsb + # ldsh + + ("000a65c6 LDR R7, [R0, 0x10]", + "0769"), + ("0006b308 LDRB R5, [R1, 0x4]", + "0d79"), + ("0006b014 STR R4, [R4, 0x38]", + "a463"), + ("0006b006 STRB R5, [R0, 0x10]", + "0574"), + + ("0009b598 STRH R3, [R4, 0x2]", + "6380"), + ("000748da LDRH R2, [R6, 0x30]", + "328E"), + + ("0006aed2 STR R3, [SP, 0x24]", + "0993"), + ("0006ae6c LDR R3, [SP, 0x4]", + "019b"), + + 
("0006aed0 ADD R1, SP, 0x20", + "08a9"), + ("000xxxxx ADD R1, PC, 0x20", + "08a1"), + + ("0006aed8 ADD SP, 0x30", + "0cb0"), + ("0006c1b0 SUB SP, 0x18", + "86b0"), + + + ("0006aeee POP {R4, PC}", + "10bd"), + ("0006b03a POP {R4-R6, PC}", + "70bd"), + ("0006aee4 PUSH {R4, LR}", + "10b5"), + ("0006b084 PUSH {R0, R1, R4-R6, LR}", + "73b5"), + ("003139a0 PUSH {LR}", + "00b5"), + ("00220f44 PUSH {R2, R3}", + "0cb4"), + + ("00076c54 LDMIA R1!, {R0, R1}", + "03c9"), + ("000a1c16 STMIA R6!, {R0-R3}", + "0fc6"), + + ("0006af78 BEQ 0x6", + "03d0"), + ("000747b4 BCC 0xFFFFFFE6", + "f3d3"), + # swi + + ("0007479c B 0xE", + "07e0"), + ("0006b946 B 0xFFFFFFE4", + "f2e7"), + ("C010163C BLX 0x1F916C", + "F9F1B6E8"), + ("C01015E8 BL 0x1F8D5C", + "F8F1AEFE"), + + + #("000xxxxx BL 0x0", + # "00F8"), + #("000xxxxx BL 0x4000", + # "04F0"), + #("000xxxxx BL 0xFFFFF000", + # "FFF7"), + + + #("0006aea4 MOV R5, R1", + # "460d"), + + # adc + # adc + ("00000000 UND ", + "01de"), + + ("00000000 BLX R7", + "B847"), + + ("00000000 CBZ R4, 0x2E", + "bcb1"), + ("00000000 CBNZ R0, 0x2A", + "a8b9"), + + +] +print "#" * 40, 'armthumb', '#' * 40 + +for s, l in reg_tests_armt: + print "-" * 80 + s = s[12:] + b = h2i((l)) + print b.encode('hex') + mn = mn_armt.dis(b, mode_armthumb) + print [str(x) for x in mn.args] + print s + print mn + assert(str(mn) == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + l = mn_armt.fromstring(s, mode_armthumb) + # print l + assert(str(l) == s) + a = mn_armt.asm(l) + print [x for x in a] + print repr(b) + # print mn.args + assert(b in a) + +""" +print "*"*30, "START SPECIAL PARSING", "*"*30 +parse_tests = [ + "MOV LR, toto", + "MOV LR, 1+toto", + "MOV LR, (lend-lstart)^toto<<<R1", + "MOV LR, R1 LSL (l_end-l_start)^toto<<<R1", + "MOV LR, R1 LSL (l_end-l_start)^toto<<<R1", + "EOR R0, R1, toto^titi+1", + ] + +for l in parse_tests: + print "-"*80 + l = mn_arm.fromstring(l, mode_arm) + print l.name, ", ".join([str(a) for a in l.args]) +""" + + +print 'TEST 
time', time.time() - ts + +# speed test arm +o = "" +for s, l in reg_tests_arm: + s = s[12:] + b = h2i((l)) + o += b + +while len(o) < 1000: + o += o +bs = bin_stream_str(o) +off = 0 +instr_num = 0 +ts = time.time() +while off < bs.getlen(): + mn = mn_arm.dis(bs, mode_arm, off) + instr_num += 1 + off += 4 +print 'instr per sec:', instr_num / (time.time() - ts) + + +# speed test thumb +o = "" +for s, l in reg_tests_armt: + s = s[12:] + b = h2i((l)) + o += b + +while len(o) < 1000: + o += o +bs = bin_stream_str(o) +off = 0 +instr_num = 0 +ts = time.time() +while off < bs.getlen(): + mn = mn_armt.dis(bs, mode_armthumb, off) + # print instr_num, off, str(mn) + instr_num += 1 + off += mn.l +print 'instr per sec:', instr_num / (time.time() - ts) + +import cProfile +cProfile.run(r'mn_arm.dis("\xe1\xa0\xa0\x06", mode_arm)') diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py new file mode 100644 index 00000000..be36e90b --- /dev/null +++ b/test/arch/arm/sem.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import unittest +import logging + +from miasm2.ir.symbexec import symbexec +from miasm2.arch.arm.arch import mn_arm as mn, mode_arm as mode +from miasm2.arch.arm.sem import ir_arm as ir +from miasm2.arch.arm.regs import * +from miasm2.expression.expression import * + +logging.getLogger('cpuhelper').setLevel(logging.ERROR) +EXCLUDE_REGS = set() + + +def M(addr): + return ExprMem(ExprInt_fromsize(16, addr), 16) + + +def compute(asm, inputstate={}, debug=False): + sympool = dict(regs_init) + sympool.update({k: ExprInt_from(k, v) for k, v in inputstate.iteritems()}) + symexec = symbexec(mn, sympool) + instr = mn.fromstring(asm, mode) + code = mn.asm(instr)[0] + instr = mn.dis(code, mode) + instr.offset = inputstate.get(PC, 0) + interm = ir() + interm.add_instr(instr) + symexec.emul_ir_blocs(interm, instr.offset) + if debug: + for k, v in symexec.symbols.items(): + if regs_init.get(k, None) != v: + print k, v + return {k: v.arg.arg for k, v in 
symexec.symbols.items() + if k not in EXCLUDE_REGS and regs_init.get(k, None) != v} + + +class TestARMSemantic(unittest.TestCase): + + # def test_condition(self): + # §A8.3: Conditional execution + # pass + + def test_shift(self): + # §A8.4: Shifts applied to a register + self.assertEqual( + compute('MOV R4, R4 ', {R4: 0xDEADBEEF, }), {R4: 0xDEADBEEF, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 LSL 0') + self.assertEqual( + compute('MOV R4, R4 LSL 1', {R4: 0xDEADBEEF, }), {R4: 0xBD5B7DDE, }) + self.assertEqual( + compute('MOV R4, R4 LSL 16', {R4: 0xDEADBEEF, }), {R4: 0xBEEF0000, }) + self.assertEqual( + compute('MOV R4, R4 LSL 31', {R4: 0xDEADBEEF, }), {R4: 0x80000000, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 LSL 32') + self.assertEqual( + compute('MOV R4, R4 LSL R5', {R4: 0xDEADBEEF, R5: 0xBADBAD01, }), {R4: 0xBD5B7DDE, R5: 0xBADBAD01, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 LSR 0') + self.assertEqual( + compute('MOV R4, R4 LSR 1', {R4: 0xDEADBEEF, }), {R4: 0x6F56DF77, }) + self.assertEqual( + compute('MOV R4, R4 LSR 16', {R4: 0xDEADBEEF, }), {R4: 0x0000DEAD, }) + self.assertEqual( + compute('MOV R4, R4 LSR 31', {R4: 0xDEADBEEF, }), {R4: 0x00000001, }) + self.assertEqual( + compute('MOV R4, R4 LSR 32', {R4: 0xDEADBEEF, }), {R4: 0xDEADBEEF, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 LSR 33') + self.assertEqual( + compute('MOV R4, R4 LSR R5', {R4: 0xDEADBEEF, R5: 0xBADBAD01, }), {R4: 0x6F56DF77, R5: 0xBADBAD01, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 ASR 0') + self.assertEqual( + compute('MOV R4, R4 ASR 1', {R4: 0xDEADBEEF, }), {R4: 0xEF56DF77, }) + self.assertEqual( + compute('MOV R4, R4 ASR 16', {R4: 0xDEADBEEF, }), {R4: 0xFFFFDEAD, }) + self.assertEqual( + compute('MOV R4, R4 ASR 31', {R4: 0xDEADBEEF, }), {R4: 0xFFFFFFFF, }) + self.assertEqual( + compute('MOV R4, R4 ASR 32', {R4: 0xDEADBEEF, }), {R4: 0xDEADBEEF, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 ASR 33') + 
self.assertEqual( + compute('MOV R4, R4 ASR R5', {R4: 0xDEADBEEF, R5: 0xBADBAD01, }), {R4: 0xEF56DF77, R5: 0xBADBAD01, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 ROR 0') + self.assertEqual( + compute('MOV R4, R4 ROR 1', {R4: 0xDEADBEEF, }), {R4: 0xEF56DF77, }) + self.assertEqual( + compute('MOV R4, R4 ROR 16', {R4: 0xDEADBEEF, }), {R4: 0xBEEFDEAD, }) + self.assertEqual( + compute('MOV R4, R4 ROR 31', {R4: 0xDEADBEEF, }), {R4: 0xBD5B7DDF, }) + self.assertRaises(ValueError, compute, 'MOV R4, R4 ROR 32') + self.assertEqual( + compute('MOV R4, R4 ROR R5', {R4: 0xDEADBEEF, R5: 0xBADBAD01, }), {R4: 0xEF56DF77, R5: 0xBADBAD01, }) + self.assertEqual(compute('MOV R4, R4 RRX ', {cf: 0, R4: 0xDEADBEEF, }), { + cf: 0, R4: 0x6F56DF77, }) + self.assertEqual(compute('MOV R4, R4 RRX ', {cf: 1, R4: 0xDEADBEEF, }), { + cf: 1, R4: 0xEF56DF77, }) + + def test_ADC(self): + # §A8.8.1: ADC{S}{<c>}{<q>} {<Rd>,} <Rn>, #<const> + self.assertRaises( + ValueError, compute, 'ADC R4, 0x00000001 ') + self.assertEqual(compute('ADC R4, R4, 0x00000001 ', { + cf: 0, R4: 0x00000000, }), {cf: 0, R4: 0x00000001, }) + self.assertEqual(compute('ADC R4, R4, 0x00000000 ', { + cf: 1, R4: 0x00000000, }), {cf: 1, R4: 0x00000001, }) + self.assertEqual(compute('ADC PC, R4, 0x00000001 ', { + cf: 0, R4: 0xFFFFFFFF, PC: 0x55555555, }), {cf: 0, R4: 0xFFFFFFFF, PC: 0x00000000, }) + self.assertEqual(compute('ADC PC, R4, 0x00000000 ', { + cf: 1, R4: 0xFFFFFFFF, PC: 0x55555555, }), {cf: 1, R4: 0xFFFFFFFF, PC: 0x00000000, }) + self.assertEqual(compute('ADCS R4, R4, 0x80000000 ', {cf: 0, R4: 0x80000000, }), { + nf: 0, zf: 1, cf: 0, of: 1, R4: 0x00000000, }) + self.assertEqual(compute('ADCS R4, R4, 0xFF000000 ', {cf: 1, R4: 0x00FFFFFE, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xFFFFFFFF, }) + self.assertEqual(compute('ADCS PC, R4, 0x00000000 ', { + cf: 0, R4: 0x00000000, PC: 0x55555555, }), {cf: 0, R4: 0x00000000, PC: 0x00000000, }) + self.assertEqual(compute('ADCS PC, R4, 0xFF000000 ', { + cf: 1, R4: 
0x01000000, PC: 0x55555555, }), {cf: 1, R4: 0x01000000, PC: 0x00000001, }) + + # §A8.8.2: ADC{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm> {,<shift>} + self.assertRaises( + ValueError, compute, 'ADC R4, R5 ') + self.assertEqual(compute('ADC R4, R4, R5 ', { + cf: 1, R4: 0xFFFFFFFF, R5: 0x00000000, }), {cf: 1, R4: 0x00000000, R5: 0x00000000, }) + self.assertEqual(compute('ADC R4, R4, R5 LSL 1 ', { + cf: 0, R4: 0x00000001, R5: 0x00000008, }), {cf: 0, R4: 0x00000011, R5: 0x00000008, }) + self.assertEqual(compute('ADC R4, R4, R5 LSR 2 ', { + cf: 1, R4: 0x00000000, R5: 0x80000041, }), {cf: 1, R4: 0x20000011, R5: 0x80000041, }) + self.assertEqual(compute('ADC R4, R4, R5 ASR 3 ', { + cf: 0, R4: 0x00000001, R5: 0x80000081, }), {cf: 0, R4: 0xF0000011, R5: 0x80000081, }) + self.assertEqual(compute('ADC R4, R4, R5 ROR 4 ', { + cf: 1, R4: 0xFFFFFFFF, R5: 0x0000010F, }), {cf: 1, R4: 0xF0000010, R5: 0x0000010F, }) + self.assertEqual(compute('ADC R4, R4, R5 RRX ', { + cf: 1, R4: 0xFFFFFFFF, R5: 0x00000101, }), {cf: 1, R4: 0x80000080, R5: 0x00000101, }) + self.assertEqual(compute('ADCS R4, R4, R5 ', {cf: 1, R4: 0xFFFFFFFF, R5: 0x00000000, }), { + nf: 0, zf: 1, cf: 0, of: 0, R4: 0x00000000, R5: 0x00000000, }) + self.assertEqual(compute('ADCS R4, R4, R5 LSL 1 ', {cf: 0, R4: 0x00000001, R5: 0x00000008, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x00000011, R5: 0x00000008, }) + self.assertEqual(compute('ADCS R4, R4, R5 LSR 2 ', {cf: 1, R4: 0x00000000, R5: 0x80000041, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x20000011, R5: 0x80000041, }) + self.assertEqual(compute('ADCS R4, R4, R5 ASR 3 ', {cf: 0, R4: 0x00000001, R5: 0x80000081, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xF0000011, R5: 0x80000081, }) + self.assertEqual(compute('ADCS R4, R4, R5 ROR 4 ', {cf: 1, R4: 0xFFFFFFFF, R5: 0x0000010F, }), { + nf: 1, zf: 0, cf: 0, of: 0, R4: 0xF0000010, R5: 0x0000010F, }) + self.assertEqual(compute('ADCS R4, R4, R5 RRX ', {cf: 1, R4: 0xFFFFFFFF, R5: 0x00000101, }), { + nf: 1, zf: 0, cf: 0, of: 0, R4: 0x80000080, 
R5: 0x00000101, }) + + # §A8.8.3: ADC{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm>, <type> <Rs> + self.assertEqual(compute('ADC R4, R6, R4 LSL R5', { + cf: 0, R4: 0x00000001, R5: 0x00000004, R6: 0, }), {cf: 0, R4: 0x00000010, R5: 0x00000004, R6: 0, }) + self.assertEqual(compute('ADC R4, R6, R4 LSR R5', { + cf: 1, R4: 0x00000110, R5: 0x80000004, R6: 0, }), {cf: 1, R4: 0x00000012, R5: 0x80000004, R6: 0, }) + self.assertEqual(compute('ADC R4, R6, R4 ASR R5', { + cf: 0, R4: 0x80000010, R5: 0xF0000001, R6: 0, }), {cf: 0, R4: 0xC0000008, R5: 0xF0000001, R6: 0, }) + self.assertEqual(compute('ADC R4, R6, R4 ROR R5', { + cf: 1, R4: 0x000000FF, R5: 0x00000F04, R6: 0, }), {cf: 1, R4: 0xF0000010, R5: 0x00000F04, R6: 0, }) + self.assertEqual(compute('ADCS R4, R6, R4 LSL R5', {cf: 0, R4: 0x00000001, R5: 0x00000004, R6: 0, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x00000010, R5: 0x00000004, R6: 0, }) + self.assertEqual(compute('ADCS R4, R6, R4 LSR R5', {cf: 1, R4: 0x00000110, R5: 0x80000004, R6: 0, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x00000012, R5: 0x80000004, R6: 0, }) + self.assertEqual(compute('ADCS R4, R6, R4 ASR R5', {cf: 0, R4: 0x80000010, R5: 0xF0000001, R6: 0, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xC0000008, R5: 0xF0000001, R6: 0, }) + self.assertEqual(compute('ADCS R4, R6, R4 ROR R5', {cf: 1, R4: 0x000000FF, R5: 0x00000F04, R6: 0, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xF0000010, R5: 0x00000F04, R6: 0, }) + + def test_ADD(self): + # §A8.8.{5,9}: ADD{S}{<c>}{<q>} {<Rd>,} <Rn>, #<const> + self.assertRaises( + ValueError, compute, 'ADD R4, 0x00000001 ') + self.assertEqual(compute('ADD R4, R4, 0x00000001 ', { + R4: 0x00000000, }), {R4: 0x00000001, }) + self.assertEqual(compute('ADD R4, R4, 0x00000000 ', { + R4: 0x00000000, }), {R4: 0x00000000, }) + self.assertEqual(compute('ADD PC, R4, 0x00000001 ', { + R4: 0xFFFFFFFF, PC: 0x55555555, }), {R4: 0xFFFFFFFF, PC: 0x00000000, }) + self.assertEqual(compute('ADD PC, R4, 0x00000000 ', { + R4: 0xFFFFFFFF, PC: 0x55555555, }), {R4: 
0xFFFFFFFF, PC: 0xFFFFFFFF, }) + self.assertEqual(compute('ADDS R4, R4, 0x80000000 ', {R4: 0x80000000, }), { + nf: 0, zf: 1, cf: 0, of: 1, R4: 0x00000000, }) + self.assertEqual(compute('ADDS R4, R4, 0xFF000000 ', {R4: 0x00FFFFFE, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xFFFFFFFE, }) + self.assertEqual(compute('ADDS PC, R4, 0x00000000 ', { + R4: 0x00000000, PC: 0x55555555, }), {R4: 0x00000000, PC: 0x00000000, }) + self.assertEqual(compute('ADDS PC, R4, 0xFF000000 ', { + R4: 0x01000000, PC: 0x55555555, }), {R4: 0x01000000, PC: 0x00000000, }) + # SP special part + self.assertEqual(compute('ADD R4, SP, 0x00000001 ', { + R4: 0x00000000, SP: 0x00000000, }), {R4: 0x00000001, SP: 0x00000000, }) + + # §A8.8.{7,11}: ADD{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm> {,<shift>} + self.assertRaises( + ValueError, compute, 'ADD R4, R5 ') + self.assertEqual(compute('ADD R4, R4, R5 ', { + R4: 0xFFFFFFFF, R5: 0x00000001, }), {R4: 0x00000000, R5: 0x00000001, }) + self.assertEqual(compute('ADD R4, R4, R5 LSL 1 ', { + R4: 0x00000001, R5: 0x00000008, }), {R4: 0x00000011, R5: 0x00000008, }) + self.assertEqual(compute('ADD R4, R4, R5 LSR 2 ', { + R4: 0x00000000, R5: 0x80000041, }), {R4: 0x20000010, R5: 0x80000041, }) + self.assertEqual(compute('ADD R4, R4, R5 ASR 3 ', { + R4: 0x00000001, R5: 0x80000081, }), {R4: 0xF0000011, R5: 0x80000081, }) + self.assertEqual(compute('ADD R4, R4, R5 ROR 4 ', { + R4: 0xFFFFFFFF, R5: 0x0000010F, }), {R4: 0xF000000F, R5: 0x0000010F, }) + self.assertEqual(compute('ADD R4, R4, R5 RRX ', { + cf: 1, R4: 0xFFFFFFFF, R5: 0x00000101, }), {cf: 1, R4: 0x8000007F, R5: 0x00000101, }) + self.assertEqual(compute('ADDS R4, R4, R5 ', {R4: 0xFFFFFFFF, R5: 0x00000001, }), { + nf: 0, zf: 1, cf: 0, of: 0, R4: 0x00000000, R5: 0x00000001, }) + self.assertEqual(compute('ADDS R4, R4, R5 LSL 1 ', {R4: 0x00000001, R5: 0x00000008, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x00000011, R5: 0x00000008, }) + self.assertEqual(compute('ADDS R4, R4, R5 LSR 2 ', {R4: 0x00000000, R5: 0x80000041, }), { + 
nf: 0, zf: 0, cf: 1, of: 0, R4: 0x20000010, R5: 0x80000041, }) + self.assertEqual(compute('ADDS R4, R4, R5 ASR 3 ', {R4: 0x00000001, R5: 0x80000081, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xF0000011, R5: 0x80000081, }) + self.assertEqual(compute('ADDS R4, R4, R5 ROR 4 ', {R4: 0xFFFFFFFF, R5: 0x0000010F, }), { + nf: 1, zf: 0, cf: 0, of: 0, R4: 0xF000000F, R5: 0x0000010F, }) + self.assertEqual(compute('ADDS R4, R4, R5 RRX ', {cf: 1, R4: 0xFFFFFFFF, R5: 0x00000101, }), { + nf: 1, zf: 0, cf: 0, of: 0, R4: 0x8000007F, R5: 0x00000101, }) + # SP special part + self.assertEqual(compute('ADD R4, SP, R4 LSR 1 ', { + R4: 0x00000002, SP: 0x00000000, }), {R4: 0x00000001, SP: 0x00000000, }) + + # §A8.8.8: ADD{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm>, <type> <Rs> + self.assertEqual(compute('ADD R4, R6, R4 LSL R5', { + R4: 0x00000001, R5: 0x00000004, R6: 0, }), {R4: 0x00000010, R5: 0x00000004, R6: 0, }) + self.assertEqual(compute('ADD R4, R6, R4 LSR R5', { + R4: 0x00000110, R5: 0x80000004, R6: 0, }), {R4: 0x00000011, R5: 0x80000004, R6: 0, }) + self.assertEqual(compute('ADD R4, R6, R4 ASR R5', { + R4: 0x80000010, R5: 0xF0000001, R6: 0, }), {R4: 0xC0000008, R5: 0xF0000001, R6: 0, }) + self.assertEqual(compute('ADD R4, R6, R4 ROR R5', { + R4: 0x000000FF, R5: 0x00000F04, R6: 0, }), {R4: 0xF000000F, R5: 0x00000F04, R6: 0, }) + self.assertEqual(compute('ADDS R4, R6, R4 LSL R5', {R4: 0x00000001, R5: 0x00000004, R6: 0, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x00000010, R5: 0x00000004, R6: 0, }) + self.assertEqual(compute('ADDS R4, R6, R4 LSR R5', {R4: 0x00000110, R5: 0x80000004, R6: 0, }), { + nf: 0, zf: 0, cf: 1, of: 0, R4: 0x00000011, R5: 0x80000004, R6: 0, }) + self.assertEqual(compute('ADDS R4, R6, R4 ASR R5', {R4: 0x80000010, R5: 0xF0000001, R6: 0, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xC0000008, R5: 0xF0000001, R6: 0, }) + self.assertEqual(compute('ADDS R4, R6, R4 ROR R5', {R4: 0x000000FF, R5: 0x00000F04, R6: 0, }), { + nf: 1, zf: 0, cf: 1, of: 0, R4: 0xF000000F, R5: 0x00000F04, R6: 0, 
}) + + def test_ADR(self): + # §A8.8.12: ADR{<c>}{<q>} <Rd>, <label> <==> ADD{<c>}{<q>} <Rd>, PC, #<const> + pass + + def test_AND(self): + # §A8.8.13: AND{S}{<c>}{<q>} {<Rd>,} <Rn>, #<const> + self.assertRaises( + ValueError, compute, 'AND R4, 0x00000001 ') + self.assertEqual(compute('AND R4, R4, 0x00000001 ', { + R4: 0xDEADBEEF, }), {R4: 0x00000001, }) + self.assertEqual(compute('AND R4, R4, 0x00000000 ', { + R4: 0x00000000, }), {R4: 0x00000000, }) + self.assertEqual(compute('AND PC, R4, 0x00000001 ', { + R4: 0xFFFFFFFF, PC: 0x55555555, }), {R4: 0xFFFFFFFF, PC: 0x00000001, }) + self.assertEqual(compute('AND PC, R4, 0x00000000 ', { + R4: 0xFFFFFFFF, PC: 0x55555555, }), {R4: 0xFFFFFFFF, PC: 0x00000000, }) + self.assertEqual(compute('ANDS R4, R4, 0x000000FF ', {R4: 0xFFFFFF00, }), { + nf: 0, zf: 1, cf: 0, R4: 0x00000000, }) + self.assertEqual(compute('ANDS R4, R4, 0xFF000000 ', {R4: 0xFF000000, }), { + nf: 1, zf: 0, cf: 0, R4: 0xFF000000, }) + self.assertEqual(compute('ANDS PC, R4, 0x000000FF ', { + R4: 0xFFFFFF00, PC: 0x55555555, }), {R4: 0xFFFFFF00, PC: 0x00000000, }) + self.assertEqual(compute('ANDS PC, R4, 0xFF000000 ', { + R4: 0xFF000000, PC: 0x55555555, }), {R4: 0xFF000000, PC: 0xFF000000, }) + + # §A8.8.14: AND{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm> {,<shift>} + self.assertRaises( + ValueError, compute, 'AND R4, R5 ') + self.assertEqual(compute('AND R4, R4, R5 ', { + R4: 0xFFFFFFFE, R5: 0x00000001, }), {R4: 0x00000000, R5: 0x00000001, }) + self.assertEqual(compute('AND R4, R4, R5 LSL 1 ', { + R4: 0x00000011, R5: 0x00000008, }), {R4: 0x00000010, R5: 0x00000008, }) + self.assertEqual(compute('AND R4, R4, R5 LSR 2 ', { + R4: 0xFFFFFFFF, R5: 0x80000041, }), {R4: 0x20000010, R5: 0x80000041, }) + self.assertEqual(compute('AND R4, R4, R5 ASR 3 ', { + R4: 0xF00000FF, R5: 0x80000081, }), {R4: 0xF0000010, R5: 0x80000081, }) + self.assertEqual(compute('AND R4, R4, R5 ROR 4 ', { + R4: 0xFFFFFFFF, R5: 0x000000FF, }), {R4: 0xF000000F, R5: 0x000000FF, }) + 
self.assertEqual(compute('AND R4, R4, R5 RRX ', { + cf: 1, R4: 0xFFFFFFFF, R5: 0x00000101, }), {cf: 1, R4: 0x80000080, R5: 0x00000101, }) + self.assertEqual(compute('ANDS R4, R4, R5 ', {R4: 0xFFFFFFFE, R5: 0x00000001, }), { + nf: 0, zf: 1, cf: 0, R4: 0x00000000, R5: 0x00000001, }) + self.assertEqual(compute('ANDS R4, R4, R5 LSL 1 ', {R4: 0x00000011, R5: 0x00000008, }), { + nf: 0, zf: 0, cf: 0, R4: 0x00000010, R5: 0x00000008, }) + self.assertEqual(compute('ANDS R4, R4, R5 LSR 2 ', {R4: 0xFFFFFFFF, R5: 0x80000041, }), { + nf: 0, zf: 0, cf: 0, R4: 0x20000010, R5: 0x80000041, }) + self.assertEqual(compute('ANDS R4, R4, R5 ASR 3 ', {R4: 0xF00000FF, R5: 0x80000081, }), { + nf: 1, zf: 0, cf: 0, R4: 0xF0000010, R5: 0x80000081, }) + self.assertEqual(compute('ANDS R4, R4, R5 ROR 4 ', {R4: 0xFFFFFFFF, R5: 0x000000FF, }), { + nf: 1, zf: 0, cf: 0, R4: 0xF000000F, R5: 0x000000FF, }) + self.assertEqual(compute('ANDS R4, R4, R5 RRX ', {cf: 1, R4: 0xFFFFFFFF, R5: 0x00000101, }), { + nf: 1, zf: 0, cf: 0, R4: 0x80000080, R5: 0x00000101, }) + + # §A8.8.15: AND{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm>, <type> <Rs> + self.assertEqual(compute('AND R4, R6, R4 LSL R5', { + R4: 0x00000001, R5: 0x00000004, R6: -1, }), {R4: 0x00000010, R5: 0x00000004, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('AND R4, R6, R4 LSR R5', { + R4: 0x00000110, R5: 0x80000004, R6: -1, }), {R4: 0x00000011, R5: 0x80000004, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('AND R4, R6, R4 ASR R5', { + R4: 0x80000010, R5: 0xF0000001, R6: -1, }), {R4: 0xC0000008, R5: 0xF0000001, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('AND R4, R6, R4 ROR R5', { + R4: 0x000000FF, R5: 0x00000F04, R6: -1, }), {R4: 0xF000000F, R5: 0x00000F04, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('ANDS R4, R6, R4 LSL R5', {R4: 0x00000001, R5: 0x00000004, R6: -1, }), { + nf: 0, zf: 0, cf: 0, R4: 0x00000010, R5: 0x00000004, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('ANDS R4, R6, R4 LSR R5', {R4: 0x00000110, R5: 0x80000004, R6: -1, }), { + nf: 0, zf: 0, 
cf: 0, R4: 0x00000011, R5: 0x80000004, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('ANDS R4, R6, R4 ASR R5', {R4: 0x80000010, R5: 0xF0000001, R6: -1, }), { + nf: 1, zf: 0, cf: 0, R4: 0xC0000008, R5: 0xF0000001, R6: 0xFFFFFFFF, }) + self.assertEqual(compute('ANDS R4, R6, R4 ROR R5', {R4: 0x000000FF, R5: 0x00000F04, R6: -1, }), { + nf: 1, zf: 0, cf: 0, R4: 0xF000000F, R5: 0x00000F04, R6: 0xFFFFFFFF, }) + + def test_ASR(self): + # §A8.8.16: ASR{S}{<c>}{<q>} {<Rd>,} <Rm>, #<imm> <==> MOV{S}{<c>}{<q>} {<Rd>,} <Rm>, ASR #<n> + pass + + # §A8.8.17: ASR{S}{<c>}{<q>} {<Rd>,} <Rn>, <Rm> <==> MOV{S}{<c>}{<q>} {<Rd>,} <Rn>, ASR <Rm> + pass + + +if __name__ == '__main__': + testsuite = unittest.TestLoader().loadTestsFromTestCase(TestARMSemantic) + report = unittest.TextTestRunner(verbosity=2).run(testsuite) + exit(len(report.errors + report.failures)) diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py new file mode 100644 index 00000000..f3e82955 --- /dev/null +++ b/test/arch/msp430/arch.py @@ -0,0 +1,110 @@ + +import os +import time +from miasm2.arch.msp430.arch import * + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +def h2i(s): + return s.replace(' ', '').decode('hex') + + +def u16swap(i): + return struct.unpack('<H', struct.pack('>H', i))[0] + + +reg_tests_msp = [ + ("4456 mov.w SP, R4", + "0441"), + ("4d4f mov.b R13, R15", + "4f4d"), + ("49fe mov.w @R13, R9", + "294d"), + ("4982 mov.w 0x10(R14), R13", + "1d4e1000"), + ("4972 mov.w R14, 0x0(SP)", + "814e0000"), + ("46de mov.w 0x2(R14), 0x2(R13)", + "9d4e02000200"), + ("469e mov.w @0x2400, R11", + "1b420024"), + ("4c14 mov.w 0x4A96, R15", + "3f40964a"), + ("47c0 mov.w 0x1, R8", + "1843"), + ("48fc mov.w 0x2, R10", + "2a43"), + ("44fe mov.w 0x4, R7", + "2742"), + ("4a28 mov.w 0xFFFF, R15", + "3f43"), + ("4416 mov.w R5, @0x15C", + "82455c01"), + + ("4a22 add.w R11, R15", + "0f5b"), + ("448e sub.w R15, SP", + "018f"), + ("4474 cmp.b @R15, R13", + 
"6d9f"), + ("46a8 bit.w 0x1, R13", + "1db3"), + ("440a bis.w 0x5A08, R5", + "35d0085a"), + ("4c1a xor.w R15, R10", + "0aef"), + ("4408 and.b 0xFF, R5", + "75f3"), + + + ("4cf0 push.w SR", + "0212"), + ("4d6e push.w 0x0", + "0312"), + ("45dc push.w 0x2(R11)", + "1b120200"), + ("49cc push.w R11", + "0b12"), + + ("443a call 0x4B66", + "b012664b"), + + ("4442 jmp 0xFFFA", + "fd3f"), + ("4422 jnz 0xFFF2", + "f923"), + + ("xxxx mov.b @R13+, 0x0(R14)", + "fe4d0000"), + + ("4a36 mov.w @SP+, PC", + "3041"), + + +] + +ts = time.time() + +for s, l in reg_tests_msp: + print "-" * 80 + s = s[8:] + b = h2i((l)) + print repr(b) + mn = mn_msp430.dis(b, None) + print [str(x) for x in mn.args] + print s + print mn + assert(str(mn) == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + l = mn_msp430.fromstring(s, None) + # print l + assert(str(l) == s) + a = mn_msp430.asm(l) + print [x for x in a] + print repr(b) + # print mn.args + assert(b in a) diff --git a/test/arch/msp430/sem.py b/test/arch/msp430/sem.py new file mode 100644 index 00000000..55da5d56 --- /dev/null +++ b/test/arch/msp430/sem.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import unittest +import logging + +from miasm2.ir.symbexec import symbexec +from miasm2.arch.msp430.arch import mn_msp430 as mn, mode_msp430 as mode +from miasm2.arch.msp430.sem import ir_msp430 as ir +from miasm2.arch.msp430.regs import * +from miasm2.expression.expression import * + +logging.getLogger('cpuhelper').setLevel(logging.ERROR) +EXCLUDE_REGS = set([res]) + + +def M(addr): + return ExprMem(ExprInt_fromsize(16, addr), 16) + + +def compute(asm, inputstate={}, debug=False): + sympool = dict(regs_init) + sympool.update({k: ExprInt_from(k, v) for k, v in inputstate.iteritems()}) + symexec = symbexec(mn, sympool) + instr = mn.fromstring(asm, mode) + code = mn.asm(instr)[0] + instr = mn.dis(code, mode) + instr.offset = inputstate.get(PC, 0) + interm = ir() + interm.add_instr(instr) + 
symexec.emul_ir_blocs(interm, instr.offset) + if debug: + for k, v in symexec.symbols.items(): + if regs_init.get(k, None) != v: + print k, v + return {k: v.arg.arg for k, v in symexec.symbols.items() + if k not in EXCLUDE_REGS and regs_init.get(k, None) != v} + + +class TestMSP430Semantic(unittest.TestCase): + + def test_ADD_W(self): + # Testing status flags + self.assertEqual(compute('add.w 0x0000, R4', {R4: 0x0001, }), { + R4: 0x0001, nf: 0, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w 0x0000, R4', {R4: 0xFFFF, }), { + R4: 0xFFFF, nf: 1, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w 0x0000, R4', {R4: 0x0000, }), { + R4: 0x0000, nf: 0, zf: 1, cf: 0, of: 0}) + self.assertEqual(compute('add.w 0x0002, R4', {R4: 0xFFFF, }), { + R4: 0x0001, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('add.w 0x7FFF, R4', {R4: 0x7FFF, }), { + R4: 0xFFFE, nf: 1, zf: 0, cf: 0, of: 1}) + self.assertEqual(compute('add.w 0x8001, R4', {R4: 0x8001, }), { + R4: 0x0002, nf: 0, zf: 0, cf: 1, of: 1}) + # Testing addressing mode + self.assertEqual(compute('add.w R5, R4', {R4: 0x1F53, R5: 0x28C4, }), { + R4: 0x4817, R5: 0x28C4, nf: 0, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w @R5, R4', {R4: 0x1F53, R5: 0x28C4, M(0x28C4): 0, }), { + R4: 0x1F53, R5: 0x28C4, M(0x28C4): 0, nf: 0, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w @R5+, R4', {R4: 0x1F53, R5: 0x28C4, M(0x28C4): 0, }), { + R4: 0x1F53, R5: 0x28C6, M(0x28C4): 0, nf: 0, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w 1(R5), R4', {R4: 0x1F53, R5: 0x28C4, M(0x28C5): 0, }), { + R4: 0x1F53, R5: 0x28C4, M(0x28C5): 0, nf: 0, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w @0x0000, R4', {R4: 0x1F53, M(0): 0x28C4, }), { + R4: 0x4817, M(0): 0x28C4, nf: 0, zf: 0, cf: 0, of: 0}) + self.assertEqual(compute('add.w 0x0000, R4', {R4: 0x1F53, }), { + R4: 0x1F53, nf: 0, zf: 0, cf: 0, of: 0}) + + def test_AND_B(self): + # Testing status flags + self.assertEqual(compute('and.b 0x0001, R4', {R4: 
0x0001, }), { + R4: 0x0001, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.b 0xFFFF, R4', {R4: 0xFFFF, }), { + R4: 0x00FF, nf: 1, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.b 0x0000, R4', {R4: 0x0000, }), { + R4: 0x0000, nf: 0, zf: 1, cf: 0, of: 0}) + # Testing addressing mode + self.assertEqual(compute('and.b R5, R4', {R4: 0x1F53, R5: 0x38C4, }), { + R4: 0x0040, R5: 0x38C4, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.b @R5, R4', {R4: 0x1F53, R5: 0x38C4, M(0x38C4): 0, }), { + R4: 0x0000, R5: 0x38C4, M(0x38C4): 0, nf: 0, zf: 1, cf: 0, of: 0}) + self.assertEqual(compute('and.b @R5+, R4', {R4: 0x1F53, R5: 0x38C4, M(0x38C4): 0, }), { + R4: 0x0000, R5: 0x38C5, M(0x38C4): 0, nf: 0, zf: 1, cf: 0, of: 0}) + self.assertEqual(compute('and.b 1(R5), R4', {R4: 0x1F53, R5: 0x38C4, M(0x38C5): 1, }), { + R4: 0x0001, R5: 0x38C4, M(0x38C5): 1, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.b @0x0000, R4', {R4: 0x1F53, M(0): 0x38C4, }), { + R4: 0x0040, M(0): 0x38C4, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.b 0xFFFF, R4', {R4: 0x1F53, }), { + R4: 0x0053, nf: 0, zf: 0, cf: 1, of: 0}) + + def test_AND_W(self): + # Testing status flags + self.assertEqual(compute('and.w 0x0001, R4', {R4: 0x0001, }), { + R4: 0x0001, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.w 0xFFFF, R4', {R4: 0xFFFF, }), { + R4: 0xFFFF, nf: 1, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.w 0x0000, R4', {R4: 0x0000, }), { + R4: 0x0000, nf: 0, zf: 1, cf: 0, of: 0}) + # Testing addressing mode + self.assertEqual(compute('and.w R5, R4', {R4: 0x1F53, R5: 0x38C4, }), { + R4: 0x1840, R5: 0x38C4, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.w @R5, R4', {R4: 0x1F53, R5: 0x38C4, M(0x38C4): 0, }), { + R4: 0x0000, R5: 0x38C4, M(0x38C4): 0, nf: 0, zf: 1, cf: 0, of: 0}) + self.assertEqual(compute('and.w @R5+, R4', {R4: 0x1F53, R5: 0x38C4, M(0x38C4): 0, }), { + R4: 0x0000, R5: 0x38C6, M(0x38C4): 0, nf: 0, zf: 1, cf: 0, 
of: 0}) + self.assertEqual(compute('and.w 1(R5), R4', {R4: 0x1F53, R5: 0x38C4, M(0x38C5): 1, }), { + R4: 0x0001, R5: 0x38C4, M(0x38C5): 1, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.w @0x0000, R4', {R4: 0x1F53, M(0): 0x38C4, }), { + R4: 0x1840, M(0): 0x38C4, nf: 0, zf: 0, cf: 1, of: 0}) + self.assertEqual(compute('and.w 0xFFFF, R4', {R4: 0x1F53, }), { + R4: 0x1F53, nf: 0, zf: 0, cf: 1, of: 0}) + + def test_BIC_B(self): + # Testing addressing mode + self.assertEqual( + compute('bic.b 0x0000, R4', {R4: 0x1F53, }), {R4: 0x0053, }) + # self.assertEqual(compute('bic.b 0x0000, @R4', {R4:0x1F53,M(0x1F53):0x569D, }), {R4:0x1F53,M(0x1F53):0x38C4, }) + # self.assertEqual(compute('bic.b 0x38C4, @R4+', {R4:0x1F53,M(0x1F53):0x569D, }), {R4:0x1F55,M(0x1F53):0x38C4, }) + # self.assertEqual(compute('bic.b 0x38C4, 1(R4)', {R4:0x1F53,M(0x1F54):0x569D, }), {R4:0x1F53,M(0x1F54):0x5619, }) + # self.assertEqual(compute('bic.b 0x0000, @0x0000', { M(0x0000):0x569D, }), { M(0x0000):0x38C4, }) + # self.assertEqual(compute('bic.b 0x38C4, 0xFFFE', { + # }), { }) + + def test_CALL(self): + # Testing addressing mode + self.assertEqual(compute('call R4', {PC: 0x0100, SP: 0x0400, R4: 0x1F53, }), { + PC: 0x1F53, SP: 0x03FE, R4: 0x1F53, M(0x03FE): 0x102, }) + self.assertEqual(compute('call @R4', {PC: 0x0100, SP: 0x0400, R4: 0x1F53, M(0x1F53): 0x38C4, }), { + PC: 0x38C4, SP: 0x03FE, R4: 0x1F53, M(0x1F53): 0x38C4, M(0x03FE): 0x102, }) + self.assertEqual(compute('call @R4+', {PC: 0x0100, SP: 0x0400, R4: 0x1F53, M(0x1F53): 0x38C4, }), { + PC: 0x38C4, SP: 0x03FE, R4: 0x1F55, M(0x1F53): 0x38C4, M(0x03FE): 0x102, }) + self.assertEqual(compute('call 1(R4)', {PC: 0x0100, SP: 0x0400, R4: 0x1F53, M(0x1F54): 0x38C4, }), { + PC: 0x38C4, SP: 0x03FE, R4: 0x1F53, M(0x1F54): 0x38C4, M(0x03FE): 0x104, }) + self.assertEqual(compute('call @0x0000', {PC: 0x0100, SP: 0x0400, M(0x0000): 0x38C4, }), { + PC: 0x38C4, SP: 0x03FE, M(0x0000): 0x38C4, M(0x03FE): 0x104, }) + self.assertEqual(compute('call 
0xFFFE', {PC: 0x0100, SP: 0x0400, }), { + PC: 0xFFFE, SP: 0x03FE, M(0x03FE): 0x104, }) + +if __name__ == '__main__': + testsuite = unittest.TestLoader().loadTestsFromTestCase(TestMSP430Semantic) + report = unittest.TextTestRunner(verbosity=2).run(testsuite) + exit(len(report.errors + report.failures)) diff --git a/test/arch/sh4/arch.py b/test/arch/sh4/arch.py new file mode 100644 index 00000000..b7e79d30 --- /dev/null +++ b/test/arch/sh4/arch.py @@ -0,0 +1,436 @@ +import os +import time +from sys import stderr +from miasm2.arch.sh4.arch import * + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) + + +def h2i(s): + return s.replace(' ', '').decode('hex') + +reg_tests_sh4 = [ + # vxworks + ("c80022f2 MOV 0x10, R6", + "10e6"), + ("c8002250 MOV 0xFFFFFFFF, R0", + "ffe0"), + ("c800226a MOV.W @(PC,0xC0), R9", + "5e99"), + ("c8002006 MOV.L @(PC&0xFFFFFFFC,0x10), R15", + "03df"), + ("c800cfc4 MOV R4, R9", + "4369"), + ("C8005004 MOV.B R1, @R2", + "1022"), + ("C8002E04 MOV.W R0, @R8", + '0128'), + ("c800223e MOV.L R1, @R14", + "122E"), + + ("c8002002 MOV.L @R1, R0", + "1260"), + ("c8002E08 MOV.W @R8, R1", + "8161"), + ("c800357c MOV.B @R4, R1", + "4061"), + + ("c8002220 MOV.L R8, @-R15", + "862f"), + ("c8022a66 MOV.B R4, @-R0", + "4420"), + ("c8002310 MOV.L @R15+, R14", + "f66e"), + ("c80038a4 MOV.W @R8+, R5", + "8565"), + ("xxxxxxxx MOV.B R0, @(R8,0x2)", + "8280"), + ("xxxxxxxx MOV.W R0, @(R8,0x4)", + "8281"), + ("c8002274 MOV.L R0, @(R9,0x8)", + "0219"), + ("xxxxxxxx MOV.B @(R8,0x8), R0", + "8884"), + ("xxxxxxxx MOV.W @(R8,0x10), R0", + "8885"), + ("c8002500 MOV.L @(R14,0x4), R5", + "e155"), + ("xxxxxxxx MOV.B R4, @(R0,R8)", + "4408"), + ("xxxxxxxx MOV.W R4, @(R0,R8)", + "4508"), + ("xxxxxxxx MOV.L R4, @(R0,R8)", + "4608"), + ("xxxxxxxx MOV.B @(R0,R4), R8", + "4c08"), + ("xxxxxxxx MOV.W @(R0,R4), R8", + "4d08"), + ("xxxxxxxx MOV.L @(R0,R4), R8", + "4e08"), + ("xxxxxxxx MOV.B R0, @(GBR,0x4)", + "04c0"), + 
("xxxxxxxx MOV.W R0, @(GBR,0x8)", + "04c1"), + ("xxxxxxxx MOV.L R0, @(GBR,0x10)", + "04c2"), + ("xxxxxxxx MOV.B @(GBR,0x4), R0", + "04c4"), + ("xxxxxxxx MOV.W @(GBR,0x8), R0", + "04c5"), + ("xxxxxxxx MOV.L @(GBR,0x10), R0", + "04c6"), + #("xxxxxxxx MOV PC&0xFFFFFFFC+0x14, R0", + # "04c7"), + ("xxxxxxxx SWAPB R2, R1", + "2861"), + ("c803f492 SWAPW R4, R9", + "4969"), + ("xxxxxxxx XTRCT R4, R9", + "4d29"), + ("c8002270 ADD R12, R9", + "cc39"), + ("c8002238 ADD 0xFFFFFFFC, R15", + "FC7F"), + ("c80164cc ADDC R0, R1", + "0e31"), + ("xxxxxxxx ADDV R0, R1", + "0f31"), + ("c8002994 CMPEQ 0x20, R0", + "2088"), + ("c80029d2 CMPEQ R2, R1", + "2031"), + ("c8003964 CMPHS R5, R3", + "5233"), + ("c8002df2 CMPGE R0, R1", + "0331"), + ("c80029a4 CMPHI R1, R0", + "1630"), + ("c8002bfe CMPGT R10, R8", + "a738"), + ("c8002bf8 CMPPZ R0", + "1140"), + ("c8006294 CMPPL R2", + "1542"), + ("c8033800 CMPSTR R14, R4", + "ec24"), + ("xxxxxxxx DIV1 R14, R4", + "e434"), + ("c8d960de DIV0S R0, R3", + "0723"), + ("xxxxxxxx DIV0U ", + "1900"), + ("c800dcd8 DMULS R1, R0", + "1d30"), + ("c80164da DMULU R3, R8", + "3538"), + ("c80024e2 DT R10", + "104a"), + ("c800343a EXTSB R1, R1", + "1e61"), + ("c8002bf6 EXTSW R0, R0", + "0f60"), + ("c8002fba EXTUB R0, R0", + "0c60"), + ("c8002398 EXTUW R0, R0", + "0d60"), + ("xxxxxxxx MAC.L @R5+, @R4+", + "5f04"), + ("xxxxxxxx MAC.W @R5+, @R4+", + "5f44"), + ("c8005112 MULL R1, R3", + "1703"), + ("xxxxxxxx MULSW R1, R3", + "1F23"), + ("xxxxxxxx MULUW R1, R3", + "1e23"), + ("c8004856 NEG R1, R8", + "1b68"), + ("c80054fc NEGC R9, R7", + "9a67"), + ("c8004b36 SUB R1, R5", + "1835"), + ("c800a536 SUBC R1, R0", + "1a30"), + ("xxxxxxxx SUBV R1, R0", + "1b30"), + ("c80023ca AND R0, R5", + "0925"), + ("c800257c AND 0x2, R0", + "02c9"), + ("xxxxxxxx AND.B 0x2, @(GBR,R0)", + "02cd"), + ("c80065fe NOT R5, R1", + "5761"), + ("c8002586 OR R10, R1", + "ab21"), + ("c80023aa OR 0x4, R0", + "04cb"), + ("xxxxxxxx OR.B 0x4, @(GBR,R0)", + "04cf"), + ("xxxxxxxx TAS.B @R8", + "1b48"), 
+ ("c8002368 TST R10, R13", + "a82d"), + ("c8003430 TST 0x11, R0", + "11c8"), + ("xxxxxxxx TST.B 0x4, @(GBR,R0)", + "04cc"), + ("c8003978 XOR R1, R6", + "1a26"), + ("c8028270 XOR 0x1, R0", + "01ca"), + ("xxxxxxxx XOR.B 0x4, @(GBR,R0)", + "04cE"), + ("xxxxxxxx ROTL R9", + "0449"), + ("xxxxxxxx ROTR R9", + "0549"), + ("xxxxxxxx ROTCL R9", + "2449"), + ("xxxxxxxx ROTCR R9", + "2549"), + ("xxxxxxxx SHAL R11", + "204b"), + ("xxxxxxxx SHAR R11", + "214b"), + ("c800236c SHLD R6, R10", + "6d4a"), + ("xxxxxxxx SHLL R11", + "004b"), + ("xxxxxxxx SHLR R11", + "014b"), + ("xxxxxxxx SHLL2 R11", + "084b"), + ("xxxxxxxx SHLR2 R11", + "094b"), + ("xxxxxxxx SHLL8 R11", + "184b"), + ("xxxxxxxx SHLR8 R11", + "194b"), + ("xxxxxxxx SHLL16 R11", + "284b"), + ("xxxxxxxx SHLR16 R11", + "294b"), + ("c8002c00 BF 0xFFFFFFF4", + "f48b"), + ("c80023c2 BFS 0xFFFFFFD8", + "d88f"), + ("c8002266 BT 0x5B", + "5b89"), + ("c8002266 BTS 0x5C", + "5c8d"), + ("c8002326 BRA 0xFFFFFFF0", + "f0af"), + ("c8004b4a BRAF R1", + "2301"), + ("c8055da4 BSR 0xFFFFFE48", + "48be"), + ("xxxxxxxx BSRF R1", + "0301"), + ("c80027b4 JMP.L @R1", + "2b41"), + ("c800200c JSR.L @R0", + "0b40"), + ("c800231a RTS ", + "0b00"), + ("xxxxxxxx CLRMAC ", + "2800"), + ("xxxxxxxx CLRS ", + "4800"), + ("xxxxxxxx CLRT ", + "0800"), + ("c8002004 LDC R0, SR", + "0e40"), + ("c800200e LDC R1, GBR", + "1e41"), + ("c8064bd4 LDC R8, VBR", + "2e48"), + ("xxxxxxxx LDC R8, SSR", + "3e48"), + ("xxxxxxxx LDC R8, SPC", + "4e48"), + ("xxxxxxxx LDC R8, DBR", + "fa48"), + ("xxxxxxxx LDC R8, R0_BANK", + "8e48"), + ("xxxxxxxx LDC.L @R8+, SR", + "0748"), + ("xxxxxxxx LDC.L @R8+, GBR", + "1748"), + ("xxxxxxxx LDC.L @R8+, VBR", + "2748"), + ("xxxxxxxx LDC.L @R8+, SSR", + "3748"), + ("xxxxxxxx LDC.L @R8+, SPC", + "4748"), + ("xxxxxxxx LDC.L @R8+, DBR", + "f648"), + ("xxxxxxxx LDC.L @R8+, R2_BANK", + "a748"), + ("xxxxxxxx LDS R8, MACH", + "0a48"), + ("xxxxxxxx LDS R8, MACL", + "1a48"), + ("xxxxxxxx LDS R8, PR", + "2a48"), + ("xxxxxxxx LDS.L @R8+, MACH", + 
"0648"), + ("xxxxxxxx LDS.L @R8+, MACL", + "1648"), + ("xxxxxxxx LDTLB ", + "3800"), + ("xxxxxxxx MOVCA.L R0, @R8", + "c308"), + ("xxxxxxxx NOP ", + "0900"), + ("xxxxxxxx OCBI.L @R8", + "9308"), + ("xxxxxxxx OCBP.L @R8", + "a308"), + ("xxxxxxxx OCBWB.L @R8", + "b308"), + ("xxxxxxxx PREF.L @R8", + "8308"), + ("xxxxxxxx STS MACH, R8", + "0a08"), + ("xxxxxxxx STS MACL, R8", + "1a08"), + ("xxxxxxxx STS PR, R8", + "2a08"), + ("xxxxxxxx STS.L MACH, @-R8", + "0248"), + ("xxxxxxxx STS.L MACL, @-R8", + "1248"), + ("xxxxxxxx STS.L PR, @-R8", + "2248"), + + + + + + ("c8004b50 STC GBR, R0", + "1200"), + ("c8064516 STC VBR, R1", + "2201"), + ("c8004b54 STC SSR, R1", + "3201"), + ("c801ed6c STC SPC, R0", + "4200"), + ("xxxxxxxx STC SGR, R0", + "3a00"), + ("xxxxxxxx STC DBR, R0", + "fa00"), + ("c8004b56 STC R3_BANK, R1", + "B201"), + ("xxxxxxxx STC.L SR, @-R8", + "0348"), + ("xxxxxxxx STC.L GBR, @-R8", + "1348"), + ("xxxxxxxx STC.L VBR, @-R8", + "2348"), + ("xxxxxxxx STC.L SSR, @-R8", + "3348"), + ("xxxxxxxx STC.L SPC, @-R8", + "4348"), + ("xxxxxxxx STC.L DBR, @-R8", + "f248"), + ("xxxxxxxx STC.L R7_BANK, @-R8", + "f348"), + ("c803b130 TRAPA 0xE0", + "e0c3"), + + ("xxxxxxxx FLDI0 FR8", + "8df8"), + ("xxxxxxxx FLDI1 FR8", + "9df8"), + ("c8019ca8 FMOV FR15, FR5", + "fcf5"), + ("c800affe FMOV.S @R1, FR4", + "18f4"), + ("c80283f6 FMOV.S @(R0,R14), FR5", + "e6f5"), + ("c800aff8 FMOV.S @R1+, FR5", + "19f5"), + ("c80cb692 FMOV.S FR0, @R2", + "0af2"), + ("c80cb694 FMOV.S FR1, @-R2", + "1bf2"), + ("c80283aa FMOV.S FR1, @(R0,R14)", + "17fe"), + ("c800ce16 FLDS FR13, FPUL", + "1dfd"), + ("c800ce08 FSTS FPUL, FR13", + "0dfd"), + ("xxxxxxxx FABS FR8", + "5df8"), + ("c800cf28 FADD FR2, FR6", + "20f6"), + ("c805dacc FCMPEQ FR2, FR6", + "24f6"), + ("c8028406 FCMPGT FR4, FR2", + "45f2"), + ("c8019ca4 FDIV FR2, FR12", + "23fc"), + ("c800ce5e FLOAT FPUL, FR2", + "2df2"), + ("xxxxxxxx FMAC FR0, FR1, FR2", + "1ef2"), + ("c800b006 FMUL FR2, FR4", + "22f4"), + ("c805e412 FNEG FR14", + "4dfe"), + 
("xxxxxxxx FSQRT FR14", + "6dfe"), + ("c8030400 FSUB FR4, FR2", + "41f2"), + ("c80303ba FTRC FR2, FPUL", + "3df2"), + +] + +for s, l in reg_tests_sh4: + print "-" * 80 + s = s[12:] + b = h2i((l)) + print b.encode('hex') + mn = mn_sh4.dis(b, None) + print [str(x) for x in mn.args] + print s + print mn + assert(str(mn) == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + l = mn_sh4.fromstring(s, None) + # print l + assert(str(l) == s) + a = mn_sh4.asm(l) + print [x for x in a] + print repr(b) + # print mn.args + assert(b in a) + + +# speed test +o = "" +for s, l, in reg_tests_sh4: + s = s[12:] + b = h2i((l)) + o += b + +while len(o) < 1000: + o += o +bs = bin_stream_str(o) +off = 0 +instr_num = 0 +ts = time.time() +while off < bs.getlen(): + mn = mn_sh4.dis(bs, None, off) + print instr_num, off, mn.l, str(mn) + instr_num += 1 + off += mn.l +print 'instr per sec:', instr_num / (time.time() - ts) + +import cProfile +cProfile.run(r'mn_sh4.dis("\x17\xfe", None)') diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py new file mode 100644 index 00000000..98545b4f --- /dev/null +++ b/test/arch/x86/arch.py @@ -0,0 +1,1779 @@ +import os +import time +from miasm2.arch.x86.arch import * + +filename = os.environ.get('PYTHONSTARTUP') +if filename and os.path.isfile(filename): + execfile(filename) +for s in ["[EAX]", + "[0x10]", + "[EBX + 0x10]", + "[EBX + ECX*0x10]", + "[EBX + ECX*0x10 + 0x1337]"]: + (e, a, b) = deref_mem_ad.scanString(s).next() + print 'expr', e[0] + +print '---' + +mylabel16 = ExprId('mylabel16', 16) +mylabel32 = ExprId('mylabel32', 32) +mylabel64 = ExprId('mylabel64', 64) + +reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) +reg_and_id.update({'mylabel16': mylabel16, + 'mylabel32': mylabel32, + 'mylabel64': mylabel64, + }) + + +def my_ast_id2expr(t): + r = reg_and_id.get(t, ExprId(t, size=32)) + return r + +my_var_parser = parse_ast(my_ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + +for s in ['EAX', + "BYTE PTR 
[EAX]", + "WORD PTR [EAX]", + "DWORD PTR [ECX+0x1337]", + "QWORD PTR [RAX+4*RCX + 0x1337]", + "DWORD PTR [EAX+EBX]", + "QWORD PTR [RAX+RBX+0x55667788]", + "BYTE PTR CS:[EAX]", + "QWORD PTR [RAX+RBX+mylabel64]", + "BYTE PTR [RAX+RBX+mylabel64]", + "BYTE PTR [AX+BX+mylabel16]", + "BYTE PTR [mylabel32]", + ]: + print '*' * 80 + print s + (e, a, b) = rmarg.scanString(s).next() + print 'expr', e[0] + e[0].visit(print_size) + + +def h2i(s): + return int(s.replace(' ', '').decode('hex')[::].encode('hex'), 16) + + +m16 = 16 # (16, 16) +m32 = 32 # (32, 32) +m64 = 64 # (64, 64) +reg_tests = [ + + (m32, "00000000 AAA", + "37"), + (m32, "00000000 AAS", + "3F"), + (m32, "00000000 AAD 0x11", + "d511"), + (m32, "00000000 AAM 0x11", + "d411"), + (m32, "00000000 ADC AL, 0x11", + "1411"), + (m32, "00000000 ADC EAX, 0x11223344", + "1544332211"), + (m16, "00000000 ADC AX, 0x1122", + "152211"), + (m64, "00000000 ADC EAX, 0x11223344", + "1544332211"), + (m64, "00000000 ADC RAX, 0x11223344", + "481544332211"), + (m32, "00000000 ADC EAX, 0xFFFFFFFC", + "83d0fc"), + (m64, "00000000 ADC RAX, 0xFFFFFFFFFFFFFFFC", + "4883d0fc"), + (m16, "00000000 ADC AX, 0xFFFC", + "83d0fc"), + (m64, "00000000 ADC EAX, 0xFFFFFFFC", + "15fcffffff"), + (m64, "00000000 ADC RAX, 0xFFFFFFFFFFFFFFFC", + "4815fcffffff"), + (m16, "00000000 ADC WORD PTR [BX+SI], 0x1122", + "81102211"), + (m32, "00000000 ADC DWORD PTR [EAX], 0x11223344", + "811044332211"), + (m32, "00000000 ADC DWORD PTR [EAX+EBX+0xFFFFFFFE], 0x11223344", + "815418fe44332211"), + (m32, "00000000 ADC DWORD PTR [EAX+EBX+0x55667788], 0x11223344", + "8194188877665544332211"), + (m64, "00000000 ADC DWORD PTR [RAX], 0x11223344", + "811044332211"), + (m64, "00000000 ADC QWORD PTR [RAX], 0x11223344", + "48811044332211"), + (m64, "00000000 ADC QWORD PTR [RAX+RBX], 0x11223344", + "4881141844332211"), + (m64, "00000000 ADC QWORD PTR [RAX+RBX+0x55667788], 0x11223344", + "488194188877665544332211"), + (m64, "00000000 ADC QWORD PTR [RAX+RBX+0xFFFFFFFFFFFFFFFE], 
0x11223344", + "48815403fe44332211"), + (m64, "00000000 ADC QWORD PTR [EAX], 0x11223344", + "6748811044332211"), + (m32, "00000000 ADC BYTE PTR [EAX], 0x11", + "801011"), + (m16, "00000000 ADC DX, 0x1122", + "81d22211"), + (m32, "00000000 ADC EDX, 0x11223344", + "81d244332211"), + (m64, "00000000 ADC RDX, 0x11223344", + "4881d244332211"), + (m32, "00000000 ADC DWORD PTR [EAX+EBX], 0x11223344", + "81141844332211"), + (m32, "00000000 ADC DWORD PTR [EAX+EBX], EAX", + "110418"), + (m64, "00000000 ADC QWORD PTR [RAX+RBX], RAX", + "48110418"), + (m32, "00000000 ADC BYTE PTR [EAX+EBX], AL", + "100418"), + (m32, "00000000 ADC AL, BYTE PTR [EAX+EBX]", + "120418"), + (m16, "00000000 ADC WORD PTR [BX+SI], DX", + "1110"), + (m32, "00000000 ADC WORD PTR [BX+SI], DX", + "66671110"), + (m16, "00000000 ADC DWORD PTR [EBX+ESI], EDX", + "6667111433"), + + # prefix test + (m32, "00000000 ADC AX, 0x1122", + "66152211"), + + (m32, "00000000 ADC EAX, 0x11223344", + "1544332211"), + + (m16, "00000000 ADC WORD PTR [BX+DI], 0x1", + "831101"), + (m32, "00000000 ADC DWORD PTR [EAX+EBX+0xFFFFFFFE], 0x1", + "835403fe01"), + (m32, "00000000 ADC WORD PTR [EAX+EBX+0xFFFFFFFE], 0x1", + "66835403fe01"), + (m64, "00000000 ADC DWORD PTR [RAX+RBX+0xFFFFFFFFFFFFFFFE], 0x1", + "835403fe01"), + # + (m32, "00000000 ADC DWORD PTR [EAX+EBX*0x4+0xFFFFFFFE], 0x1", + "835498fe01"), + + (m64, "00000000 ADC QWORD PTR [RAX+RBX], R8", + "4c110418"), + (m64, "00000000 ADC QWORD PTR [RAX+RBX], R15", + "4c113c18"), + + (m64, "00000000 ADC QWORD PTR [R8], RAX", + "491100"), + (m64, "00000000 ADC QWORD PTR [R8+R9], RAX", + "4b110408"), + (m64, "00000000 ADC QWORD PTR [R8+RBP], RAX", + "49110428"), + (m64, "00000000 ADC QWORD PTR [RBP+R8*0x4], RAX", + "4a11448500"), + (m64, "00000000 ADC QWORD PTR [RBP+R12*0x4], RAX", + "4a1144a500"), + (m64, "00000000 ADC QWORD PTR [RSP+R12*0x4], RAX", + "4a1104a4"), + (m64, "00000000 ADC QWORD PTR [R12*0x5], RAX", + "4b1104a4"), + (m64, "00000000 ADC QWORD PTR [R12*0x5+0x11], RAX", + 
"4b1144a411"), + (m64, "00000000 ADC QWORD PTR [RBP+R12*0x4+0x10], RAX", + "4a1144a510"), + + (m32, "00000000 ADD AL, 0x11", + "0411"), + (m32, "00000000 ADD EAX, 0x11223344", + "0544332211"), + + + (m32, "00000000 AND AL, 0x11", + "2411"), + (m32, "00000000 AND EAX, 0x11223344", + "2544332211"), + (m64, "00000000 AND CX, R14W", + "664123CE"), + (m64, "00000000 AND R12W, R14W", + "664521f4"), + + + + (m32, "00000000 BSF EAX, DWORD PTR [EAX]", + "0fbc00"), + + (m32, "00000000 BSR EAX, DWORD PTR [EAX]", + "0fbd00"), + + (m32, "00000000 BSWAP EAX", + "0fc8"), + + (m32, "00000000 BT DWORD PTR [EAX], EAX", + "0fa300"), + (m32, "00000000 BT DWORD PTR [EAX], 0x11", + "0fba2011"), + (m32, "00000000 BT DWORD PTR [EAX], 0xFF", + "0fba20ff"), + (m64, "00000000 BT R9D, 0x1E", + "410fbae11e"), + + (m32, "00000000 BTC DWORD PTR [EAX], EAX", + "0fbb00"), + (m32, "00000000 BTC DWORD PTR [EAX], 0x42", + "0fba3842"), + + + (m32, "00000000 BTR DWORD PTR [EAX], EAX", + "0fb300"), + (m32, "00000000 BTR DWORD PTR [EAX], 0x42", + "0fba3042"), + + + (m32, "00000000 BTS DWORD PTR [EAX], EAX", + "0fab00"), + (m32, "00000000 BTS DWORD PTR [EAX], 0x42", + "0fba2842"), + + + (m32, "00000000 CALL 0x112233", + "e833221100"), + (m64, "00000000 CALL 0x112233", + "e833221100"), + (m32, "00000000 CALL DWORD PTR [EAX]", + "ff10"), + (m64, "00000000 CALL QWORD PTR [RAX]", + "ff10"), + + (m32, "00000000 CALL 0x6655:0x44332211", + "9a112233445566"), + (m32, "00000000 CALL 0x6655:0xFF332211", + "9a112233FF5566"), + + + (m16, "00000000 CBW", + "98"), + (m16, "00000000 CWDE", + "6698"), + (m32, "00000000 CWDE", + "98"), + (m64, "00000000 CWDE", + "98"), + (m64, "00000000 CDQE", + "4898"), + + (m32, "00000000 CMOVO EAX, DWORD PTR [EAX]", + "0f4000"), + (m32, "00000000 CMOVNO EAX, DWORD PTR [EAX]", + "0f4100"), + (m32, "00000000 CMOVB EAX, DWORD PTR [EAX]", + "0f4200"), + (m32, "00000000 CMOVAE EAX, DWORD PTR [EAX]", + "0f4300"), + (m32, "00000000 CMOVZ EAX, DWORD PTR [EAX]", + "0f4400"), + (m32, "00000000 
CMOVNZ EAX, DWORD PTR [EAX]", + "0f4500"), + (m32, "00000000 CMOVBE EAX, DWORD PTR [EAX]", + "0f4600"), + (m32, "00000000 CMOVA EAX, DWORD PTR [EAX]", + "0f4700"), + (m32, "00000000 CMOVS EAX, DWORD PTR [EAX]", + "0f4800"), + (m32, "00000000 CMOVNS EAX, DWORD PTR [EAX]", + "0f4900"), + (m32, "00000000 CMOVPE EAX, DWORD PTR [EAX]", + "0f4A00"), + (m32, "00000000 CMOVNP EAX, DWORD PTR [EAX]", + "0f4B00"), + (m32, "00000000 CMOVL EAX, DWORD PTR [EAX]", + "0f4C00"), + (m32, "00000000 CMOVGE EAX, DWORD PTR [EAX]", + "0f4D00"), + (m32, "00000000 CMOVLE EAX, DWORD PTR [EAX]", + "0f4E00"), + (m32, "00000000 CMOVG EAX, DWORD PTR [EAX]", + "0f4F00"), + + (m32, "00000000 CMP EAX, DWORD PTR [EAX]", + "3b00"), + + (m32, "00000000 CMPXCHG BYTE PTR [EAX], AL", + "0fb000"), + (m32, "00000000 CMPXCHG DWORD PTR [EAX], EAX", + "0fb100"), + + (m32, "00000000 CDQ", + "99"), + (m64, "00000000 CQO", + "4899"), + + (m32, "00000000 DEC BYTE PTR [EAX]", + "fe08"), + (m32, "00000000 DEC DWORD PTR [EAX]", + "ff08"), + + (m32, "00000000 DEC ECX", + "49"), + + (m32, "00000000 DIV BL", + "f6f3"), + (m32, "00000000 DIV EBX", + "f7f3"), + + (m32, "00000000 ENTER 0x12, 0x0", + "c8120000"), + (m32, "00000000 ENTER 0x12, 0x66", + "c8120066"), + + (m32, "00000000 F2XM1", + "D9f0"), + (m32, "00000000 FABS", + "D9e1"), + + (m16, "00000000 FADD DWORD PTR [BX+SI]", + "D800"), + (m32, "00000000 FADD DWORD PTR [EAX]", + "D800"), + (m32, "00000000 FADD QWORD PTR [EAX]", + "DC00"), + + (m32, "00000000 FADD ST, ST(2)", + "D8C2"), + (m32, "00000000 FADD ST(2), ST", + "DCC2"), + + (m32, "00000000 FADDP ST(2), ST", + "DEC2"), + + (m16, "00000000 FIADD DWORD PTR [BX+SI]", + "DA00"), + (m32, "00000000 FIADD DWORD PTR [EAX]", + "DA00"), + (m32, "00000000 FIADD WORD PTR [EAX]", + "DE00"), + + (m32, "00000000 FBLD TBYTE PTR [EAX]", + "DF20"), + (m64, "00000000 FBLD TBYTE PTR [RAX]", + "DF20"), + + (m32, "00000000 FBLDP TBYTE PTR [EAX]", + "DF30"), + (m64, "00000000 FBLDP TBYTE PTR [RAX]", + "DF30"), + + (m16, 
"00000000 FCHS", + "d9e0"), + (m32, "00000000 FCHS", + "d9e0"), + (m64, "00000000 FCHS", + "d9e0"), + + + #(m32, "00000000 FCLEX", + # "9bdbe2"), + (m32, "00000000 FNCLEX", + "dbe2"), + + (m32, "00000000 FCMOVB ST, ST(2)", + "dac2"), + + (m32, "00000000 FCOM DWORD PTR [EAX]", + "d810"), + (m32, "00000000 FCOM QWORD PTR [EAX]", + "dC10"), + (m32, "00000000 FCOMP DWORD PTR [EAX]", + "d818"), + (m32, "00000000 FCOMP QWORD PTR [EAX]", + "dC18"), + (m32, "00000000 FCOMPP", + "ded9"), + + (m32, "00000000 FCOMI ST, ST(2)", + "dbf2"), + (m32, "00000000 FCOMIP ST, ST(2)", + "dff2"), + + (m32, "00000000 FUCOMI ST, ST(2)", + "dbea"), + (m32, "00000000 FUCOMIP ST, ST(2)", + "dfea"), + + (m32, "00000000 FCOS", + "d9ff"), + + (m32, "00000000 FDECSTP", + "d9f6"), + + + (m16, "00000000 FDIV DWORD PTR [BX+SI]", + "D830"), + (m32, "00000000 FDIV DWORD PTR [EAX]", + "D830"), + (m32, "00000000 FDIV QWORD PTR [EAX]", + "DC30"), + + (m32, "00000000 FDIV ST, ST(2)", + "D8F2"), + (m32, "00000000 FDIV ST(2), ST", + "DCFA"), + + (m32, "00000000 FDIVP ST(2), ST", + "DEFA"), + + (m16, "00000000 FIDIV DWORD PTR [BX+SI]", + "DA30"), + (m32, "00000000 FIDIV DWORD PTR [EAX]", + "DA30"), + (m32, "00000000 FIDIV WORD PTR [EAX]", + "DE30"), + + + + (m16, "00000000 FDIVR DWORD PTR [BX+SI]", + "D838"), + (m32, "00000000 FDIVR DWORD PTR [EAX]", + "D838"), + (m32, "00000000 FDIVR QWORD PTR [EAX]", + "DC38"), + + (m32, "00000000 FDIVR ST, ST(2)", + "D8Fa"), + (m32, "00000000 FDIVR ST(2), ST", + "DCF2"), + + (m32, "00000000 FDIVRP ST(2), ST", + "DEF2"), + + (m16, "00000000 FIDIVR DWORD PTR [BX+SI]", + "DA38"), + (m32, "00000000 FIDIVR DWORD PTR [EAX]", + "DA38"), + (m32, "00000000 FIDIVR WORD PTR [EAX]", + "DE38"), + + (m32, "00000000 FFREE ST(2)", + "DDC2"), + + (m32, "00000000 FICOM WORD PTR [EAX]", + "DE10"), + (m32, "00000000 FICOM DWORD PTR [EAX]", + "DA10"), + + (m32, "00000000 FICOMP WORD PTR [EAX]", + "DE18"), + (m32, "00000000 FICOMP DWORD PTR [EAX]", + "DA18"), + + (m32, "00000000 FILD WORD PTR 
[EAX]", + "DF00"), + (m32, "00000000 FILD DWORD PTR [EAX]", + "DB00"), + + + (m32, "00000000 FILD QWORD PTR [EAX]", + "DF28"), + + (m32, "00000000 FINCSTP", + "d9f7"), + + #(m32, "00000000 FINIT", + # "9bdbe3"), + (m32, "00000000 FNINIT", + "dbe3"), + + (m32, "00000000 FIST WORD PTR [EAX]", + "DF10"), + (m32, "00000000 FIST DWORD PTR [EAX]", + "DB10"), + + (m32, "00000000 FISTP WORD PTR [EAX]", + "DF18"), + (m32, "00000000 FISTP DWORD PTR [EAX]", + "DB18"), + + (m32, "00000000 FISTP QWORD PTR [EAX]", + "Df38"), + + (m32, "00000000 FISTTP WORD PTR [EAX]", + "DF08"), + (m32, "00000000 FISTTP DWORD PTR [EAX]", + "DB08"), + + (m32, "00000000 FISTTP QWORD PTR [EAX]", + "Dd08"), + + (m32, "00000000 FLD DWORD PTR [EAX]", + "d900"), + (m32, "00000000 FLD QWORD PTR [EAX]", + "dd00"), + + (m32, "00000000 FLD TBYTE PTR [EAX]", + "db28"), + (m32, "00000000 FLD ST(2)", + "d9c2"), + + + (m32, "00000000 FLD1", + "d9e8"), + (m32, "00000000 FLDL2T", + "d9e9"), + (m32, "00000000 FLDL2E", + "d9eA"), + (m32, "00000000 FLDPI", + "d9eB"), + (m32, "00000000 FLDLG2", + "d9eC"), + (m32, "00000000 FLDLN2", + "d9eD"), + (m32, "00000000 FLDZ", + "d9eE"), + + (m32, "00000000 FLDCW WORD PTR [EAX]", + "d928"), + + + + (m16, "00000000 FMUL DWORD PTR [BX+SI]", + "D808"), + (m32, "00000000 FMUL DWORD PTR [EAX]", + "D808"), + (m32, "00000000 FMUL QWORD PTR [EAX]", + "DC08"), + + (m32, "00000000 FMUL ST, ST(2)", + "D8Ca"), + (m32, "00000000 FMUL ST(2), ST", + "DCCa"), + + (m32, "00000000 FMULP ST(2), ST", + "DECa"), + + (m16, "00000000 FIMUL DWORD PTR [BX+SI]", + "DA08"), + (m32, "00000000 FIMUL DWORD PTR [EAX]", + "DA08"), + (m32, "00000000 FIMUL WORD PTR [EAX]", + "DE08"), + + (m32, "00000000 FNOP", + "D9d0"), + (m32, "00000000 FPATAN", + "D9f3"), + (m32, "00000000 FPREM", + "D9f8"), + (m32, "00000000 FPREM1", + "D9f5"), + (m32, "00000000 FPTAN", + "D9f2"), + (m32, "00000000 FRNDINT", + "D9fc"), + + (m32, "00000000 FRSTOR TBYTE PTR [EAX]", + "dd20"), + + #(m32, "00000000 FSAVE TBYTE PTR [EAX]", + # 
"9bdd30"), + (m32, "00000000 FNSAVE TBYTE PTR [EAX]", + "dd30"), + + (m32, "00000000 FSCALE", + "d9fd"), + + (m32, "00000000 FSIN", + "d9fe"), + (m32, "00000000 FSINCOS", + "d9fb"), + (m32, "00000000 FSQRT", + "d9fa"), + + + + (m32, "00000000 FST DWORD PTR [EAX]", + "D910"), + (m32, "00000000 FST QWORD PTR [EAX]", + "DD10"), + + (m32, "00000000 FST ST(2)", + "ddd2"), + + (m32, "00000000 FSTP DWORD PTR [EAX]", + "D918"), + (m32, "00000000 FSTP QWORD PTR [EAX]", + "Dd18"), + (m32, "00000000 FSTP TBYTE PTR [EAX]", + "db38"), + + #(m32, "00000000 FSTCW WORD PTR [EAX]", + # "9bd938"), + (m32, "00000000 FNSTCW WORD PTR [EAX]", + "d938"), + + (m32, "00000000 FNSTENV TBYTE PTR [EAX]", + "d930"), + #(m32, "00000000 FSTENV TBYTE PTR [EAX]", + # "9bd930"), + + (m32, "00000000 FNSTSW WORD PTR [EAX]", + "dd38"), + #(m32, "00000000 FSTSW WORD PTR [EAX]", + # "9bdd38"), + + #(m32, "00000000 FSTSW AX", + # "9bdfe0"), + (m32, "00000000 FNSTSW AX", + "dfe0"), + + (m16, "00000000 FSUB DWORD PTR [BX+SI]", + "D820"), + (m32, "00000000 FSUB DWORD PTR [EAX]", + "D820"), + (m32, "00000000 FSUB QWORD PTR [EAX]", + "DC20"), + + (m32, "00000000 FSUB ST, ST(2)", + "D8E2"), + (m32, "00000000 FSUB ST(2), ST", + "DCEA"), + + (m32, "00000000 FSUBP ST(2), ST", + "DEEA"), + + (m16, "00000000 FISUB DWORD PTR [BX+SI]", + "DA20"), + (m32, "00000000 FISUB DWORD PTR [EAX]", + "DA20"), + (m32, "00000000 FISUB WORD PTR [EAX]", + "DE20"), + + + + (m16, "00000000 FSUBR DWORD PTR [BX+SI]", + "D828"), + (m32, "00000000 FSUBR DWORD PTR [EAX]", + "D828"), + (m32, "00000000 FSUBR QWORD PTR [EAX]", + "DC28"), + + (m32, "00000000 FSUBR ST, ST(2)", + "D8EA"), + (m32, "00000000 FSUBR ST(2), ST", + "DCE2"), + + (m32, "00000000 FSUBRP ST(2), ST", + "DEE2"), + + (m16, "00000000 FISUBR DWORD PTR [BX+SI]", + "DA28"), + (m32, "00000000 FISUBR DWORD PTR [EAX]", + "DA28"), + (m32, "00000000 FISUBR WORD PTR [EAX]", + "DE28"), + + (m32, "00000000 FTST", + "d9e4"), + + (m32, "00000000 FUCOM ST(2)", + "dde2"), + (m32, "00000000 
FUCOMP ST(2)", + "DDEA"), + (m32, "00000000 FUCOMPP", + "DAe9"), + + (m32, "00000000 FXAM", + "d9e5"), + + (m32, "00000000 FXCH ST(2)", + "d9ca"), + + (m32, "00000000 FXRSTOR TBYTE PTR [EAX]", + "0fae08"), + (m32, "00000000 FXSAVE TBYTE PTR [EAX]", + "0fae00"), + + (m32, "00000000 FXTRACT", + "d9f4"), + (m32, "00000000 FYL2X", + "d9f1"), + (m32, "00000000 FYL2XP1", + "d9f9"), + + (m32, "00000000 HLT", + "f4"), + (m32, "00000000 ICEBP", + "f1"), + + (m32, "00000000 IDIV BYTE PTR [EAX]", + "f638"), + (m32, "00000000 IDIV DWORD PTR [EAX]", + "f738"), + + (m32, "00000000 IMUL EAX, DWORD PTR [EAX]", + "0faf00"), + + + (m32, "00000000 IMUL EAX, EBX, 0x8", + "6bc308"), + (m32, "00000000 IMUL EAX, EBX, 0xFFFFFFFF", + "6bc3FF"), + (m32, "00000000 IMUL EAX, DWORD PTR [EBX], 0x11223344", + "690344332211"), + (m64, "00000000 IMUL RAX, QWORD PTR [RBX], 0x11223344", + "48690344332211"), + (m64, "00000000 IMUL RAX, QWORD PTR [RBX], 0x11223344", + "48690344332211"), + (m64, "00000000 IMUL RAX, QWORD PTR [RBX], 0xFFFFFFFFF1223344", + "486903443322F1"), + (m16, "00000000 IMUL AX, BX, 0x8", + "6bc308"), + (m16, "00000000 IMUL AX, BX, 0xFFF0", + "6bc3F0"), + + (m32, "00000000 IN AL, 0x12", + "e412"), + (m32, "00000000 IN EAX, 0x12", + "e512"), + (m64, "00000000 IN RAX, 0x12", + "48e512"), + + (m32, "00000000 IN AL, DL", + "EC"), + (m32, "00000000 IN EAX, EDX", + "ED"), + (m32, "00000000 IN AX, DX", + "66ED"), + + (m32, "00000000 INC DWORD PTR [EAX]", + "ff00"), + (m32, "00000000 INC ECX", + "41"), + + (m32, "00000000 INT 0x3", + "CC"), + (m32, "00000000 INT 0x21", + "CD21"), + + (m16, "00000000 IRET", + "CF"), + (m32, "00000000 IRETD", + "CF"), + (m64, "00000000 IRETQ", + "48CF"), + + (m32, "00000000 JA 0x12", + "7712"), + (m32, "00000000 JA 0xFFFFFFEE", + "77EE"), + (m64, "00000000 JA 0xFFFFFFFFFFFFFFEE", + "77EE"), + + #(m32, "00000000 JA 0xFFEE", + # "6677EE"), + #(m64, "00000000 JA 0xFFEE", + # "6677EE"), + + + (m16, "00000000 JCXZ 0xFFEE", + "E3EE"), + (m16, "00000000 JECXZ 
0xFFEE", + "67E3EE"), + (m32, "00000000 JECXZ 0xFFFFFFEE", + "E3EE"), + (m32, "00000000 JCXZ 0xFFFFFFEE", + "67E3EE"), + (m32, "00000000 JCXZ 0xFFEE", + "6667E3EE"), + (m64, "00000000 JRCXZ 0xFFFFFFFFFFFFFFEE", + "E3EE"), + (m64, "00000000 JECXZ 0xFFFFFFFFFFFFFFEE", + "67E3EE"), + + + (m32, "00000000 MOV BYTE PTR [EAX], AL", + "8800"), + (m32, "00000000 MOV AL, BYTE PTR [EAX]", + "8a00"), + (m32, "00000000 MOV EAX, DWORD PTR [EAX]", + "8b00"), + (m32, "00000000 MOV DWORD PTR [EAX], EAX", + "8900"), + (m64, "00000000 MOV ECX, DWORD PTR [RCX]", + "8b09"), + (m64, "00000000 MOV DWORD PTR [RCX], ECX", + "8909"), + (m64, "00000000 MOV QWORD PTR [RAX], RAX", + "488900"), + + (m32, "00000000 MOV EAX, EBX", + "89d8"), + (m32, "00000000 MOV EAX, EBX", + "8bc3"), + + + (m16, "00000000 MOV WORD PTR [BX+SI], ES", + "8c00"), + (m32, "00000000 MOV DWORD PTR [EAX], ES", + "8c00"), + (m32, "00000000 MOV ES, DWORD PTR [EAX]", + "8e00"), + (m32, "00000000 MOV DWORD PTR [EAX], CS", + "8c08"), + (m64, "00000000 MOV DWORD PTR [RCX], ES", + "8c01"), + + (m16, "00000000 MOV BH, 0x12", + "b712"), + (m16, "00000000 MOV DI, 0x1122", + "bf2211"), + + (m32, "00000000 MOV AL, 0x12", + "b012"), + (m32, "00000000 MOV EAX, 0x11223344", + "b844332211"), + (m32, "00000000 MOV BH, 0x12", + "b712"), + (m32, "00000000 MOV EDI, 0x11223344", + "bf44332211"), + + (m64, "00000000 MOV BH, 0x12", + "b712"), + (m64, "00000000 MOV EDI, 0x11223344", + "bf44332211"), + + (m16, "00000000 MOV WORD PTR [BX], 0x1122", + "c7072211"), + (m32, "00000000 MOV DWORD PTR [EAX], 0x11223344", + "c70044332211"), + (m64, "00000000 MOV DWORD PTR [RCX], 0x11223344", + "c70144332211"), + + (m32, "00000000 MOV CR0, EAX", + "0f22c0"), + (m32, "00000000 MOV EAX, CR0", + "0f20c0"), + + (m32, "00000000 MOV EAX, DR0", + "0f21c0"), + (m32, "00000000 MOV DR0, EAX", + "0f23c0"), + + (m64, "00000000 MOV DWORD PTR [RSP+0x20], 0x10", + "C744242010000000"), + (m64, "00000000 MOV DWORD PTR [RBX+0x20], 0x30", + "c744a32030000000"), + (m64, 
"00000000 MOV DWORD PTR [R12+0x20], 0x10", + "41C744242010000000"), + + (m32, "00000000 MOV BYTE PTR [EBX+0xFFFFFF98], 0xCC", + "C64398CC"), + + (m64, "00000000 MOV BYTE PTR [R11+0xFFFFFFFFFFFFFF98], 0xCC", + "41C64398CC"), + + (m64, "00000000 MOV RAX, 0x1122334455667788", + "48b88877665544332211"), + + (m64, "00000000 MOV RDX, 0x1122334455667788", + "48ba8877665544332211"), + + + + (m64, "00000000 MOV RAX, RBX", + "4889d8"), + (m64, "00000000 MOV RAX, RBX", + "4A89d8"), + (m64, "00000000 MOV RAX, R11", + "4C89d8"), + (m64, "00000000 MOV R8D, EBX", + "4189d8"), + (m64, "00000000 MOV R8D, EBX", + "4389d8"), + (m64, "00000000 MOV EAX, R11D", + "4489d8"), + (m64, "00000000 MOV R8D, R11D", + "4589d8"), + (m64, "00000000 MOV EAX, R11D", + "4689d8"), + (m64, "00000000 MOV R8D, R11D", + "4789d8"), + + (m64, "00000000 MOV BYTE PTR [RBX+0x3], R11B", + "44885B03"), + + (m32, "00000000 MOV AL, BYTE PTR [0x11223344]", + "A044332211"), + (m32, "00000000 MOV BYTE PTR [0x11223344], AL", + "A244332211"), + (m32, "00000000 MOV EAX, DWORD PTR [0x11223344]", + "A144332211"), + (m32, "00000000 MOV DWORD PTR [0x11223344], EAX", + "A344332211"), + + (m32, "00000000 MOV WORD PTR [0x11223344], AX", + "66A344332211"), + + (m32, "00000000 MOV DWORD PTR [0x1122], EAX", + "67A32211"), + + + + (m16, "00000000 MOV AL, BYTE PTR [0x1122]", + "A02211"), + (m16, "00000000 MOV BYTE PTR [0x1122], AL", + "A22211"), + (m16, "00000000 MOV AX, WORD PTR [0x1122]", + "A12211"), + (m16, "00000000 MOV WORD PTR [0x1122], AX", + "A32211"), + + (m64, "00000000 MOV AL, BYTE PTR [0x1122334455667788]", + "A08877665544332211"), + (m64, "00000000 MOV BYTE PTR [0x1122334455667788], AL", + "A28877665544332211"), + (m64, "00000000 MOV EAX, DWORD PTR [0x1122334455667788]", + "A18877665544332211"), + (m64, "00000000 MOV DWORD PTR [0x1122334455667788], EAX", + "A38877665544332211"), + + + + (m32, "00000000 MOV EAX, DWORD PTR CS:[EAX]", + "2e8b00"), + (m32, "00000000 MOV EAX, DWORD PTR SS:[EAX]", + "368b00"), + (m32, 
"00000000 MOV EAX, DWORD PTR DS:[EAX]", + "3e8b00"), + (m32, "00000000 MOV EAX, DWORD PTR ES:[EAX]", + "268b00"), + (m32, "00000000 MOV EAX, DWORD PTR FS:[EAX]", + "648b00"), + (m32, "00000000 MOV EAX, DWORD PTR GS:[EAX]", + "658b00"), + + + + (m32, "00000000 MOVSX EAX, BYTE PTR [EAX]", + "0fbe00"), + (m32, "00000000 MOVSX EAX, WORD PTR [EAX]", + "0fbf00"), + + (m64, "00000000 MOVSX RAX, BYTE PTR [RAX]", + "480fbe00"), + (m64, "00000000 MOVSX RAX, WORD PTR [RAX]", + "480fbf00"), + + (m16, "00000000 MOVZX AX, BYTE PTR [BX+SI]", + "0fb600"), + (m16, "00000000 MOVZX AX, WORD PTR [BX+SI]", + "0fb700"), + + (m32, "00000000 MOVZX EAX, BYTE PTR [EAX]", + "0fb600"), + (m32, "00000000 MOVZX EAX, WORD PTR [EAX]", + "0fb700"), + + (m64, "00000000 MOVSXD R8, EAX", + "4c63c0"), + + + (m32, "00000000 MUL BYTE PTR [EAX]", + "f620"), + (m32, "00000000 MUL EBX", + "f7e3"), + + (m16, "00000000 CMPSW", + "a7"), + (m32, "00000000 CMPSW", + "66a7"), + (m32, "00000000 CMPSD", + "a7"), + + (m64, "00000000 CMPSD", + "a7"), + (m64, "00000000 CMPSQ", + "48a7"), + + (m16, "00000000 LODSB", + "aC"), + (m32, "00000000 LODSB", + "66ac"), + (m16, "00000000 LODSW", + "ad"), + (m32, "00000000 LODSW", + "66ad"), + (m32, "00000000 LODSD", + "ad"), + + (m64, "00000000 LODSD", + "ad"), + (m64, "00000000 LODSQ", + "48ad"), + + + + (m32, "00000000 NEG BYTE PTR [EAX]", + "f618"), + (m32, "00000000 NEG EBX", + "f7db"), + + #(m32, "00000000 NOP", + # "90"), + + (m32, "00000000 NOP DWORD PTR [EAX]", + "0f1f00"), + + (m32, "00000000 NOT BYTE PTR [EAX]", + "f610"), + (m32, "00000000 NOT EBX", + "f7d3"), + + (m32, "00000000 OR AL, 0x11", + "0c11"), + (m32, "00000000 OR EAX, 0x11223344", + "0d44332211"), + (m32, "00000000 OR BYTE PTR [EAX], 0x11", + "800811"), + (m32, "00000000 OR DWORD PTR [EAX], 0x11223344", + "810844332211"), + (m32, "00000000 OR DWORD PTR [EAX], 0x11", + "830811"), + (m32, "00000000 OR BYTE PTR [EAX], AL", + "0800"), + (m32, "00000000 OR DWORD PTR [EAX], EAX", + "0900"), + (m32, "00000000 
OR AL, BYTE PTR [EAX]", + "0A00"), + (m32, "00000000 OR EAX, DWORD PTR [EAX]", + "0B00"), + + (m32, "00000000 OUT 0x12, AL", + "e612"), + (m32, "00000000 OUT 0x12, EAX", + "e712"), + (m64, "00000000 OUT 0x12, RAX", + "48e712"), + + (m32, "00000000 OUT DL, AL", + "EE"), + (m32, "00000000 OUT EDX, EAX", + "EF"), + (m32, "00000000 OUT DX, AX", + "66EF"), + + (m32, "00000000 OUTSB", + "6e"), + (m32, "00000000 OUTSD", + "6f"), + (m32, "00000000 OUTSW", + "666f"), + (m64, "00000000 OUTSD", + "6f"), + (m64, "00000000 OUTSW", + "666f"), + + #(m32, "00000000 PAUSE", + # "f390"), + + + (m16, "00000000 POP WORD PTR [BX+SI]", + "8f00"), + (m32, "00000000 POP DWORD PTR [EAX]", + "8f00"), + (m64, "00000000 POP QWORD PTR [RAX]", + "8f00"), + + + (m32, "00000000 POP EAX", + "8fC0"), + (m64, "00000000 POP RAX", + "8fC0"), + + (m32, "00000000 POP EAX", + "58"), + (m64, "00000000 POP RAX", + "58"), + (m64, "00000000 POP R10", + "415a"), + + (m32, "00000000 POP DS", + "1f"), + (m32, "00000000 POP ES", + "07"), + (m32, "00000000 POP SS", + "17"), + (m32, "00000000 POP FS", + "0fa1"), + (m32, "00000000 POP GS", + "0fa9"), + + (m16, "00000000 POPA", + "61"), + (m32, "00000000 POPAD", + "61"), + + (m16, "00000000 POPF", + "9d"), + (m32, "00000000 POPFD", + "9d"), + (m64, "00000000 POPFD", + "9d"), + (m64, "00000000 POPFQ", + "489d"), + + (m32, "00000000 PREFETCH0 BYTE PTR [EAX]", + "0f1808"), + (m32, "00000000 PREFETCH1 BYTE PTR [EAX]", + "0f1810"), + (m32, "00000000 PREFETCH2 BYTE PTR [EAX]", + "0f1818"), + (m32, "00000000 PREFETCHNTA BYTE PTR [EAX]", + "0f1800"), + + + (m16, "00000000 PUSH AX", + "50"), + (m32, "00000000 PUSH EAX", + "50"), + (m64, "00000000 PUSH RAX", + "50"), + (m64, "00000000 PUSH R10", + "4152"), + (m16, "00000000 PUSH WORD PTR [BX+SI]", + "FF30"), + (m16, "00000000 PUSH WORD PTR [EAX]", + "67FF30"), + (m16, "00000000 PUSH DWORD PTR [EAX]", + "6667FF30"), + (m32, "00000000 PUSH DWORD PTR [EAX]", + "FF30"), + (m64, "00000000 PUSH QWORD PTR [RAX]", + "FF30"), + + 
(m16, "00000000 PUSH 0x11", + "6a11"), + (m32, "00000000 PUSH 0x11223344", + "6844332211"), + (m32, "00000000 PUSH 0x1122", + "66682211"), + (m32, "00000000 PUSH 0x80", + "6880000000"), + + (m64, "00000000 PUSH 0x11223344", + "6844332211"), + + (m32, "00000000 PUSH CS", + "0e"), + (m32, "00000000 PUSH SS", + "16"), + (m32, "00000000 PUSH DS", + "1E"), + (m32, "00000000 PUSH ES", + "06"), + (m32, "00000000 PUSH FS", + "0fa0"), + (m32, "00000000 PUSH GS", + "0fa8"), + + (m16, "00000000 PUSHA", + "60"), + (m32, "00000000 PUSHAD", + "60"), + + (m16, "00000000 PUSHF", + "9c"), + (m32, "00000000 PUSHFD", + "9c"), + (m64, "00000000 PUSHFD", + "9c"), + (m64, "00000000 PUSHFQ", + "489c"), + + (m32, "00000000 RCL BYTE PTR [EAX], 0x1", + "D010"), + (m32, "00000000 RCL BYTE PTR [EAX], CL", + "d210"), + + (m32, "00000000 RCL DWORD PTR [EAX], 0x1", + "D110"), + (m32, "00000000 RCL DWORD PTR [EAX], CL", + "d310"), + + (m32, "00000000 RCL BYTE PTR [EAX], 0x11", + "c01011"), + (m32, "00000000 RCL DWORD PTR [EAX], 0x11", + "c11011"), + + (m64, "00000000 RCL QWORD PTR [RAX], 0x1", + "48D110"), + (m64, "00000000 RCL QWORD PTR [RAX], CL", + "48d310"), + + (m64, "00000000 RCL BYTE PTR [RAX], 0x11", + "c01011"), + (m64, "00000000 RCL QWORD PTR [RAX], 0x11", + "48c11011"), + + + + + (m32, "00000000 RCR BYTE PTR [EAX], 0x1", + "D018"), + (m32, "00000000 RCR BYTE PTR [EAX], CL", + "d218"), + + (m32, "00000000 RCR DWORD PTR [EAX], 0x1", + "D118"), + (m32, "00000000 RCR DWORD PTR [EAX], CL", + "d318"), + + (m32, "00000000 RCR BYTE PTR [EAX], 0x11", + "c01811"), + (m32, "00000000 RCR DWORD PTR [EAX], 0x11", + "c11811"), + + (m64, "00000000 RCR QWORD PTR [RAX], 0x1", + "48D118"), + (m64, "00000000 RCR QWORD PTR [RAX], CL", + "48d318"), + + (m64, "00000000 RCR BYTE PTR [RAX], 0x11", + "c01811"), + (m64, "00000000 RCR QWORD PTR [RAX], 0x11", + "48c11811"), + + + + + (m32, "00000000 ROL BYTE PTR [EAX], 0x1", + "D000"), + (m32, "00000000 ROL BYTE PTR [EAX], CL", + "d200"), + + (m32, "00000000 ROL 
DWORD PTR [EAX], 0x1", + "D100"), + (m32, "00000000 ROL DWORD PTR [EAX], CL", + "d300"), + + (m32, "00000000 ROL BYTE PTR [EAX], 0x11", + "c00011"), + (m32, "00000000 ROL DWORD PTR [EAX], 0x11", + "c10011"), + + (m64, "00000000 ROL QWORD PTR [RAX], 0x1", + "48D100"), + (m64, "00000000 ROL QWORD PTR [RAX], CL", + "48d300"), + + (m64, "00000000 ROL BYTE PTR [RAX], 0x11", + "c00011"), + (m64, "00000000 ROL QWORD PTR [RAX], 0x11", + "48c10011"), + + + + (m32, "00000000 ROR BYTE PTR [EAX], 0x1", + "D008"), + (m32, "00000000 ROR BYTE PTR [EAX], CL", + "d208"), + + (m32, "00000000 ROR DWORD PTR [EAX], 0x1", + "D108"), + (m32, "00000000 ROR DWORD PTR [EAX], CL", + "d308"), + + (m32, "00000000 ROR BYTE PTR [EAX], 0x11", + "c00811"), + (m32, "00000000 ROR DWORD PTR [EAX], 0x11", + "c10811"), + + (m64, "00000000 ROR QWORD PTR [RAX], 0x1", + "48D108"), + (m64, "00000000 ROR QWORD PTR [RAX], CL", + "48d308"), + + (m64, "00000000 ROR BYTE PTR [RAX], 0x11", + "c00811"), + (m64, "00000000 ROR QWORD PTR [RAX], 0x11", + "48c10811"), + + + + (m32, "00000000 RDMSR", + "0f32"), + (m32, "00000000 RDPMC", + "0f33"), + (m32, "00000000 RDTSC", + "0f31"), + + (m32, "00000000 INSB", + "6C"), + (m16, "00000000 INSW", + "6D"), + (m32, "00000000 INSD", + "6D"), + (m64, "00000000 INSD", + "486D"), + (m64, "00000000 INSD", + "6D"), + + + (m32, "00000000 MOVSB", + "a4"), + (m16, "00000000 MOVSW", + "a5"), + (m32, "00000000 MOVSD", + "a5"), + (m64, "00000000 MOVSQ", + "48a5"), + (m64, "00000000 MOVSD", + "a5"), + + (m32, "00000000 OUTSB", + "6e"), + (m16, "00000000 OUTSW", + "6f"), + (m32, "00000000 OUTSD", + "6f"), + (m64, "00000000 OUTSD", + "486f"), + (m64, "00000000 OUTSD", + "6f"), + + + (m32, "00000000 LODSB", + "ac"), + (m16, "00000000 LODSW", + "ad"), + (m32, "00000000 LODSD", + "ad"), + (m64, "00000000 LODSQ", + "48ad"), + (m64, "00000000 LODSD", + "ad"), + + (m32, "00000000 STOSB", + "aa"), + (m16, "00000000 STOSW", + "ab"), + (m32, "00000000 STOSD", + "ab"), + (m64, "00000000 STOSQ", + 
"48ab"), + (m64, "00000000 STOSD", + "ab"), + + + (m32, "00000000 CMPSB", + "a6"), + (m16, "00000000 CMPSW", + "a7"), + (m32, "00000000 CMPSD", + "a7"), + (m64, "00000000 CMPSQ", + "48a7"), + (m64, "00000000 CMPSD", + "a7"), + + + (m32, "00000000 SCASB", + "ae"), + (m16, "00000000 SCASW", + "af"), + (m32, "00000000 SCASD", + "af"), + (m64, "00000000 SCASQ", + "48af"), + (m64, "00000000 SCASD", + "af"), + + (m32, "00000000 REPNE SCASB", + "F2AE"), + (m32, "00000000 REPE SCASB", + "F3AE"), + (m32, "00000000 REPE LODSD", + "F3ad"), + + (m32, "00000000 RET", + "c3"), + + (m32, "00000000 RET 0x1122", + "C22211"), + + (m32, "00000000 RETF 0x1122", + "CA2211"), + + (m32, "00000000 RSM", + "0faa"), + (m32, "00000000 SAHF", + "9e"), + + (m32, "00000000 SAL BYTE PTR [EAX], 0x1", + "D030"), + (m32, "00000000 SAL BYTE PTR [EAX], CL", + "d230"), + + (m32, "00000000 SAR BYTE PTR [EAX], 0x1", + "D038"), + (m32, "00000000 SAR BYTE PTR [EAX], CL", + "d238"), + + (m32, "00000000 SHL BYTE PTR [EAX], 0x1", + "D020"), + (m32, "00000000 SHL BYTE PTR [EAX], CL", + "d220"), + + (m32, "00000000 SHR BYTE PTR [EAX], 0x1", + "D028"), + (m32, "00000000 SHR BYTE PTR [EAX], CL", + "d228"), + + + (m32, "00000000 SBB AL, 0x11", + "1c11"), + (m32, "00000000 SBB EAX, 0x11223344", + "1D44332211"), + (m32, "00000000 SBB BYTE PTR [EAX], 0x11", + "801811"), + (m32, "00000000 SBB DWORD PTR [EAX], 0x11223344", + "811844332211"), + (m32, "00000000 SBB BYTE PTR [EAX], AL", + "1800"), + (m32, "00000000 SBB DWORD PTR [EAX], EAX", + "1900"), + (m32, "00000000 SBB AL, BYTE PTR [EAX]", + "1A00"), + (m32, "00000000 SBB EAX, DWORD PTR [EAX]", + "1B00"), + (m64, "00000000 SBB QWORD PTR [RAX], RAX", + "481900"), + + + (m32, "00000000 SETA BYTE PTR [EAX]", + "0f9700"), + (m32, "00000000 SETO BYTE PTR [EAX]", + "0f9000"), + (m32, "00000000 SETNZ AL", + "0f95C0"), + + (m32, "00000000 SGDT DWORD PTR [EAX]", + "0f0100"), + + (m32, "00000000 SHLD DWORD PTR [EAX], EAX, 0x11", + "0fa40011"), + (m32, "00000000 SHLD DWORD PTR 
[EAX], EAX, CL", + "0fa500"), + + (m64, "00000000 SHLD QWORD PTR [RAX], RAX, 0x11", + "480fa40011"), + (m64, "00000000 SHLD QWORD PTR [RAX], RAX, CL", + "480fa500"), + + (m32, "00000000 SHRD DWORD PTR [EAX], EAX, 0x11", + "0fac0011"), + (m32, "00000000 SHRD DWORD PTR [EAX], EAX, CL", + "0fad00"), + + (m64, "00000000 SHRD QWORD PTR [RAX], RAX, 0x11", + "480fac0011"), + (m64, "00000000 SHRD QWORD PTR [RAX], RAX, CL", + "480fad00"), + + (m32, "00000000 SIDT DWORD PTR [EAX]", + "0f0108"), + + + + (m32, "00000000 SUB AL, 0x11", + "2c11"), + (m32, "00000000 SUB EAX, 0x11223344", + "2D44332211"), + (m32, "00000000 SUB BYTE PTR [EAX], 0x11", + "802811"), + (m32, "00000000 SUB DWORD PTR [EAX], 0x11223344", + "812844332211"), + (m32, "00000000 SUB BYTE PTR [EAX], AL", + "2800"), + (m32, "00000000 SUB DWORD PTR [EAX], EAX", + "2900"), + (m32, "00000000 SUB AL, BYTE PTR [EAX]", + "2A00"), + (m32, "00000000 SUB EAX, DWORD PTR [EAX]", + "2B00"), + (m32, "00000000 SUB EBX, DWORD PTR [EBP+0xFFFFF858]", + "2b9d58f8ffff"), + + + (m64, "00000000 SYSCALL", + "0f05"), + (m64, "00000000 SYSENTER", + "0f34"), + (m64, "00000000 SYSEXIT", + "0f35"), + (m64, "00000000 SYSRET", + "0f07"), + + + + (m32, "00000000 TEST AL, 0x11", + "a811"), + (m32, "00000000 TEST EAX, 0x11223344", + "A944332211"), + + (m32, "00000000 TEST BYTE PTR [EAX], 0x11", + "f60011"), + (m32, "00000000 TEST DWORD PTR [EAX], 0x11223344", + "f70044332211"), + + (m32, "00000000 TEST BYTE PTR [EAX], AL", + "8400"), + (m32, "00000000 TEST DWORD PTR [EAX], EAX", + "8500"), + + (m32, "00000000 UD2", + "0f0b"), + + (m32, "00000000 VERR DWORD PTR [EAX]", + "0f0020"), + + (m32, "00000000 VERW DWORD PTR [EAX]", + "0f0028"), + + (m32, "00000000 WBIND", + "0f09"), + + (m32, "00000000 WRMSR", + "0f30"), + + (m32, "00000000 XADD BYTE PTR [EAX], AL", + "0fc000"), + (m32, "00000000 XADD DWORD PTR [EAX], EAX", + "0fc100"), + + (m16, "00000000 XCHG AX, CX", + "91"), + + (m32, "00000000 XCHG EAX, ECX", + "91"), + + (m64, "00000000 XCHG EAX, 
ECX", + "91"), + (m64, "00000000 XCHG RAX, RCX", + "4891"), + + (m32, "00000000 NOP", + "90"), + + + (m32, "00000000 XCHG BYTE PTR [EAX], AL", + "8600"), + (m32, "00000000 XCHG DWORD PTR [EAX], EAX", + "8700"), + + + (m32, "00000000 XOR AL, 0x11", + "3411"), + (m32, "00000000 XOR EAX, 0x11223344", + "3544332211"), + (m32, "00000000 XOR BYTE PTR [EAX], 0x11", + "803011"), + (m32, "00000000 XOR DWORD PTR [EAX], 0x11223344", + "813044332211"), + (m32, "00000000 XOR DWORD PTR [EAX], 0xFFFFFFFF", + "8330FF"), + (m32, "00000000 XOR BYTE PTR [EAX], AL", + "3000"), + (m32, "00000000 XOR DWORD PTR [EAX], EAX", + "3100"), + (m32, "00000000 XOR EAX, DWORD PTR [EAX]", + "3300"), + + (m32, "00000000 XORPS XMM1, XMM2", + "0f57ca"), + (m32, "00000000 XORPS XMM1, DWORD PTR [EDI+0x42]", + "0f574f42"), + (m32, "00000000 XORPD XMM1, XMM2", + "660f57ca"), + + (m32, "00000000 MOVAPS DWORD PTR [EBP+0xFFFFFFB8], XMM0", + "0f2945b8"), + (m32, "00000000 MOVAPS XMM0, DWORD PTR [EBP+0xFFFFFFB8]", + "0f2845b8"), + (m32, "00000000 MOVAPD WORD PTR [EBP+0xFFFFFFB8], XMM0", + "660f2945b8"), + + (m32, "00000000 MOVUPS XMM2, DWORD PTR [ECX]", + "0f1011"), + (m32, "00000000 MOVSD XMM2, DWORD PTR [ECX]", + "f20f1011"), + (m32, "00000000 MOVSD DWORD PTR [EBP+0xFFFFFFD8], XMM0", + "f20f1145d8"), + (m32, "00000000 MOVSS XMM2, DWORD PTR [ECX]", + "f30f1011"), + (m32, "00000000 MOVUPD XMM2, DWORD PTR [ECX]", + "660f1011"), + + (m32, "00000000 ADDSS XMM2, DWORD PTR [ECX]", + "f30f5811"), + (m32, "00000000 ADDSD XMM2, DWORD PTR [ECX]", + "f20f5811"), + + (m32, "00000000 MULSD XMM2, DWORD PTR [ECX]", + "f20f5911"), + + + (m32, "00000000 PXOR XMM0, XMM0", + "0fefc0"), + (m32, "00000000 UCOMISD XMM0, DWORD PTR [EBP+0xFFFFFFD8]", + "660f2e45d8"), + (m32, "00000000 ANDPD XMM0, DWORD PTR [EBX+0x2CBD27]", + "660f548327bd2c00"), + + (m32, "00000000 SUBSD XMM1, XMM0", + "f20f5cc8"), + + (m32, "00000000 MAXSD XMM0, DWORD PTR [EBX+0x2CBD37]", + "f20f5f8337bd2c00"), + + (m32, "00000000 CVTSI2SD XMM0, EBX", + 
"f20f2ac3"), + + (m32, "00000000 PMINSW MM0, MM1", + "0feac1"), + (m32, "00000000 PMINSW XMM0, XMM1", + "660feac1"), + + (m64, "00000000 MOV BYTE PTR [RSI], DIL", + "40883E"), + (m32, "00000000 MOVZX EAX, BH", + "0fb6c7"), + (m64, "00000000 MOVZX EAX, BH", + "0fb6c7"), + (m64, "00000000 MOVZX EAX, DIL", + "400fb6c7"), + (m64, "00000000 MOV BYTE PTR [RCX], SIL", + "408831"), + (m64, "00000000 CMP SIL, CL", + "4038ce"), + + (m64, "00000000 SETZ DIL", + "400f94c7"), + (m64, "00000000 SETNZ BPL", + "400f95c5"), + (m64, "00000000 MOV CL, BPL", + "4088e9"), + (m64, "00000000 AND DIL, 0x0", + "4080e700"), + (m64, "00000000 MOV DIL, AL", + "4088c7"), + (m64, "00000000 MOV DIL, BYTE PTR [RSI]", + "408a3e"), + (m64, "00000000 DEC DIL", + "40fecf"), + + (m64, "00000000 TEST DIL, DIL", + "4084ff"), + (m32, "00000000 JMP EDX", + "FFE2"), + (m64, "00000000 JMP RDX", + "FFE2"), +] + + + # mode = 64 + # l = mn_x86.dis('\x4D\x11\x7c\x18\x00', mode) + # print l + #""" + # mode = 64 + # l = mn_x86.fromstring("ADC DWORD PTR [RAX], 0x11223344", mode) + # print 'xx' + # t= time.time() + # import cProfile + # def f(): + # x = l.asm(mode) + # print x + # cProfile.run('f()') + # l.asm(mode) + # print time.time()-t +# reg_tests = reg_tests[-1:] + +fname64 = ('exe64.bin', 'r+') +if not os.access(fname64[0], os.R_OK): + fname64 = ('regression_test64_ia32.bin', 'w') + +test_file = {16: open('regression_test16_ia32.bin', 'w'), + 32: open('regression_test32_ia32.bin', 'w'), + # 64:open('regression_test64_ia32.bin', 'w+')} + # 64:open('testmnemo', 'r+')} + 64: open(*fname64)} +ts = time.time() +# test_file[16].write("\x90"*0x10000) +# test_file[32].write("\x90"*0x10000) +file64off = 0x2524c +test_file[64].seek(0x400) +test_file[64].write('\x90' * 0x30000) +test_file[64].seek(file64off) +for mode, s, l, in reg_tests: + print "-" * 80 + s = s[12:] + b = l.decode('hex') + print mode, repr(b) + mn = mn_x86.dis(b, mode) + print "dis args", [(str(x), x.size) for x in mn.args] + print s + print mn + 
assert(str(mn).strip() == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + print 'fromstring', repr(s) + l = mn_x86.fromstring(s, mode) + # print l + print 'str args', [(str(x), x.size) for x in l.args] + assert(str(l).strip(' ') == s) + a = mn_x86.asm(l) + print 'asm result', [x for x in a] + print repr(b) + # test_file[mode[0]].write(b) + + for x in a: + print "BYTES", repr(x) + test_file[mode].write(x) + test_file[mode].write("\x90" * 2) + + print 'test re dis' + for x in a: + print repr(x) + rl = mn_x86.dis(x, mode) + assert(str(rl).strip(' ') == s) + print repr(b), a + assert(b in a) + # print mn.args +print 'TEST time', time.time() - ts + + +# speed test thumb +o = "" +mode_x = m32 +for mode, s, l, in reg_tests: + if mode != mode_x: + continue + s = s[12:] + b = l.decode('hex') + o += b + +while len(o) < 1000: + o += o +open('x86_speed_reg_test.bin', 'w').write(o) + + +def profile_dis(o): + bs = bin_stream_str(o) + off = 0 + instr_num = 0 + ts = time.time() + while off < bs.getlen(): + mn = mn_x86.dis(bs, mode_x, off) + # print instr_num, off, mn.l, str(mn) + instr_num += 1 + off += mn.l + print 'instr per sec:', instr_num / (time.time() - ts) + +import cProfile +# cProfile.run(r'mn_x86.dis("\x81\x54\x18\xfe\x44\x33\x22\x11", m32)') +cProfile.run('profile_dis(o)') +# profile_dis(o) diff --git a/test/core/graph.py b/test/core/graph.py new file mode 100644 index 00000000..a419a686 --- /dev/null +++ b/test/core/graph.py @@ -0,0 +1,18 @@ +from miasm2.core.graph import * + +g = DiGraph() +g.add_node('a') +g.add_node('b') + +g.add_edge('a', 'b') +g.add_edge('a', 'c') +g.add_edge('a', 'c') +g.add_edge('c', 'c') + +print g + +print [x for x in g.successors('a')] +print [x for x in g.predecessors('a')] +print [x for x in g.predecessors('b')] +print [x for x in g.predecessors('c')] +print [x for x in g.successors('c')] diff --git a/test/core/interval.py b/test/core/interval.py new file mode 100644 index 00000000..34537d25 --- /dev/null +++ 
b/test/core/interval.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.core.interval import * +from random import randint +from pdb import pm + +i1 = interval([(1, 3)]) +i2 = interval([(2, 5)]) +i3 = interval([(3, 5)]) +i4 = interval([(5, 8)]) + +i5 = interval([(1, 5)]) +i6 = interval([(1, 3), (5, 8)]) +i7 = interval([(2, 8)]) +i8 = interval([(1, 8)]) +i9 = interval([(4, 5)]) + +i10 = interval([(1, 1)]) +i11 = interval([(1, 2)]) +i12 = interval([(2, 2)]) +i13 = interval([(2, 4)]) +i14 = interval([(0, 1), (3, 5), (7, 10)]) +i15 = interval([(0, 12)]) +i16 = interval([(2, 8)]) + +i_empty = interval() + +assert(repr(i_empty) == '[]') + +assert(interval(i1) == i1) + +i1.cannon() +i1.cannon() + +assert(cmp_interval(i1.intervals[0], i2.intervals[0]) == INT_JOIN) +assert(cmp_interval(i1.intervals[0], i3.intervals[0]) == INT_JOIN) +assert(cmp_interval(i1.intervals[0], i4.intervals[0]) == INT_DISJOIN) +assert(cmp_interval(i2.intervals[0], i3.intervals[0]) == INT_B_IN_A) +assert(cmp_interval(i3.intervals[0], i2.intervals[0]) == INT_A_IN_B) +assert(cmp_interval(i1.intervals[0], i1.intervals[0]) == INT_EQ) +assert(cmp_interval(i1.intervals[0], i9.intervals[0]) == INT_JOIN_AB) +assert(cmp_interval(i9.intervals[0], i1.intervals[0]) == INT_JOIN_BA) + +assert((i1 in i2) is False) +assert((i2 in i1) is False) +assert((i1 in i3) is False) +assert((i2 in i3) is False) + +assert((i3 in i2)) +assert((i2 in i3) is False) + +assert(interval.cannon_list(i1.intervals) == i1.intervals) + +assert(i1 + i2 == i5) +assert(i1 + i3 == i5) +assert(i1 + i4 == i6) + +assert(i2 + i3 == i2) +assert(i2 + i4 == i7) +assert(i1 + i2 + i4 == i8) + +assert(i1 - i2 == i10) +assert(i1 - i3 == i11) +assert(i1 - i4 == i1) +assert(i2 - i3 == i12) +assert(i2 - i4 == i13) +assert(i8 - i1 == interval([(4, 8)])) +assert(i8 - i2 == interval([(1, 1), (6, 8)])) + +assert(i10 + i12 == i11) +assert(i1 - i1 == interval()) +assert(i6 - i6 == interval()) +assert(i6 - i6 - i1 == interval()) 
+assert(i1 - i10 == interval([(2, 3)])) + +assert(i1 & i1 == i1) +assert(i1 & i2 == interval([(2, 3)])) +assert(i1 & i3 == interval([(3, 3)])) +assert(i3 & i1 == interval([(3, 3)])) +assert(i1 & i4 == interval([])) +assert(i4 & i1 == interval([])) +assert(i1 & i5 == i1) +assert(i5 & i1 == i1) +assert(i1 & i6 == i1) +assert(i5 & i13 == i13) +assert(i6 & i6 == i6) +assert(i14 & i15 == i14) +assert(i15 & i14 == i14) +assert(i14 & i16 == interval([(3, 5), (7, 8)])) + +x1 = [(7, 87), (76, 143), (94, 129), (79, 89), (46, 100)] +assert(interval(x1) == interval([(7, 143)])) +x2 = [(11, 16), (35, 74), (18, 114), (91, 188), (3, 75)] +assert(interval(x2) == interval([(3, 188)])) + +i1.hull() +i1.show(dry_run=True) + +assert(i_empty.hull() == (None, None)) + + +def gen_random_interval(l=100): + r = [] + for j in xrange(5): + a = randint(0, l) + b = a + randint(0, l) + r.append((a, b)) + return r + + +def check_add(r1, r2): + i_sum = interval(r1) + interval(r2) + for a, b in r1 + r2: + for i in xrange(a, b + 1): + assert(i in i_sum) + + +def check_sub(r1, r2): + i1 = interval(r1) + i2 = interval(r2) + i_sub = i1 - i2 + for a, b in r1: + for i in xrange(a, b + 1): + if i in i2: + assert(i not in i_sub) + else: + assert(i in i_sub) + + +def check_and(r1, r2): + i1 = interval(r1) + i2 = interval(r2) + i_and = i1 & i2 + for a, b in r1: + for i in xrange(a, b + 1): + if i in i2: + assert(i in i_and) + else: + assert(i not in i_and) + + +for i in xrange(1000): + r1 = gen_random_interval() + r2 = gen_random_interval() + r3 = gen_random_interval() + + check_add(r1, r2) + check_sub(r1, r2) + check_and(r1, r2) + + a = interval(r1) + b = interval(r2) + c = interval(r3) + assert((a & b) - c == a & (b - c) == (a - c) & (b - c)) + assert(a - (b & c) == (a - b) + (a - c)) diff --git a/test/core/parse_asm.py b/test/core/parse_asm.py new file mode 100644 index 00000000..c2a6dc72 --- /dev/null +++ b/test/core/parse_asm.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import 
unittest + + +class TestParseAsm(unittest.TestCase): + + def test_ParseTxt(self): + from miasm2.arch.x86.arch import mn_x86 + from miasm2.core.parse_asm import parse_txt + + ASM0 = ''' + ; + .LFB0: + .LA: + .text + .data + .bss + .string + .ustring + .byte 0 0x0 + .byte a + .comm + .split + .dontsplit + .file + .cfi_0 + label: + JMP EAX ;comment + ''' + ASM1 = ''' + .XXX + ''' + self.assertTrue(parse_txt(mn_x86, 32, ASM0)) + self.assertRaises(ValueError, parse_txt, mn_x86, 32, ASM1) + +if __name__ == '__main__': + testsuite = unittest.TestLoader().loadTestsFromTestCase(TestParseAsm) + report = unittest.TextTestRunner(verbosity=2).run(testsuite) + exit(len(report.errors + report.failures)) diff --git a/test/expression/modint.py b/test/expression/modint.py new file mode 100644 index 00000000..e7c19d0c --- /dev/null +++ b/test/expression/modint.py @@ -0,0 +1,59 @@ +from miasm2.expression.modint import * + +a = uint8(0x42) +b = uint8(0xFF) +c = uint8(0x4) + +d = uint1(0) +e = uint1(1) + +f = uint8(0x1) + + +print a, b, c +print a + b, a + c, b + c +print a == a, a == b, a == 0x42, a == 0x78 +print a != b, a != a +print d, e +print d + e, d + d, e + e, e + e + e, e + 0x11 + +assert(f == 1) +assert(f + 1 == 2) +assert(2 == f + 1) +assert(f + 0xff == 0) +assert(f & 0 == 0) +assert(f & 0xff == f) +assert(0xff & f == f) +assert(f / 1 == f) +assert(1 / f == f) +assert(int(f) == 1) +assert(long(f) == 1) +assert(~f == 0xfe) +assert(f << 1 == 2) +assert(f << 8 == 0) +assert(1 << f == 2) +assert(0x80 << f == 0) +assert(f % 2 == f) +assert(f % 1 == 0) +assert(2 % f == 0) +assert(f * 2 == 2) +assert(2 * f == 2) +assert(f * f == 1) +assert(f * uint8(0x80) == 0x80) +assert(-f == 0xff) +assert(f | f == f) +assert(f | 0 == f) +assert(2 | f == 3) +assert(f >> 0 == f) +assert(f >> 1 == 0) +assert(0x10 >> f == 0x8) +assert(0x100 >> f == 0x80) # XXXX +assert(0x1000 >> f == 0x0) # XXXX +assert(f ^ f == 0) +assert(f ^ 0 == f) +assert(0 ^ f == f) +assert(1 ^ f == 0) + +print e + c, c + e, c 
- e, e - c +print 1000 * a +print hex(a) diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py new file mode 100644 index 00000000..19f43d6e --- /dev/null +++ b/test/expression/simplifications.py @@ -0,0 +1,258 @@ +# +# Expression simplification regression tests # +# +from pdb import pm +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp + +# Define example objects +a = ExprId('a') +b = ExprId('b') +c = ExprId('c') +d = ExprId('d') +e = ExprId('e') + +m = ExprMem(a) +s = a[:8] + +i1 = ExprInt(uint32(0x1)) +i2 = ExprInt(uint32(0x2)) +cc = ExprCond(a, b, c) + +o = ExprCompose([(a[:8], 8, 16), + (a[8:16], 0, 8)]) + +o2 = ExprCompose([(a[8:16], 0, 8), + (a[:8], 8, 16)]) + +l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] +l2 = l[::-1] + + +x = ExprMem(a + b + ExprInt32(0x42)) + +# Define tests: (expression to simplify, expected value) +to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), + ((ExprInt32(5) + c + a + b - a + ExprInt32(1) - ExprInt32(5)), + b + c + ExprInt32(1)), + (a + b + c - a - b - c + a, a), + (a + a + b + c - (a + (b + c)), a), + (c ^ b ^ a ^ c ^ b, a), + (a ^ ExprInt32(0), a), + ((a + b) - b, a), + (-(ExprInt32(0) - ((a + b) - b)), a), + + (ExprOp('<<<', a, ExprInt32(32)), a), + (ExprOp('>>>', a, ExprInt32(32)), a), + (ExprOp('>>>', a, ExprInt32(0)), a), + (ExprOp('<<', a, ExprInt32(0)), a), + + (ExprOp('<<<', a, ExprOp('<<<', b, c)), + ExprOp('<<<', a, ExprOp('<<<', b, c))), + (ExprOp('<<<', ExprOp('<<<', a, b), c), + ExprOp('<<<', ExprOp('<<<', a, b), c)), + (ExprOp('<<<', ExprOp('>>>', a, b), c), + ExprOp('<<<', ExprOp('>>>', a, b), c)), + (ExprOp('>>>', ExprOp('<<<', a, b), c), + ExprOp('>>>', ExprOp('<<<', a, b), c)), + (ExprOp('>>>', ExprOp('<<<', a, b), b), + ExprOp('>>>', ExprOp('<<<', a, b), b)), + + + (ExprOp('>>>', ExprOp('<<<', a, ExprInt32(10)), ExprInt32(2)), + ExprOp('<<<', a, ExprInt32(8))), + + (ExprOp('>>>', ExprOp('<<<', a, ExprInt32(10)), 
ExprInt32(2)) ^ ExprOp('>>>', ExprOp('<<<', a, ExprInt32(10)), ExprInt32(2)), + ExprInt32(0)), + (ExprOp(">>", (a & ExprInt32(0xF)), ExprInt32(0x15)), + ExprInt32(0)), + (ExprOp(">>", (ExprInt32(0x12345678)), ExprInt32(0x4)), + ExprInt32(0x1234567)), + (ExprOp("a>>", (ExprInt32(0x12345678)), ExprInt32(0x4)), + ExprInt32(0x1234567)), + (ExprOp("a>>", (ExprInt32(0xF1234567)), ExprInt32(0x4)), + ExprInt32(0xFF123456)), + (ExprOp("a>>", (ExprInt32(0xF1234567)), ExprInt32(28)), + ExprInt32(0xFFFFFFFF)), + (ExprOp("==", ExprInt32(12), ExprInt32(10)), ExprInt32(0)), + (ExprOp("==", ExprInt32(12), ExprInt32(12)), ExprInt32(1)), + (ExprOp("==", a | ExprInt32(12), ExprInt32(0)), ExprInt32(0)), + (ExprOp("==", a | ExprInt32(12), ExprInt32(14)), + ExprOp("==", a | ExprInt32(12), ExprInt32(14))), + (ExprOp("parity", ExprInt32(0xf)), ExprInt1(1)), + (ExprOp("parity", ExprInt32(0xe)), ExprInt1(0)), + (ExprInt32(0x4142)[:32], ExprInt32(0x4142)), + (ExprInt32(0x4142)[:8], ExprInt8(0x42)), + (ExprInt32(0x4142)[8:16], ExprInt8(0x41)), + (a[:32], a), + (a[:8][:8], a[:8]), + (a[:16][:8], a[:8]), + (a[8:16][:8], a[8:16]), + (a[8:32][:8], a[8:16]), + (a[:16][8:16], a[8:16]), + (ExprCompose([(a, 0, 32)]), a), + (ExprCompose([(a[:16], 0, 16)]), a[:16]), + (ExprCompose([(a[:16], 0, 16), (a[:16], 16, 32)]), + ExprCompose([(a[:16], 0, 16), (a[:16], 16, 32)]),), + (ExprCompose([(a[:16], 0, 16), (a[16:32], 16, 32)]), a), + + (ExprMem(a)[:32], ExprMem(a)), + (ExprMem(a)[:16], ExprMem(a, size=16)), + + (ExprCond(ExprInt32(1), a, b), a), + (ExprCond(ExprInt32(0), b, a), a), + + (ExprInt32(0x80000000)[31:32], ExprInt1(1)), + (ExprCompose([ + (ExprInt16(0x1337)[ + :8], 0, 8), (ExprInt16(0x1337)[8:16], 8, 16)]), + ExprInt16(0x1337)), + + (ExprCompose([(ExprInt32(0x1337beef)[8:16], 8, 16), + (ExprInt32(0x1337beef)[:8], 0, 8), + (ExprInt32(0x1337beef)[16:32], 16, 32)]), + ExprInt32(0x1337BEEF)), + (ExprCond(a, + ExprCond(a, + b, + c), + d), ExprCond(a, b, d)), + ((a & b & ExprInt32(0x12))[31:32], 
ExprInt1(0)), + + (ExprCompose([ + (ExprCond(a, ExprInt16(0x10), ExprInt16(0x20)), 0, 16), + (ExprInt16(0x1337), 16, 32)]), + ExprCond(a, ExprInt32(0x13370010), ExprInt32(0x13370020))), + (ExprCond(ExprCond(a, ExprInt1(0), ExprInt1(1)), b, c), + ExprCond(a, c, b)), + (ExprCond(a, ExprInt32(0x10), ExprInt32(0x20)) + ExprInt32(0x13370000), + ExprCond(a, ExprInt32(0x13370010), ExprInt32(0x13370020))), + + (ExprCond(a, ExprInt32(0x10), ExprInt32(0x20)) + ExprCond(a, ExprInt32(0x13370000), ExprInt32(0x13380000)), + ExprCond(a, ExprInt32(0x13370010), ExprInt32(0x13380020))), + (-ExprCond(a, ExprInt32(0x1), ExprInt32(0x2)), + ExprCond(a, ExprInt32(-0x1), ExprInt32(-0x2))), + (ExprOp('*', a, b, c, ExprInt32(0x12))[0:17], + ExprOp( + '*', a[0:17], b[0:17], c[0:17], ExprInt(mod_size2uint[17](0x12)))), + (ExprOp('*', a, ExprInt32(0xffffffff)), + -a), + (ExprOp('*', -a, -b, c, ExprInt32(0x12)), + ExprOp('*', a, b, c, ExprInt32(0x12))), + (ExprOp('*', -a, -b, -c, ExprInt32(0x12)), + ExprOp('*', -a, b, c, ExprInt32(0x12))), + (a | ExprInt32(0xffffffff), + ExprInt32(0xffffffff)), + (ExprCond(a, ExprInt32(1), ExprInt32(2)) * ExprInt32(4), + ExprCond(a, ExprInt32(4), ExprInt32(8))), + (ExprCond(a, b, c) + ExprCond(a, d, e), + ExprCond(a, b + d, c + e)), + (ExprCond(a, b, c) * ExprCond(a, d, e), + ExprCond(a, b * d, c * e)), + + (ExprCond(a, ExprInt32(8), ExprInt32(4)) >> ExprInt32(1), + ExprCond(a, ExprInt32(4), ExprInt32(2))), + + (ExprCond(a, b, c) >> ExprCond(a, d, e), + ExprCond(a, b >> d, c >> e)), + + (a & b & ExprInt_fromsize(a.size, -1), a & b), + (a | b | ExprInt_fromsize(a.size, -1), + ExprInt_fromsize(a.size, -1)), +] + +for e, e_check in to_test[:]: + # + print "#" * 80 + e_check = expr_simp(e_check) + # print str(e), str(e_check) + e_new = expr_simp(e) + print "original: ", str(e), "new: ", str(e_new) + rez = e_new == e_check + if not rez: + raise ValueError( + 'bug in expr_simp simp(%s) is %s and should be %s' % (e, e_new, e_check)) + + +x = ExprId('x') +y = 
ExprId('y') +z = ExprId('z') +a = ExprId('a') +b = ExprId('b') +c = ExprId('c') + + +jra = ExprId('jra') +jrb = ExprId('jrb') +jrint1 = ExprId('jrint1') + + +e1 = ExprMem((a & ExprInt32(0xFFFFFFFC)) + ExprInt32(0x10), 32) +e2 = ExprMem((a & ExprInt32(0xFFFFFFFC)) + b, 32) +e3 = (a ^ b ^ ((a ^ b) & (b ^ (b - a))) ^ (b - a)).canonize() + +match_tests = [ + (MatchExpr(ExprInt32(12), a, [a]), {a: ExprInt32(12)}), + (MatchExpr(x, a, [a]), {a: x}), + (MatchExpr(x + y, a, [a]), {a: x + y}), + (MatchExpr(x + y, a + y, [a]), {a: x}), + (MatchExpr(x + y, x + a, [a]), {a: y}), + (MatchExpr(x + y, a + b, [a, b]), {a: x, b: y}), + (MatchExpr(x + ExprInt32(12), a + b, [a, b]), {a: x, b: ExprInt32(12)}), + (MatchExpr(ExprMem(x), a, [a]), {a: ExprMem(x)}), + (MatchExpr(ExprMem(x), ExprMem(a), [a]), {a: x}), + (MatchExpr(x[0:8], a, [a]), {a: x[0:8]}), + (MatchExpr(x[0:8], a[0:8], [a]), {a: x}), + (MatchExpr(ExprCond(x, y, z), a, [a]), {a: ExprCond(x, y, z)}), + (MatchExpr(ExprCond(x, y, z), + ExprCond(a, b, c), [a, b, c]), + {a: x, b: y, c: z}), + (MatchExpr(ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)]), a, [a]), + {a: ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)])}), + (MatchExpr(ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)]), + ExprCompose([(a[:8], 0, 8), (b[:8], 8, 16)]), [a, b]), + {a: x, b: y}), + (MatchExpr(e1, e2, [b]), {b: ExprInt32(0x10)}), + (MatchExpr(e3, + (((jra ^ jrb) & (jrb ^ jrint1)) + ^ jra ^ jrb ^ jrint1).canonize(), + [jra, jrb, jrint1]), + {jra: a, jrb: b, jrint1: b - a}), +] + +for test, res in match_tests: + assert(test == res) + + +get_tests = [ + (ExprAff(ExprMem(a), ExprMem(b)).get_r(True), set([a, b, ExprMem(b)])), + (ExprAff(ExprMem(a), ExprMem(b)).get_w(), set([ExprMem(a)])), + (ExprAff(ExprMem(ExprMem(a)), ExprMem(b)) + .get_r(True), set([a, b, ExprMem(b), ExprMem(a)])), +] + + +for test, res in get_tests: + assert(test == res) + + +to_test = [(a + b, b + a), + (a + m, m + a), + ((a[:8] + s), (s + a[:8])), + ((m[:8] + s), (s + m[:8])), + ((i1 + i2), (i2 + 
i1)), + ((a + i2), (i2 + a)), + ((m + i2), (i2 + m)), + ((s + i2[:8]), (i2[:8] + s)), + (o, o2), + (ExprOp('+', *l), ExprOp('+', *l2)), + ] + +for x, y in to_test: + x, y = x.canonize(), y.canonize() + + assert(x == y) + assert(str(x) == str(y)) + print x + +print 'all tests ok' diff --git a/test/expression/stp.py b/test/expression/stp.py new file mode 100644 index 00000000..fe09e865 --- /dev/null +++ b/test/expression/stp.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import unittest + + +class TestIrIr2STP(unittest.TestCase): + + def test_ExprOp_strcst(self): + from miasm2.expression.expression import ExprInt32, ExprOp + import miasm2.expression.stp # /!\ REALLY DIRTY HACK + args = [ExprInt32(i) for i in xrange(9)] + + self.assertEqual( + ExprOp('|', *args[:2]).strcst(), r'(0bin00000000000000000000000000000000 | 0bin00000000000000000000000000000001)') + self.assertEqual( + ExprOp('-', *args[:2]).strcst(), r'BVUMINUS(0bin00000000000000000000000000000000)') + self.assertEqual( + ExprOp('+', *args[:3]).strcst(), r'BVPLUS(32,BVPLUS(32,0bin00000000000000000000000000000000, 0bin00000000000000000000000000000001), 0bin00000000000000000000000000000010)') + self.assertRaises(ValueError, ExprOp('X', *args[:1]).strcst) + + def test_ExprSlice_strcst(self): + from miasm2.expression.expression import ExprInt32, ExprSlice + import miasm2.expression.stp # /!\ REALLY DIRTY HACK + args = [ExprInt32(i) for i in xrange(9)] + + self.assertEqual( + args[0][1:2].strcst(), r'(0bin00000000000000000000000000000000)[1:1]') + self.assertRaises(ValueError, args[0].__getitem__, slice(1,7,2)) + +if __name__ == '__main__': + testsuite = unittest.TestLoader().loadTestsFromTestCase(TestIrIr2STP) + report = unittest.TextTestRunner(verbosity=2).run(testsuite) + exit(len(report.errors + report.failures)) + diff --git a/test/ir/ir2C.py b/test/ir/ir2C.py new file mode 100644 index 00000000..c5ae1b8f --- /dev/null +++ b/test/ir/ir2C.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +#-*- 
coding:utf-8 -*- + +import unittest + + +class TestIrIr2C(unittest.TestCase): + + def test_ExprOp_toC(self): + from miasm2.expression.expression import ExprInt32, ExprOp + import miasm2.ir.ir2C # /!\ REALLY DIRTY HACK + args = [ExprInt32(i) for i in xrange(9)] + + # Unary operators + self.assertEqual( + ExprOp('parity', *args[:1]).toC(), r'parity(0x0&0xffffffff)') + self.assertEqual( + ExprOp('!', *args[:1]).toC(), r'(~ 0x0)&0xffffffff') + self.assertEqual( + ExprOp('hex2bcd', *args[:1]).toC(), r'hex2bcd_32(0x0)') + self.assertEqual(ExprOp('fabs', *args[:1]).toC(), r'fabs(0x0)') + self.assertRaises(ValueError, ExprOp('X', *args[:1]).toC) + + # Binary operators + self.assertEqual( + ExprOp('==', *args[:2]).toC(), r'(((0x0&0xffffffff) == (0x1&0xffffffff))?1:0)') + self.assertEqual( + ExprOp('%', *args[:2]).toC(), r'(((0x0&0xffffffff)%(0x1&0xffffffff))&0xffffffff)') + self.assertEqual( + ExprOp('-', *args[:2]).toC(), r'(((0x0&0xffffffff) - (0x1&0xffffffff))&0xffffffff)') + self.assertEqual( + ExprOp('bsr', *args[:2]).toC(), r'my_bsr(0x0, 0x1)') + self.assertEqual( + ExprOp('cpuid0', *args[:2]).toC(), r'cpuid0(0x0, 0x1)') + self.assertEqual( + ExprOp('fcom0', *args[:2]).toC(), r'fcom0(0x0, 0x1)') + self.assertEqual( + ExprOp('fadd', *args[:2]).toC(), r'fadd(0x0, 0x1)') + self.assertEqual( + ExprOp('segm', *args[:2]).toC(), r'segm2addr(vmcpu, 0x0, 0x1)') + self.assertEqual( + ExprOp('imod', *args[:2]).toC(), r'imod32(vmcpu, 0x0, 0x1)') + self.assertEqual( + ExprOp('bcdadd', *args[:2]).toC(), r'bcdadd_32(0x0, 0x1)') + self.assertRaises(ValueError, ExprOp('X', *args[:2]).toC) + + # Ternary operators + self.assertEqual( + ExprOp('div8', *args[:3]).toC(), r'(div_op(32, 0x0, 0x1, 0x2) &0xffffffff)') + + # Other cases + self.assertEqual( + ExprOp('+', *args[:3]).toC(), r'(((0x0&0xffffffff)+(0x1&0xffffffff)+(0x2&0xffffffff))&0xffffffff)') + self.assertRaises(NotImplementedError, ExprOp('X', *args[:3]).toC) + +if __name__ == '__main__': + testsuite = 
unittest.TestLoader().loadTestsFromTestCase(TestIrIr2C) + report = unittest.TextTestRunner(verbosity=2).run(testsuite) + exit(len(report.errors + report.failures)) diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py new file mode 100644 index 00000000..0d3db7e8 --- /dev/null +++ b/test/ir/symbexec.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import unittest + + +class TestSymbExec(unittest.TestCase): + + def test_ClassDef(self): + from miasm2.expression.expression import ExprInt32, ExprId, ExprMem, ExprCompose + from miasm2.arch.x86.arch import mn_x86 + from miasm2.ir.symbexec import symbexec + + addrX = ExprInt32(-1) + addr0 = ExprInt32(0) + addr1 = ExprInt32(1) + addr8 = ExprInt32(8) + addr9 = ExprInt32(9) + addr20 = ExprInt32(20) + addr40 = ExprInt32(40) + addr50 = ExprInt32(50) + mem0 = ExprMem(addr0) + mem1 = ExprMem(addr1) + mem8 = ExprMem(addr8) + mem9 = ExprMem(addr9) + mem20 = ExprMem(addr20) + mem40v = ExprMem(addr40, 8) + mem40w = ExprMem(addr40, 16) + mem50v = ExprMem(addr50, 8) + mem50w = ExprMem(addr50, 16) + id_x = ExprId('x') + id_y = ExprId('y', 8) + id_a = ExprId('a') + id_eax = ExprId('eax_init') + + e = symbexec( + mn_x86, {mem0: id_x, mem1: id_y, mem9: id_x, mem40w: id_x, mem50v: id_y, id_a: addr0, id_eax: addr0}) + self.assertEqual(e.find_mem_by_addr(addr0), mem0) + self.assertEqual(e.find_mem_by_addr(addrX), None) + self.assertEqual(e.eval_ExprMem(ExprMem(addr1 - addr1)), id_x) + self.assertEqual(e.eval_ExprMem(ExprMem(addr1, 8)), id_y) + self.assertEqual(e.eval_ExprMem(ExprMem(addr1 + addr1)), ExprCompose( + [(id_x[16:32], 0, 16), (ExprMem(ExprInt32(4), 16), 16, 32)])) + self.assertEqual(e.eval_ExprMem(mem8), ExprCompose( + [(id_x[0:24], 0, 24), (ExprMem(ExprInt32(11), 8), 24, 32)])) + self.assertEqual(e.eval_ExprMem(mem40v), id_x[:8]) + self.assertEqual(e.eval_ExprMem(mem50w), ExprCompose( + [(id_y, 0, 8), (ExprMem(ExprInt32(51), 8), 8, 16)])) + self.assertEqual(e.eval_ExprMem(mem20), mem20) + e.func_read = lambda 
x: x + self.assertEqual(e.eval_ExprMem(mem20), mem20) + self.assertEqual(set(e.modified()), set(e.symbols)) + self.assertRaises( + KeyError, e.symbols.__getitem__, ExprMem(ExprInt32(100))) + +if __name__ == '__main__': + testsuite = unittest.TestLoader().loadTestsFromTestCase(TestSymbExec) + report = unittest.TextTestRunner(verbosity=2).run(testsuite) + exit(len(report.errors + report.failures)) diff --git a/test/test_all.py b/test/test_all.py new file mode 100644 index 00000000..378755a4 --- /dev/null +++ b/test/test_all.py @@ -0,0 +1,443 @@ +import subprocess +import sys +import os +import time +import argparse +import tempfile + +# Available tests + +all_tests = { + "test": { + "architecture": [ + ["arch/x86/arch.py"], + ["arch/arm/arch.py"], + ["arch/arm/sem.py"], + ["arch/msp430/arch.py"], + ["arch/msp430/sem.py"], + ["arch/sh4/arch.py"], + ], + "core": [ + ["core/interval.py"], + ["core/graph.py"], + ["core/parse_asm.py"], + ], + "expression": [ + ["expression/modint.py"], + ["expression/stp.py"], + ["expression/simplifications.py"], + ], + "ir": [ + ["ir/ir2C.py"], + ["ir/symbexec.py"], + ], + "order": [ + "architecture", + "core", + "expression", + "ir", + ], + }, + "example": { + "assembler": [ + ["asm_x86.py"], + ["asm_arm.py"], + ["asm_box_x86_32.py"], + ["asm_box_x86_32_enc.py"], + ["asm_box_x86_32_mod.py"], + ["asm_box_x86_32_mod_self.py"], + ["asm_box_x86_32_repmod.py"], + ["disasm_01.py"], + ["disasm_02.py"], + ["disasm_03.py", "box_upx.exe", "0x410f90"], + ], + "expression": [ + ["symbol_exec.py"], + ["expression/manip_expression1.py"], + ["expression/manip_expression2.py"], + ["expression/manip_expression3.py"], + ["expression/manip_expression4.py", + "expression/sc_connect_back.bin", "0x2e"], + ["expression/manip_expression5.py"], + ["expression/manip_expression6.py"], + ["expression/manip_expression7.py"], + ["test_dis.py", "-g", "-s", "-m", "arm", "demo_arm.bin", "0"], + ["test_dis.py", "-g", "-s", "-m", + "x86_32", "box_x86_32.bin", 
"0x401000"], + ["expression/solve_condition_stp.py", + "expression/simple_test.bin"], + ], + "jitter": [ + ["unpack_upx.py", "--jitter", "tcc", "box_upx.exe"], + ["unpack_upx.py", "--jitter", "llvm", "box_upx.exe"], + ["test_jit_x86_32.py", "x86_32_sc.bin"], + ["test_jit_arm.py", "md5_arm", "A684"], + ["sandbox_pe_x86_32.py", "--jitter", "tcc", "box_x86_32.bin"], + ["sandbox_pe_x86_32.py", "--jitter", "llvm", "box_x86_32.bin"], + ["sandbox_pe_x86_32.py", "--jitter", "tcc", "box_x86_32_enc.bin"], + ["sandbox_pe_x86_32.py", "--jitter", "llvm", "box_x86_32_enc.bin"], + ["sandbox_pe_x86_32.py", "--jitter", "tcc", "box_x86_32_mod.bin"], + ["sandbox_pe_x86_32.py", "--jitter", "llvm", "box_x86_32_mod.bin"], + ["sandbox_pe_x86_32.py", "--jitter", + "tcc", "box_x86_32_mod_self.bin"], + ["sandbox_pe_x86_32.py", "--jitter", + "llvm", "box_x86_32_mod_self.bin"], + ["sandbox_pe_x86_32.py", "--jitter", + "tcc", "box_x86_32_repmod.bin"], + ["sandbox_pe_x86_32.py", "--jitter", + "llvm", "box_x86_32_repmod.bin"], + ], + "order": [ + "assembler", + "expression", + "jitter", + ], + }, + "order": [ + "test", + "example", + ], +} + +# Cosmetic + + +def getTerminalSize(): + "Return the size of the terminal : COLUMNS, LINES" + + env = os.environ + + def ioctl_GWINSZ(fd): + try: + import fcntl + import termios + import struct + import os + cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, + '1234')) + except: + return + return cr + cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) + if not cr: + try: + fd = os.open(os.ctermid(), os.O_RDONLY) + cr = ioctl_GWINSZ(fd) + os.close(fd) + except: + pass + if not cr: + cr = (env.get('LINES', 25), env.get('COLUMNS', 80)) + return int(cr[1]), int(cr[0]) + + +WIDTH = getTerminalSize()[0] +colors = {"red": "\033[91;1m", + "end": "\033[0m", + "green": "\033[92;1m", + "lightcyan": "\033[96m", + "blue": "\033[94;1m"} + + +def write_colored(text, color, already_printed=0): + text_colored = colors[color] + text + colors["end"] + print 
" " * (WIDTH - already_printed - len(text)) + text_colored + + +def write_underline(text): + print "\033[4m" + text + colors["end"] + + +def print_conf(conf, value): + return colors["green"] + conf + ": " + colors["end"] + str(value) + + +def clr_screen(global_state, pstate): + "Update the screen to display some information" + + # Header + to_print = [] + to_print.append(" " * (global_state["termSize"][0] / 2 - 10) + colors[ + "blue"] + "Miasm2 Regression tests" + colors["end"]) + to_print.append("") + to_print.append("=" * global_state["termSize"][0]) + to_print.append("") + to_print.append(print_conf("Current mode", "Multiprocessing")) + to_print.append(print_conf("Nb CPU detected", global_state["cpu_c"])) + to_print.append("") + to_print.append("=" * global_state["termSize"][0]) + to_print.append("") + to_print.append( + print_conf("Current section", global_state["section"].upper())) + to_print.append( + print_conf("Current subsection", global_state["subsection"].upper())) + test_done = 0 + test_failed = 0 + message = global_state["message"] + "\n" + for k, v in pstate.items(): + if v["status"] != "running": + test_done += 1 + if v["status"] != 0: + test_failed += 1 + message += colors["red"] + "FAIL: " + colors["end"] + k + message += v["message"] + "\n" + + to_print.append(print_conf("Success rate", "%d/%d" % + (test_done - test_failed, test_done))) + printed_time = time.strftime( + "%M:%S", time.gmtime(time.time() - global_state["init_time"])) + to_print.append(print_conf("Cumulated time", printed_time)) + to_print.append("") + to_print.append("=" * global_state["termSize"][0]) + + cur = "\n".join(to_print) + cur += "\n" + + # Message + cur += message + print cur + already_printed = cur.count("\n") + + # Current state + current_job = [] + for t in pstate.values(): + if t["status"] == "running": + current_job.append(t) + print "\n" * (global_state["termSize"][1] - already_printed - 3 - len(current_job)) + + for j in current_job: + s = "[" + colors["lightcyan"] 
+ j["command"] + colors["end"] + s_end = time.strftime( + "%M:%Ss", time.gmtime(time.time() - j["init_time"])) + l = len(j["command"]) + len(s_end) + 4 + len(str(j["pid"])) + 2 + s_end += " " + colors["blue"] + str(j["pid"]) + colors["end"] + "]" + print "%s%s%s" % (s, " " * (global_state["termSize"][0] - l), s_end) + +# Tests handling + + +def are_tests_finished(test_names, done): + for t in test_names: + if t not in done: + return False + return True + + +def are_tests_finished_multi(test_names, pstate): + for t in test_names: + t = " ".join(t) + if t not in pstate.keys(): + return False + if pstate[t]["status"] == "running": + return False + return True + + +def test_iter(done): + "Return an iterator on next tests, wait for previous sections" + + for section_name in all_tests["order"]: + # Go to the right directory + os.chdir(os.path.join("..", section_name)) + + # Update global state + section_content = all_tests[section_name] + write_underline(section_name.upper()) + + for subsection_name in section_content["order"]: + subsection_content = section_content[subsection_name] + write_underline("%s > %s" % (section_name.upper(), + subsection_name.upper())) + for test_line in subsection_content: + yield test_line + + while not(are_tests_finished(subsection_content, done)): + time.sleep(0.050) + + +def test_iter_multi(global_state, pstate): + "Multiprocessor version of test_iter" + + # Global message : subsections done + message = "" + + for section_name in all_tests["order"]: + # Update global state + section_content = all_tests[section_name] + global_state["section"] = section_name + + for subsection_name in section_content["order"]: + subsection_content = section_content[subsection_name] + beg_time = time.time() + global_state["subsection"] = subsection_name + + for test_line in subsection_content: + yield test_line + + while not(are_tests_finished_multi(subsection_content, pstate)): + # Wait for task to finish, update the screen + time.sleep(0.100) + 
clr_screen(global_state, pstate) + + message += "%s > %s completed in %.08f seconds\n" % (section_name.upper(), + subsection_name.upper( + ), + time.time() - beg_time) + global_state["message"] = message + + # Final update + clr_screen(global_state, pstate) + + +def run_test(test, coveragerc=None): + s = "Running tests on %s ..." % " ".join(test) + sys.stdout.write(s) + sys.stdout.flush() + + args = test + if coveragerc is not None: + args = ["-m", "coverage", "run", "--rcfile", coveragerc, "-a"] + test + + # Launch test + testpy = subprocess.Popen(["python"] + args, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + outputs = testpy.communicate() + + # Check result + if testpy.returncode == 0: + write_colored("OK", "green", len(s)) + else: + write_colored("ERROR", "red", len(s)) + print outputs[1] + + +def run_test_parallel(test, current, global_state): + + pid = os.getpid() + test_key = " ".join(test) + + # Keep current PID + current[test_key] = {"status": "running", + "pid": pid, + "command": test_key, + "init_time": time.time()} + + # Go to the right directory + os.chdir(os.path.join("..", global_state["section"])) + + # Launch test + testpy = subprocess.Popen(["python"] + test, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + outputs = testpy.communicate() + + # Check result + message = "" + if testpy.returncode != 0: + message = outputs[1] + + # Update result + current[test_key] = {"status": testpy.returncode, + "message": message} + +# Multiprocessing handling + +try: + from multiprocessing import Manager, Pool, cpu_count + multiproc = True +except ImportError: + multiproc = False + +# Argument parsing +parser = argparse.ArgumentParser(description="Miasm2 testing tool") +parser.add_argument("-m", "--mono", help="Force monothreading", + action="store_true") +parser.add_argument("-c", "--coverage", help="Include code coverage", + action="store_true") +args = parser.parse_args() + +if args.mono is True or args.coverage is True: + multiproc = False + +# 
Handle coverage +coveragerc = None +if args.coverage is True: + try: + import coverage + except ImportError: + print "%(red)s[Coverage]%(end)s Python 'coverage' module is required" % colors + exit(-1) + + # Create directory + suffix = "_" + str(int(time.time())) + cov_dir = tempfile.mkdtemp(suffix, "m2_coverage_") + + # Create configuration file + coveragerc = os.path.join(cov_dir, ".coveragerc") + coverage = os.path.join(cov_dir, ".coverage") + + from ConfigParser import ConfigParser + from os.path import expanduser + + config = ConfigParser() + config.read(['/etc/coveragerc', expanduser('~/.coveragerc')]) + if not config.has_section('run'): + config.add_section('run') + config.set('run', 'data_file', coverage) + config.write(open(coveragerc, 'w')) + + # Inform the user + d = {"blue": colors['blue'], + "end": colors['end'], + "cov_dir": cov_dir} + print "[%(blue)sCoverage%(end)s] Report will be written in %(cov_dir)s" % d + +# Handle llvm modularity + +llvm = True +try: + import llvm +except ImportError: + llvm = False + +# if llvm.version != (3,2): +# llvm = False + +if llvm is False: + print "%(red)s[LLVM]%(end)s Python 'py-llvm 3.2' module is required for llvm tests" % colors + + # Remove llvm tests + for test in all_tests["example"]["jitter"]: + if "llvm" in test: + all_tests["example"]["jitter"].remove(test) + print "%(red)s[LLVM]%(end)s Remove" % colors, " ".join(test) + + # Let the user see messages + time.sleep(0.5) + +# Run tests + +if multiproc is False: + done = list() + for test in test_iter(done): + run_test(test, coveragerc=coveragerc) + done.append(test) + +else: + # Parallel version + cpu_c = cpu_count() + global_state = {"cpu_c": cpu_c, + "init_time": time.time(), + "termSize": getTerminalSize(), + "message": ""} + + manager = Manager() + pool = Pool(processes=cpu_c) + current = manager.dict() + + for test in test_iter_multi(global_state, current): + pool.apply_async(run_test_parallel, (test, + current, + global_state)) + + pool.close() + 
pool.join() |