diff options
154 files changed, 5333 insertions, 4010 deletions
diff --git a/.travis.yml b/.travis.yml index 49d7eef2..e1b1debc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,9 +37,12 @@ before_script: - "cd z3/build" - "make install" - "cd ../.." -# install miasm +# Miasm - "cd ..;" - "cd miasm;" +# turn deprecation warning into RuntimeError +- "find . -name '*.py' | xargs sed -i 's/warnings\\.warn(/raise RuntimeError(/g'" +# install - "python setup.py build build_ext -I$(pwd)/../virtualenv/include -L$(pwd)/../virtualenv/tinycc" - "python setup.py install" script: "python -c 'import z3; x = z3.BitVec(chr(0x41), 32)' && cd test && python test_all.py && git ls-files -o --exclude-standard" diff --git a/README.md b/README.md index 118051df..9be3bdc6 100644 --- a/README.md +++ b/README.md @@ -45,19 +45,25 @@ Assembling / Disassembling -------------------------- Import Miasm x86 architecture: -``` +```pycon >>> from miasm2.arch.x86.arch import mn_x86 +>>> from miasm2.core.locationdb import LocationDB ``` -Assemble a line: +Get a location db: + +```pycon +>>> loc_db = LocationDB() ``` ->>> l = mn_x86.fromstring('XOR ECX, ECX', 32) +Assemble a line: +```pycon +>>> l = mn_x86.fromstring('XOR ECX, ECX', loc_db, 32) >>> print l XOR ECX, ECX >>> mn_x86.asm(l) ['1\xc9', '3\xc9', 'g1\xc9', 'g3\xc9'] ``` Modify an operand: -``` +```pycon >>> l.args[0] = mn_x86.regs.EAX >>> print l XOR EAX, ECX @@ -66,13 +72,13 @@ XOR EAX, ECX ['1\xc8', '3\xc1', 'g1\xc8', 'g3\xc1'] ``` Disassemble the result: -``` +```pycon >>> print mn_x86.dis(a[0], 32) XOR EAX, ECX ``` Using `Machine` abstraction: -``` +```pycon >>> from miasm2.analysis.machine import Machine >>> mn = Machine('x86_32').mn >>> print mn.dis('\x33\x30', 32) @@ -80,7 +86,7 @@ XOR ESI, DWORD PTR [EAX] ``` For Mips: -``` +```pycon >>> mn = Machine('mips32b').mn >>> print mn.dis('97A30020'.decode('hex'), "b") LHU V1, 0x20(SP) @@ -90,36 +96,39 @@ Intermediate representation Create an instruction: -``` +```pycon >>> machine = Machine('arml') ->>> l = machine.mn.dis('002088e0'.decode('hex'), 'l') 
->>> print l +>>> instr = machine.mn.dis('002088e0'.decode('hex'), 'l') +>>> print instr ADD R2, R8, R0 ``` -Create an intermediate representation (IR) object: +Create an intermediate representation object: +```pycon +>>> ira = machine.ira(loc_db) ``` ->>> ira = machine.ira() +Create an empty ircfg +```pycon +>>> ircfg = ira.new_ircfg() ``` Add instruction to the pool: -``` ->>> ira.add_instr(l) +```pycon +>>> ira.add_instr_to_ircfg(instr, ircfg) ``` Print current pool: -``` ->>> for lbl, irblock in ira.blocks.items(): -... print irblock -... -loc_0000000000000000:0x00000000 +```pycon +>>> for lbl, irblock in ircfg.blocks.items(): +... print irblock.to_string(loc_db) +loc_0: +R2 = R8 + R0 - R2 = (R8+R0) +IRDst = loc_4 - IRDst = loc_0000000000000004:0x00000004 ``` Working with IR, for instance by getting side effects: -``` ->>> for lbl, irblock in ira.blocks.iteritems(): +```pycon +>>> for lbl, irblock in ircfg.blocks.iteritems(): ... for assignblk in irblock: ... rw = assignblk.get_rw() ... for dst, reads in rw.iteritems(): @@ -130,15 +139,16 @@ Working with IR, for instance by getting side effects: read: ['R8', 'R0'] written: R2 -read: ['loc_0000000000000004:0x00000004'] +read: [] written: IRDst + ``` Emulation --------- Giving a shellcode: -``` +```pycon 00000000 8d4904 lea ecx, [ecx+0x4] 00000003 8d5b01 lea ebx, [ebx+0x1] 00000006 80f901 cmp cl, 0x1 @@ -152,7 +162,7 @@ Giving a shellcode: ``` Import the shellcode thanks to the `Container` abstraction: -``` +```pycon >>> from miasm2.analysis.binary import Container >>> c = Container.from_string(s) >>> c @@ -161,42 +171,41 @@ Import the shellcode thanks to the `Container` abstraction: Disassembling the shellcode at address `0`: -``` +```pycon >>> from miasm2.analysis.machine import Machine >>> machine = Machine('x86_32') >>> mdis = machine.dis_engine(c.bin_stream) ->>> blocks = mdis.dis_multiblock(0) ->>> for block in blocks: -... 
print block +>>> asmcfg = mdis.dis_multiblock(0) +>>> for block in asmcfg.blocks: +... print block.to_string(asmcfg.loc_db) ... -loc_0000000000000000:0x00000000 -LEA ECX, DWORD PTR [ECX+0x4] -LEA EBX, DWORD PTR [EBX+0x1] +loc_0 +LEA ECX, DWORD PTR [ECX + 0x4] +LEA EBX, DWORD PTR [EBX + 0x1] CMP CL, 0x1 -JZ loc_0000000000000010:0x00000010 --> c_next:loc_000000000000000B:0x0000000b c_to:loc_0000000000000010:0x00000010 -loc_0000000000000010:0x00000010 -LEA EBX, DWORD PTR [EBX+0x1] --> c_next:loc_0000000000000013:0x00000013 -loc_000000000000000B:0x0000000b -LEA EBX, DWORD PTR [EBX+0xFFFFFFFF] -JMP loc_0000000000000013:0x00000013 --> c_to:loc_0000000000000013:0x00000013 -loc_0000000000000013:0x00000013 +JZ loc_10 +-> c_next:loc_b c_to:loc_10 +loc_10 +LEA EBX, DWORD PTR [EBX + 0x1] +-> c_next:loc_13 +loc_b +LEA EBX, DWORD PTR [EBX + 0xFFFFFFFF] +JMP loc_13 +-> c_to:loc_13 +loc_13 MOV EAX, EBX RET ->>> ``` Initializing the Jit engine with a stack: -``` +```pycon >>> jitter = machine.jitter(jit_type='python') >>> jitter.init_stack() ``` Add the shellcode in an arbitrary memory location: -``` +```pycon >>> run_addr = 0x40000000 >>> from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE >>> jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, s) @@ -204,7 +213,7 @@ Add the shellcode in an arbitrary memory location: Create a sentinelle to catch the return of the shellcode: -``` +```Python def code_sentinelle(jitter): jitter.run = False jitter.pc = 0 @@ -216,14 +225,13 @@ def code_sentinelle(jitter): Active logs: -``` ->>> jitter.jit.log_regs = True ->>> jitter.jit.log_mn = True +```pycon +>>> jitter.set_trace_log() ``` Run at arbitrary address: -``` +```pycon >>> jitter.init_run(run_addr) >>> jitter.continue_run() RAX 0000000000000000 RBX 0000000000000000 RCX 0000000000000000 RDX 0000000000000000 @@ -252,7 +260,7 @@ RIP 0000000040000013 Interacting with the jitter: -``` +```pycon >>> jitter.vm ad 1230000 size 10000 RW_ hpad 0x2854b40 ad 40000000 size 16 RW_ hpad 
0x25e0ed0 @@ -267,154 +275,220 @@ Symbolic execution Initializing the IR pool: -``` +```pycon >>> ira = machine.ira() ->>> for block in blocks: -... ira.add_block(block) -... +>>> ircfg = ira.new_ircfg_from_asmcfg(asmcfg) ``` Initializing the engine with default symbolic values: -``` +```pycon >>> from miasm2.ir.symbexec import SymbolicExecutionEngine ->>> sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) +>>> sb = SymbolicExecutionEngine(ira) ``` Launching the execution: -``` ->>> symbolic_pc = sb.run_at(0) +```pycon +>>> symbolic_pc = sb.run_at(ircfg, 0) >>> print symbolic_pc -((ECX_init+0x4)[0:8]+0xFF)?(0xB,0x10) +((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) ``` Same, with step logs (only changes are displayed): -``` +```pycon >>> sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) ->>> symbolic_pc = sb.run_at(0, step=True) +>>> symbolic_pc = sb.run_at(ircfg, 0, step=True) +Instr LEA ECX, DWORD PTR [ECX + 0x4] +Assignblk: +ECX = ECX + 0x4 +________________________________________________________________________________ +ECX = ECX + 0x4 ________________________________________________________________________________ -ECX (ECX_init+0x4) +Instr LEA EBX, DWORD PTR [EBX + 0x1] +Assignblk: +EBX = EBX + 0x1 ________________________________________________________________________________ -ECX (ECX_init+0x4) -EBX (EBX_init+0x1) +EBX = EBX + 0x1 +ECX = ECX + 0x4 ________________________________________________________________________________ -zf ((ECX_init+0x4)[0:8]+0xFF)?(0x0,0x1) -nf ((ECX_init+0x4)[0:8]+0xFF)[7:8] -pf (parity ((ECX_init+0x4)[0:8]+0xFF)) -of ((((ECX_init+0x4)[0:8]+0xFF)^(ECX_init+0x4)[0:8])&((ECX_init+0x4)[0:8]^0x1))[7:8] -cf (((((ECX_init+0x4)[0:8]+0xFF)^(ECX_init+0x4)[0:8])&((ECX_init+0x4)[0:8]^0x1))^((ECX_init+0x4)[0:8]+0xFF)^(ECX_init+0x4)[0:8]^0x1)[7:8] -af (((ECX_init+0x4)[0:8]+0xFF)&0x10)?(0x1,0x0) -ECX (ECX_init+0x4) -EBX (EBX_init+0x1) +Instr CMP CL, 0x1 +Assignblk: +zf = (ECX[0:8] + -0x1)?(0x0,0x1) +nf = (ECX[0:8] + -0x1)[7:8] +pf 
= parity((ECX[0:8] + -0x1) & 0xFF) +of = ((ECX[0:8] ^ (ECX[0:8] + -0x1)) & (ECX[0:8] ^ 0x1))[7:8] +cf = (((ECX[0:8] ^ 0x1) ^ (ECX[0:8] + -0x1)) ^ ((ECX[0:8] ^ (ECX[0:8] + -0x1)) & (ECX[0:8] ^ 0x1)))[7:8] +af = ((ECX[0:8] ^ 0x1) ^ (ECX[0:8] + -0x1))[4:5] ________________________________________________________________________________ -IRDst ((ECX_init+0x4)[0:8]+0xFF)?(0xB,0x10) -zf ((ECX_init+0x4)[0:8]+0xFF)?(0x0,0x1) -nf ((ECX_init+0x4)[0:8]+0xFF)[7:8] -pf (parity ((ECX_init+0x4)[0:8]+0xFF)) -of ((((ECX_init+0x4)[0:8]+0xFF)^(ECX_init+0x4)[0:8])&((ECX_init+0x4)[0:8]^0x1))[7:8] -cf (((((ECX_init+0x4)[0:8]+0xFF)^(ECX_init+0x4)[0:8])&((ECX_init+0x4)[0:8]^0x1))^((ECX_init+0x4)[0:8]+0xFF)^(ECX_init+0x4)[0:8]^0x1)[7:8] -af (((ECX_init+0x4)[0:8]+0xFF)&0x10)?(0x1,0x0) -EIP ((ECX_init+0x4)[0:8]+0xFF)?(0xB,0x10) -ECX (ECX_init+0x4) -EBX (EBX_init+0x1) +af = (((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[4:5] +pf = parity((ECX + 0x4)[0:8] + 0xFF) +zf = ((ECX + 0x4)[0:8] + 0xFF)?(0x0,0x1) +ECX = ECX + 0x4 +of = ((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1))[7:8] +nf = ((ECX + 0x4)[0:8] + 0xFF)[7:8] +cf = (((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1)) ^ ((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[7:8] +EBX = EBX + 0x1 +________________________________________________________________________________ +Instr JZ loc_key_1 +Assignblk: +IRDst = zf?(loc_key_1,loc_key_2) +EIP = zf?(loc_key_1,loc_key_2) +________________________________________________________________________________ +af = (((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[4:5] +EIP = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +pf = parity((ECX + 0x4)[0:8] + 0xFF) +IRDst = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +zf = ((ECX + 0x4)[0:8] + 0xFF)?(0x0,0x1) +ECX = ECX + 0x4 +of = ((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1))[7:8] +nf = ((ECX + 0x4)[0:8] + 0xFF)[7:8] +cf = (((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 
0x4)[0:8] ^ 0x1)) ^ ((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[7:8] +EBX = EBX + 0x1 +________________________________________________________________________________ +>>> ``` Retry execution with a concrete ECX. Here, the symbolic / concolic execution reach the shellcode's end: -``` ->>> from miasm2.expression.expression import ExprInt32 ->>> sb.symbols[machine.mn.regs.ECX] = ExprInt32(-3) ->>> symbolic_pc = sb.run_at(0, step=True) +```pycon +>>> from miasm2.expression.expression import ExprInt +>>> sb.symbols[machine.mn.regs.ECX] = ExprInt(-3) +>>> symbolic_pc = sb.run_at(ircfg, 0, step=True) +Instr LEA ECX, DWORD PTR [ECX + 0x4] +Assignblk: +ECX = ECX + 0x4 ________________________________________________________________________________ -ECX 0x1 +af = (((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[4:5] +EIP = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +pf = parity((ECX + 0x4)[0:8] + 0xFF) +IRDst = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +zf = ((ECX + 0x4)[0:8] + 0xFF)?(0x0,0x1) +ECX = 0x1 +of = ((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1))[7:8] +nf = ((ECX + 0x4)[0:8] + 0xFF)[7:8] +cf = (((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1)) ^ ((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[7:8] +EBX = EBX + 0x1 ________________________________________________________________________________ -ECX 0x1 -EBX (EBX_init+0x1) +Instr LEA EBX, DWORD PTR [EBX + 0x1] +Assignblk: +EBX = EBX + 0x1 ________________________________________________________________________________ -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 0x0 -ECX 0x1 -EBX (EBX_init+0x1) +af = (((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[4:5] +EIP = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +pf = parity((ECX + 0x4)[0:8] + 0xFF) +IRDst = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +zf = ((ECX + 0x4)[0:8] + 0xFF)?(0x0,0x1) +ECX = 0x1 +of = ((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1))[7:8] +nf = ((ECX + 0x4)[0:8] + 0xFF)[7:8] 
+cf = (((((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8]) & ((ECX + 0x4)[0:8] ^ 0x1)) ^ ((ECX + 0x4)[0:8] + 0xFF) ^ (ECX + 0x4)[0:8] ^ 0x1)[7:8] +EBX = EBX + 0x2 ________________________________________________________________________________ -IRDst 0x10 -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 0x0 -EIP 0x10 -ECX 0x1 -EBX (EBX_init+0x1) +Instr CMP CL, 0x1 +Assignblk: +zf = (ECX[0:8] + -0x1)?(0x0,0x1) +nf = (ECX[0:8] + -0x1)[7:8] +pf = parity((ECX[0:8] + -0x1) & 0xFF) +of = ((ECX[0:8] ^ (ECX[0:8] + -0x1)) & (ECX[0:8] ^ 0x1))[7:8] +cf = (((ECX[0:8] ^ 0x1) ^ (ECX[0:8] + -0x1)) ^ ((ECX[0:8] ^ (ECX[0:8] + -0x1)) & (ECX[0:8] ^ 0x1)))[7:8] +af = ((ECX[0:8] ^ 0x1) ^ (ECX[0:8] + -0x1))[4:5] ________________________________________________________________________________ -IRDst 0x10 -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 0x0 -EIP 0x10 -ECX 0x1 -EBX (EBX_init+0x2) +af = 0x0 +EIP = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +pf = 0x1 +IRDst = ((ECX + 0x4)[0:8] + 0xFF)?(0xB,0x10) +zf = 0x1 +ECX = 0x1 +of = 0x0 +nf = 0x0 +cf = 0x0 +EBX = EBX + 0x2 ________________________________________________________________________________ -IRDst 0x13 -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 0x0 -EIP 0x10 -ECX 0x1 -EBX (EBX_init+0x2) +Instr JZ loc_key_1 +Assignblk: +IRDst = zf?(loc_key_1,loc_key_2) +EIP = zf?(loc_key_1,loc_key_2) ________________________________________________________________________________ -IRDst 0x13 -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 0x0 -EIP 0x10 -EAX (EBX_init+0x2) -ECX 0x1 -EBX (EBX_init+0x2) +af = 0x0 +EIP = 0x10 +pf = 0x1 +IRDst = 0x10 +zf = 0x1 +ECX = 0x1 +of = 0x0 +nf = 0x0 +cf = 0x0 +EBX = EBX + 0x2 ________________________________________________________________________________ -IRDst @32[ESP_init] -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 0x0 -EIP @32[ESP_init] -EAX (EBX_init+0x2) -ECX 0x1 -EBX (EBX_init+0x2) -ESP (ESP_init+0x4) ->>> print symbolic_pc -@32[ESP_init] ->>> sb.dump_id() -IRDst @32[ESP_init] -zf 0x1 -nf 0x0 -pf 0x1 -of 0x0 -cf 0x0 -af 
0x0 -EIP @32[ESP_init] -EAX (EBX_init+0x2) -ECX 0x1 -EBX (EBX_init+0x2) -ESP (ESP_init+0x4) +Instr LEA EBX, DWORD PTR [EBX + 0x1] +Assignblk: +EBX = EBX + 0x1 +________________________________________________________________________________ +af = 0x0 +EIP = 0x10 +pf = 0x1 +IRDst = 0x10 +zf = 0x1 +ECX = 0x1 +of = 0x0 +nf = 0x0 +cf = 0x0 +EBX = EBX + 0x3 +________________________________________________________________________________ +Instr LEA EBX, DWORD PTR [EBX + 0x1] +Assignblk: +IRDst = loc_key_3 +________________________________________________________________________________ +af = 0x0 +EIP = 0x10 +pf = 0x1 +IRDst = 0x13 +zf = 0x1 +ECX = 0x1 +of = 0x0 +nf = 0x0 +cf = 0x0 +EBX = EBX + 0x3 +________________________________________________________________________________ +Instr MOV EAX, EBX +Assignblk: +EAX = EBX +________________________________________________________________________________ +af = 0x0 +EIP = 0x10 +pf = 0x1 +IRDst = 0x13 +zf = 0x1 +ECX = 0x1 +of = 0x0 +nf = 0x0 +cf = 0x0 +EBX = EBX + 0x3 +EAX = EBX + 0x3 +________________________________________________________________________________ +Instr RET +Assignblk: +IRDst = @32[ESP[0:32]] +ESP = {ESP[0:32] + 0x4 0 32} +EIP = @32[ESP[0:32]] +________________________________________________________________________________ +af = 0x0 +EIP = @32[ESP] +pf = 0x1 +IRDst = @32[ESP] +zf = 0x1 +ECX = 0x1 +of = 0x0 +nf = 0x0 +cf = 0x0 +EBX = EBX + 0x3 +ESP = ESP + 0x4 +EAX = EBX + 0x3 +________________________________________________________________________________ +>>> ``` @@ -440,7 +514,7 @@ An auto-generated documentation is available [here](http://miasmdoc.ajax.re). 
Obtaining Miasm =============== -* Clone the repository: [Miasm on GitHub](https://github.com/serpilliere/miasm) +* Clone the repository: [Miasm on GitHub](https://github.com/cea-sec/miasm/) * Get one of the Docker images at [Docker Hub](https://registry.hub.docker.com/u/miasm/) Software requirements @@ -465,7 +539,7 @@ Configuration ------------- * Install elfesteem -``` +```pycon git clone https://github.com/serpilliere/elfesteem.git elfesteem cd elfesteem python setup.py build @@ -480,7 +554,7 @@ To use the jitter, GCC or LLVM is recommended * Debian stable/Ubuntu/Kali/whatever: `pip install llvmlite` or install from [llvmlite](https://github.com/numba/llvmlite) * Windows: Not tested * Build and install Miasm: -``` +```pycon $ cd miasm_directory $ python setup.py build $ sudo python setup.py install @@ -505,7 +579,7 @@ Testing Miasm comes with a set of regression tests. To run all of them: -``` +```pycon cd miasm_directory/test python test_all.py ``` diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 0c08a8a3..9be5b517 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -8,6 +8,7 @@ from elfesteem.strpatchwork import StrPatchwork from miasm2.core import parse_asm, asmblock from miasm2.analysis.machine import Machine from miasm2.core.interval import interval +from miasm2.core.locationdb import LocationDB parser = ArgumentParser("Multi-arch (32 bits) assembler") parser.add_argument('architecture', help="architecture: " + @@ -65,31 +66,34 @@ with open(args.source) as fstream: source = fstream.read() -symbol_pool = asmblock.AsmSymbolPool() +loc_db = LocationDB() -blocks, symbol_pool = parse_asm.parse_txt(machine.mn, attrib, source, symbol_pool) +asmcfg, loc_db = parse_asm.parse_txt(machine.mn, attrib, source, loc_db) # Fix shellcode addrs -symbol_pool.set_offset(symbol_pool.getby_name("main"), addr_main) +loc_db.set_location_offset(loc_db.get_name_location("main"), addr_main) if args.PE: - 
symbol_pool.set_offset(symbol_pool.getby_name_create("MessageBoxA"), - pe.DirImport.get_funcvirt('USER32.dll', 'MessageBoxA')) + loc_db.set_location_offset(loc_db.get_or_create_name_location("MessageBoxA"), + pe.DirImport.get_funcvirt('USER32.dll', + 'MessageBoxA')) # Print and graph firsts blocks before patching it -for block in blocks: +for block in asmcfg.blocks: print block -open("graph.dot", "w").write(blocks.dot()) +open("graph.dot", "w").write(asmcfg.dot()) # Apply patches patches = asmblock.asm_resolve_final(machine.mn, - blocks, - symbol_pool, + asmcfg, + loc_db, dst_interval) if args.encrypt: # Encrypt code - ad_start = symbol_pool.getby_name_create(args.encrypt[0]).offset - ad_stop = symbol_pool.getby_name_create(args.encrypt[1]).offset + loc_start = loc_db.get_or_create_name_location(args.encrypt[0]) + loc_stop = loc_db.get_or_create_name_location(args.encrypt[1]) + ad_start = loc_db.get_location_offset(loc_start) + ad_stop = loc_db.get_location_offset(loc_stop) new_patches = dict(patches) for ad, val in patches.items(): diff --git a/example/asm/simple.py b/example/asm/simple.py index 62d2ff80..5480e2f5 100644 --- a/example/asm/simple.py +++ b/example/asm/simple.py @@ -6,7 +6,7 @@ from miasm2.core import parse_asm, asmblock # Assemble code -blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +asmcfg, loc_db = parse_asm.parse_txt(mn_x86, 32, ''' main: MOV EAX, 1 MOV EBX, 2 @@ -21,14 +21,14 @@ loop: RET ''') -# Set 'main' label's offset -symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) +# Set 'main' loc_key's offset +loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) # Spread information and resolve instructions offset -patches = asmblock.asm_resolve_final(mn_x86, blocks, symbol_pool) +patches = asmblock.asm_resolve_final(mn_x86, asmcfg, loc_db) -# Show resolved blocks -for block in blocks: +# Show resolved asmcfg +for block in asmcfg.blocks: print block # Print offset -> bytes diff --git a/example/disasm/callback.py 
b/example/disasm/callback.py index a9bef20b..b9a09c09 100644 --- a/example/disasm/callback.py +++ b/example/disasm/callback.py @@ -1,9 +1,9 @@ from miasm2.core.bin_stream import bin_stream_str -from miasm2.core.asmblock import AsmLabel, AsmConstraint, expr_is_label +from miasm2.core.asmblock import AsmConstraint from miasm2.arch.x86.disasm import dis_x86_32, cb_x86_funcs -def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): +def cb_x86_callpop(cur_bloc, loc_db, *args, **kwargs): """ 1000: call 1005 1005: pop @@ -21,12 +21,15 @@ def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): last_instr = cur_bloc.lines[-1] if last_instr.name != 'CALL': return - ## The destination must be a label + ## The destination must be a location dst = last_instr.args[0] - if not expr_is_label(dst): + if not dst.is_loc(): return + + loc_key = dst.loc_key + offset = loc_db.get_location_offset(loc_key) ## The destination must be the next instruction - if dst.name.offset != last_instr.offset + last_instr.l: + if offset != last_instr.offset + last_instr.l: return # Update instruction instance @@ -34,7 +37,7 @@ def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): # Update next blocks to process in the disassembly engine cur_bloc.bto.clear() - cur_bloc.add_cst(dst.name.offset, AsmConstraint.c_next, symbol_pool) + cur_bloc.add_cst(loc_key, AsmConstraint.c_next) # Prepare a tiny shellcode @@ -46,8 +49,8 @@ bin_stream = bin_stream_str(shellcode) mdis = dis_x86_32(bin_stream) print "Without callback:\n" -blocks = mdis.dis_multiblock(0) -print "\n".join(str(block) for block in blocks) +asmcfg = mdis.dis_multiblock(0) +print "\n".join(str(block) for block in asmcfg.blocks) # Enable callback cb_x86_funcs.append(cb_x86_callpop) @@ -56,9 +59,9 @@ cb_x86_funcs.append(cb_x86_callpop) print "=" * 40 print "With callback:\n" -blocks_after = mdis.dis_multiblock(0) -print "\n".join(str(block) for block in blocks_after) +asmcfg_after = mdis.dis_multiblock(0) +print "\n".join(str(block) for 
block in asmcfg_after.blocks) # Ensure the callback has been called -assert blocks.heads()[0].lines[0].name == "CALL" -assert blocks_after.heads()[0].lines[0].name == "PUSH" +assert asmcfg.loc_key_to_block(asmcfg.heads()[0]).lines[0].name == "CALL" +assert asmcfg_after.loc_key_to_block(asmcfg_after.heads()[0]).lines[0].name == "PUSH" diff --git a/example/disasm/file.py b/example/disasm/file.py index 88ba6162..196e1b1a 100644 --- a/example/disasm/file.py +++ b/example/disasm/file.py @@ -13,6 +13,6 @@ cont = Container.from_stream(open(sys.argv[1])) mdis = dis_x86_32(cont.bin_stream) # Inform the engine to avoid disassembling null instructions mdis.dont_dis_nulstart_bloc = True -blocks = mdis.dis_multiblock(addr) +asmcfg = mdis.dis_multiblock(addr) -open('graph.dot', 'w').write(blocks.dot()) +open('graph.dot', 'w').write(asmcfg.dot()) diff --git a/example/disasm/full.py b/example/disasm/full.py index 84c856e1..cfbfc80c 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -3,7 +3,7 @@ from argparse import ArgumentParser from pdb import pm from miasm2.analysis.binary import Container -from miasm2.core.asmblock import log_asmblock, AsmLabel, AsmCFG +from miasm2.core.asmblock import log_asmblock, AsmCFG from miasm2.expression.expression import ExprId from miasm2.core.interval import interval from miasm2.analysis.machine import Machine @@ -85,7 +85,7 @@ mn, dis_engine = machine.mn, machine.dis_engine ira, ir = machine.ira, machine.ir log.info('ok') -mdis = dis_engine(bs, symbol_pool=cont.symbol_pool) +mdis = dis_engine(bs, loc_db=cont.loc_db) # configure disasm engine mdis.dontdis_retcall = args.dontdis_retcall mdis.blocs_wd = args.blockwatchdog @@ -99,7 +99,9 @@ for addr in args.address: addrs.append(int(addr, 0)) except ValueError: # Second chance, try with symbol - addrs.append(mdis.symbol_pool.getby_name(addr).offset) + loc_key = mdis.loc_db.get_name_location(addr) + offset = mdis.loc_db.get_location_offset(loc_key) + addrs.append(offset) if len(addrs) 
== 0 and default_addr is not None: addrs.append(default_addr) @@ -121,27 +123,28 @@ while not finish and todo: if ad in done: continue done.add(ad) - allblocks = mdis.dis_multiblock(ad) + asmcfg = mdis.dis_multiblock(ad) log.info('func ok %.16x (%d)' % (ad, len(all_funcs))) all_funcs.add(ad) - all_funcs_blocks[ad] = allblocks - for block in allblocks: + all_funcs_blocks[ad] = asmcfg + for block in asmcfg.blocks: for l in block.lines: done_interval += interval([(l.offset, l.offset + l.l)]) if args.funcswatchdog is not None: args.funcswatchdog -= 1 if args.recurfunctions: - for block in allblocks: + for block in asmcfg.blocks: instr = block.get_subcall_instr() if not instr: continue - for dest in instr.getdstflow(mdis.symbol_pool): - if not (isinstance(dest, ExprId) and isinstance(dest.name, AsmLabel)): + for dest in instr.getdstflow(mdis.loc_db): + if not dest.is_loc(): continue - todo.append((mdis, instr, dest.name.offset)) + offset = mdis.loc_db.get_location_offset(dest.loc_key) + todo.append((mdis, instr, offset)) if args.funcswatchdog is not None and args.funcswatchdog <= 0: finish = True @@ -155,13 +158,13 @@ while not finish and todo: # Generate dotty graph -all_blocks = AsmCFG() +all_asmcfg = AsmCFG(mdis.loc_db) for blocks in all_funcs_blocks.values(): - all_blocks += blocks + all_asmcfg += blocks log.info('generate graph file') -open('graph_execflow.dot', 'w').write(all_blocks.dot(offset=True)) +open('graph_execflow.dot', 'w').write(all_asmcfg.dot(offset=True)) log.info('generate intervals') @@ -186,15 +189,19 @@ log.info('total lines %s' % total_l) if args.gen_ir: log.info("generating IR and IR analysis") - ir_arch = ir(mdis.symbol_pool) - ir_arch_a = ira(mdis.symbol_pool) + ir_arch = ir(mdis.loc_db) + ir_arch_a = ira(mdis.loc_db) + + ircfg = ir_arch.new_ircfg() + ircfg_a = ir_arch.new_ircfg() + ir_arch.blocks = {} ir_arch_a.blocks = {} - for ad, all_block in all_funcs_blocks.items(): + for ad, asmcfg in all_funcs_blocks.items(): log.info("generating IR... 
%x" % ad) - for block in all_block: - ir_arch_a.add_block(block) - ir_arch.add_block(block) + for block in asmcfg.blocks: + ir_arch.add_asmblock_to_ircfg(block, ircfg) + ir_arch_a.add_asmblock_to_ircfg(block, ircfg_a) log.info("Print blocks (without analyse)") for label, block in ir_arch.blocks.iteritems(): @@ -207,25 +214,25 @@ if args.gen_ir: print block if args.simplify > 0: - dead_simp(ir_arch_a) + dead_simp(ir_arch_a, ircfg_a) if args.defuse: reachings = ReachingDefinitions(ir_arch_a) open('graph_defuse.dot', 'w').write(DiGraphDefUse(reachings).dot()) - out = ir_arch_a.graph.dot() + out = ircfg.dot() open('graph_irflow.dot', 'w').write(out) - out = ir_arch.graph.dot() + out = ircfg_a.dot() open('graph_irflow_raw.dot', 'w').write(out) if args.simplify > 1: - ir_arch_a.simplify(expr_simp) + ircfg_a.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch_a) - modified |= ir_arch_a.remove_empty_assignblks() - modified |= ir_arch_a.remove_jmp_blocks() - modified |= ir_arch_a.merge_blocks() + modified |= dead_simp(ir_arch_a, ircfg_a) + modified |= ircfg_a.remove_empty_assignblks() + modified |= ircfg_a.remove_jmp_blocks() + modified |= ircfg_a.merge_blocks() - open('graph_irflow_reduced.dot', 'w').write(ir_arch_a.graph.dot()) + open('graph_irflow_reduced.dot', 'w').write(ircfg_a.dot()) diff --git a/example/disasm/function.py b/example/disasm/function.py index 89f65abb..10495dbc 100644 --- a/example/disasm/function.py +++ b/example/disasm/function.py @@ -8,9 +8,9 @@ from miasm2.arch.x86.disasm import dis_x86_32 # RET shellcode = '\xb8\xef\xbe7\x13\xb9\x04\x00\x00\x00\xc1\xc0\x08\xe2\xfb\xc3' mdis = dis_x86_32(shellcode) -blocks = mdis.dis_multiblock(0) +asmcfg = mdis.dis_multiblock(0) -for block in blocks: +for block in asmcfg.blocks: print block -open('graph.dot', 'w').write(blocks.dot()) +open('graph.dot', 'w').write(asmcfg.dot()) diff --git a/example/disasm/single_instr.py b/example/disasm/single_instr.py index 
59b81de7..d17e303f 100644 --- a/example/disasm/single_instr.py +++ b/example/disasm/single_instr.py @@ -1,9 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.arch.x86.regs import EDX -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() -l = mn_x86.fromstring('MOV EAX, EBX', symbol_pool, 32) +loc_db = LocationDB() +l = mn_x86.fromstring('MOV EAX, EBX', loc_db, 32) print "instruction:", l print "arg:", l.args[0] x = mn_x86.asm(l) diff --git a/example/expression/access_c.py b/example/expression/access_c.py index de158730..e8d5e318 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -54,11 +54,10 @@ from miasm2.core.objc import ExprToAccessC, CHandler from miasm2.core.objc import CTypesManagerNotPacked from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct - -def find_call(ira): +def find_call(ircfg): """Returns (irb, index) which call""" - for irb in ira.blocks.values(): + for irb in ircfg.blocks.values(): out = set() if len(irb) < 2: continue @@ -92,17 +91,17 @@ class MyExprToAccessC(ExprToAccessC): reduction_rules = ExprToAccessC.reduction_rules + [reduce_compose] -def get_funcs_arg0(ctx, ira, lbl_head): +def get_funcs_arg0(ctx, ira, ircfg, lbl_head): """Compute DependencyGraph on the func @lbl_head""" - g_dep = DependencyGraph(ira, follow_call=False) + g_dep = DependencyGraph(ircfg, follow_call=False) element = ira.arch.regs.RSI - for irb, index in find_call(ira): + for irb, index in find_call(ircfg): instr = irb[index].instr print 'Analysing references from:', hex(instr.offset), instr - g_list = g_dep.get(irb.label, set([element]), index, set([lbl_head])) + g_list = g_dep.get(irb.loc_key, set([element]), index, set([lbl_head])) for dep in g_list: - emul_result = dep.emul(ctx) + emul_result = dep.emul(ira, ctx) value = emul_result[element] yield value @@ -141,16 +140,15 @@ cont = Container.fallback_container(data, None, addr=0) machine = 
Machine("x86_64") dis_engine, ira = machine.dis_engine, machine.ira -mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) +mdis = dis_engine(cont.bin_stream, loc_db=cont.loc_db) addr_head = 0 -blocks = mdis.dis_multiblock(addr_head) -lbl_head = mdis.symbol_pool.getby_offset(addr_head) +asmcfg = mdis.dis_multiblock(addr_head) +lbl_head = mdis.loc_db.get_offset_location(addr_head) -ir_arch_a = ira(mdis.symbol_pool) -for block in blocks: - ir_arch_a.add_block(block) +ir_arch_a = ira(mdis.loc_db) +ircfg = ir_arch_a.new_ircfg_from_asmcfg(asmcfg) -open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) +open('graph_irflow.dot', 'w').write(ircfg.dot()) # Main function's first argument's type is "struct ll_human*" ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) @@ -161,7 +159,7 @@ expr_types = {arg0: (ptr_llhuman,), mychandler = MyCHandler(types_mngr, expr_types) -for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): +for expr in get_funcs_arg0(ctx, ir_arch_a, ircfg, lbl_head): print "Access:", expr for c_str, ctype in mychandler.expr_to_c_and_types(expr): print '\taccess:', c_str diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 786b860e..7036d960 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -7,8 +7,9 @@ from miasm2.core import asmblock from miasm2.arch.x86.ira import ir_a_x86_32 from miasm2.analysis.data_flow import dead_simp + # First, asm code -blocks, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' +asmcfg, loc_db = parse_asm.parse_txt(mn_x86, 32, ''' main: MOV EAX, 1 MOV EBX, 2 @@ -24,33 +25,30 @@ loop: ''') -symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) -for block in blocks: +loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) +for block in asmcfg.blocks: print block print "symbols:" -print symbol_pool -patches = asmblock.asm_resolve_final(mn_x86, blocks, symbol_pool) +print loc_db +patches = asmblock.asm_resolve_final(mn_x86, 
asmcfg, loc_db) # Translate to IR -ir_arch = ir_a_x86_32(symbol_pool) -for block in blocks: - print 'add block' - print block - ir_arch.add_block(block) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Display IR -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock # Dead propagation -open('graph.dot', 'w').write(ir_arch.graph.dot()) +open('graph.dot', 'w').write(ircfg.dot()) print '*' * 80 -dead_simp(ir_arch) -open('graph2.dot', 'w').write(ir_arch.graph.dot()) +dead_simp(ir_arch, ircfg) +open('graph2.dot', 'w').write(ircfg.dot()) # Display new IR print 'new ir blocks' -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ircfg.blocks.items(): print irblock diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py index 70394580..d9c5fe65 100644 --- a/example/expression/constant_propagation.py +++ b/example/expression/constant_propagation.py @@ -28,27 +28,24 @@ machine = Machine("x86_32") cont = Container.from_stream(open(args.filename)) ira, dis_engine = machine.ira, machine.dis_engine mdis = dis_engine(cont.bin_stream) -ir_arch = ira(mdis.symbol_pool) +ir_arch = ira(mdis.loc_db) addr = int(args.address, 0) - -blocks = mdis.dis_multiblock(addr) -for block in blocks: - ir_arch.add_block(block) - +asmcfg = mdis.dis_multiblock(addr) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) init_infos = ir_arch.arch.regs.regs_init -cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) +cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) if args.simplify: - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= 
ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() -open("%s.propag.dot" % args.filename, 'w').write(ir_arch.graph.dot()) +open("%s.propag.dot" % args.filename, 'w').write(ircfg.dot()) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index 9e3b5caf..34d0f94a 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -1,9 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.expression.expression import get_rw from miasm2.arch.x86.ira import ir_a_x86_32 -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() print """ @@ -12,14 +12,14 @@ Get read/written registers for a given instruction """ arch = mn_x86 -ir_arch = ir_a_x86_32() - -l = arch.fromstring('LODSB', symbol_pool, 32) -l.offset, l.l = 0, 15 -ir_arch.add_instr(l) +ir_arch = ir_a_x86_32(loc_db) +ircfg = ir_arch.new_ircfg() +instr = arch.fromstring('LODSB', loc_db, 32) +instr.offset, instr.l = 0, 15 +ir_arch.add_instr_to_ircfg(instr, ircfg) print '*' * 80 -for lbl, irblock in ir_arch.blocks.iteritems(): +for lbl, irblock in ircfg.blocks.iteritems(): print irblock for assignblk in irblock: rw = assignblk.get_rw() @@ -28,4 +28,4 @@ for lbl, irblock in ir_arch.blocks.iteritems(): print 'written:', dst print -open('graph_instr.dot', 'w').write(ir_arch.graph.dot()) +open('graph_instr.dot', 'w').write(ircfg.dot()) diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index 26fdd2ec..b30bd29f 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -24,11 +24,11 @@ def node_x_2_id(n, x): def get_node_name(label, i, n): - n_name = (label.name, i, n) + n_name = (label, i, n) return n_name -def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): +def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes): symbols_init = 
ir_arch.arch.regs.regs_init.copy() sb = SymbolicExecutionEngine(ir_arch, symbols_init) sb.eval_updt_irblock(irblock) @@ -47,7 +47,7 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): all_mems.update(get_expr_mem(n)) for n in all_mems: - node_n_w = get_node_name(irblock.label, 0, n) + node_n_w = get_node_name(irblock.loc_key, 0, n) if not n == src: continue o_r = n.arg.get_r(mem_read=False, cst_read=True) @@ -55,7 +55,7 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irblock.label, i, n_r) + node_n_r = get_node_name(irblock.loc_key, i, n_r) if not n_r in in_nodes: in_nodes[n_r] = node_n_r flow_graph.add_uniq_edge(node_n_r, node_n_w) @@ -69,13 +69,13 @@ def intra_block_flow_symb(ir_arch, flow_graph, irblock, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irblock.label, 0, n_r) + node_n_r = get_node_name(irblock.loc_key, 0, n_r) if not n_r in in_nodes: in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) for n_w in nodes_w: - node_n_w = get_node_name(irblock.label, 1, n_w) + node_n_w = get_node_name(irblock.loc_key, 1, n_w) out_nodes[n_w] = node_n_w flow_graph.add_node(node_n_w) @@ -87,15 +87,18 @@ def node2str(self, node): return out -def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): - for irblock in ir_arch.blocks.values(): +def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb): + for irblock in ircfg.blocks.values(): print irblock - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) + irblock_0 = None - for irblock in ir_arch.blocks.values(): - if irblock.label.offset == ad: + for irblock in ircfg.blocks.values(): + loc_key = irblock.loc_key + offset = ircfg.loc_db.get_location_offset(loc_key) + if offset == ad: irblock_0 = irblock break assert(irblock_0 is not None) @@ -105,20 +108,20 @@ def gen_block_data_flow_graph(ir_arch, ad, 
block_flow_cb): irb_in_nodes = {} irb_out_nodes = {} - for label in ir_arch.blocks: + for label in ircfg.blocks: irb_in_nodes[label] = {} irb_out_nodes[label] = {} - for label, irblock in ir_arch.blocks.iteritems(): - block_flow_cb(ir_arch, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) + for label, irblock in ircfg.blocks.iteritems(): + block_flow_cb(ir_arch, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) - for label in ir_arch.blocks: + for label in ircfg.blocks: print label print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 - inter_block_flow(ir_arch, flow_graph, irblock_0.label, irb_in_nodes, irb_out_nodes) + inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) @@ -131,21 +134,16 @@ ad = int(args.addr, 16) print 'disasm...' mdis = dis_x86_32(data) mdis.follow_call = True -ab = mdis.dis_multiblock(ad) +asmcfg = mdis.dis_multiblock(ad) print 'ok' print 'generating dataflow graph for:' -ir_arch = ir_a_x86_32(mdis.symbol_pool) +ir_arch = ir_a_x86_32(mdis.loc_db) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) -blocks = ab -for block in blocks: - print block - ir_arch.add_block(block) -for irblock in ir_arch.blocks.values(): +for irblock in ircfg.blocks.values(): print irblock - if irblock.label.offset != 0: - continue if args.symb: @@ -153,7 +151,7 @@ if args.symb: else: block_flow_cb = intra_block_flow_raw -gen_block_data_flow_graph(ir_arch, ad, block_flow_cb) +gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb) print '*' * 40 print """ diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index 7c15b3e7..cb062fb3 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -32,7 +32,6 @@ x = ExprMem(a + b + ExprInt(0x42, 32), 32) def 
replace_expr(e): - # print 'visit', e dct = {c + ExprInt(0x42, 32): d, a + b: c, } if e in dct: diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 201d9f26..acb3abf4 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -5,16 +5,14 @@ from pdb import pm from miasm2.analysis.machine import Machine from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ - get_expr_ids, ExprAff + get_expr_ids, ExprAff, ExprLoc from miasm2.core.bin_stream import bin_stream_str -from miasm2.core import asmblock from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine from miasm2.ir.translators.translator import Translator - machine = Machine("x86_32") @@ -28,7 +26,7 @@ if not args: sys.exit(0) -def emul_symb(ir_arch, mdis, states_todo, states_done): +def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done): while states_todo: addr, symbols, conds = states_todo.pop() print '*' * 40, "addr", addr, '*' * 40 @@ -36,11 +34,11 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): print 'Known state, skipping', addr continue states_done.add((addr, symbols, conds)) - symbexec = SymbolicExecutionEngine(ir_arch, {}) + symbexec = SymbolicExecutionEngine(ir_arch) symbexec.symbols = symbols.copy() if ir_arch.pc in symbexec.symbols: del symbexec.symbols[ir_arch.pc] - irblock = get_block(ir_arch, mdis, addr) + irblock = get_block(ir_arch, ircfg, mdis, addr) print 'Run block:' print irblock @@ -55,8 +53,8 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)} addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {})) addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {})) - if not (addr_a.is_int() or 
asmblock.expr_is_label(addr_a) and - addr_b.is_int() or asmblock.expr_is_label(addr_b)): + if not (addr_a.is_int() or addr_a.is_loc() and + addr_b.is_int() or addr_b.is_loc()): print str(addr_a), str(addr_b) raise ValueError("Unsupported condition") if isinstance(addr_a, ExprInt): @@ -68,11 +66,10 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): elif addr == ret_addr: print 'Return address reached' continue - elif isinstance(addr, ExprInt): + elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) - elif asmblock.expr_is_label(addr): - addr = addr.name + elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") @@ -88,39 +85,51 @@ if __name__ == '__main__': addr = int(options.address, 16) - symbols_init = dict(machine.mn.regs.regs_init) - - ir_arch = machine.ir(mdis.symbol_pool) - symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + ir_arch = machine.ir(mdis.loc_db) + ircfg = ir_arch.new_ircfg() + symbexec = SymbolicExecutionEngine(ir_arch) - blocks, symbol_pool = parse_asm.parse_txt(machine.mn, 32, ''' + asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' + init: PUSH argv PUSH argc PUSH ret_addr ''', - symbol_pool=mdis.symbol_pool) + loc_db=mdis.loc_db) + + + argc_lbl = loc_db.get_name_location('argc') + argv_lbl = loc_db.get_name_location('argv') + ret_addr_lbl = loc_db.get_name_location('ret_addr') + init_lbl = loc_db.get_name_location('init') + argc_loc = ExprLoc(argc_lbl, 32) + argv_loc = ExprLoc(argv_lbl, 32) + ret_addr_loc = ExprLoc(ret_addr_lbl, 32) - argc_lbl = symbol_pool.getby_name('argc') - argv_lbl = symbol_pool.getby_name('argv') - ret_addr_lbl = symbol_pool.getby_name('ret_addr') - argc = ExprId(argc_lbl, 32) - argv = ExprId(argv_lbl, 32) - ret_addr = ExprId(ret_addr_lbl, 32) + ret_addr = ExprId("ret_addr", ret_addr_loc.size) + fix_args = { + argc_loc: ExprId("argc", argc_loc.size), + argv_loc: 
ExprId("argv", argv_loc.size), + ret_addr_loc: ret_addr, + } + + + + block = asmcfg.loc_key_to_block(init_lbl) + for instr in block.lines: + for i, arg in enumerate(instr.args): + instr.args[i]= arg.replace_expr(fix_args) + print block - b = list(blocks)[0] - print b # add fake address and len to parsed instructions - for i, line in enumerate(b.lines): - line.offset, line.l = i, 1 - ir_arch.add_block(b) - irb = get_block(ir_arch, mdis, 0) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irb = ircfg.blocks[init_lbl] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) - # reset ir_arch blocks ir_arch.blocks = {} @@ -129,7 +138,7 @@ if __name__ == '__main__': states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states - emul_symb(ir_arch, mdis, states_todo, states_done) + emul_symb(ir_arch, ircfg, mdis, states_todo, states_done) all_info = [] @@ -144,7 +153,7 @@ if __name__ == '__main__': all_cases = set() - symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) + symbexec = SymbolicExecutionEngine(ir_arch) for addr, reqs_cond in all_info: out = ['(set-logic QF_ABV)', '(set-info :smt-lib-version 2.0)'] diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index 9b9c2e95..e8b52e3e 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -10,7 +10,7 @@ from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk from miasm2.arch.msp430.ctype import CTypeMSP430_unk from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler from miasm2.core.ctypesmngr import CAstTypes -from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprAff +from miasm2.expression.expression import ExprLoc, ExprInt, ExprOp, ExprAff from miasm2.ir.symbexec_types import SymbExecCType from miasm2.expression.parser import str_to_expr from miasm2.analysis.cst_propag import add_state, propagate_cst_expr @@ -19,9 +19,7 @@ from utils import guess_machine class 
TypePropagationForm(ida_kernwin.Form): - def __init__(self, ira): - - self.ira = ira + def __init__(self): default_types_info = r"""ExprId("RDX", 64): char *""" archs = ["AMD64_unk", "X86_32_unk", "msp430_unk"] @@ -201,10 +199,9 @@ class SymbExecCTypeFix(SymbExecCType): if expr.is_int(): continue for c_str, c_type in self.chandler.expr_to_c_and_types(expr, self.symbols): - expr = self.cst_propag_link.get((irb.label, index), {}).get(expr, expr) + expr = self.cst_propag_link.get((irb.loc_key, index), {}).get(expr, expr) offset2cmt.setdefault(instr.offset, set()).add( "\n%s: %s\n%s" % (expr, c_str, c_type)) - self.eval_updt_assignblk(assignblk) for offset, value in offset2cmt.iteritems(): idc.MakeComm(offset, '\n'.join(value)) @@ -243,42 +240,42 @@ def get_ira_call_fixer(ira): def analyse_function(): - - # Init - machine = guess_machine() - mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira - - bs = bin_stream_ida() - mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) - - - iraCallStackFixer = get_ira_call_fixer(ira) - ir_arch = iraCallStackFixer(mdis.symbol_pool) - - # Get settings - settings = TypePropagationForm(ir_arch) + settings = TypePropagationForm() ret = settings.Execute() if not ret: return + + end = None if settings.cScope.value == 0: addr = settings.functionAddr.value else: addr = settings.startAddr.value if settings.cScope.value == 2: end = settings.endAddr - mdis.dont_dis = [end] - blocks = mdis.dis_multiblock(addr) + # Init + machine = guess_machine(addr=addr) + mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira + + bs = bin_stream_ida() + mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) + if end is not None: + mdis.dont_dis = [end] + + + iraCallStackFixer = get_ira_call_fixer(ira) + ir_arch = iraCallStackFixer(mdis.loc_db) + + asmcfg = mdis.dis_multiblock(addr) # Generate IR - for block in blocks: - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) cst_propag_link = {} if settings.cUnalias.value: 
init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp] } - cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) + cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) @@ -298,7 +295,8 @@ def analyse_function(): expr_str, ctype_str = expr_str.strip(), ctype_str.strip() expr = str_to_expr(expr_str) ast = mychandler.types_mngr.types_ast.parse_c_type( - ctype_str) + ctype_str + ) ctype = mychandler.types_mngr.types_ast.ast_parse_declaration(ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 @@ -306,18 +304,21 @@ def analyse_function(): infos_types[expr] = set([objc]) # Add fake head - lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) - lbl_head = ir_arch.symbol_pool.getby_name_create("start") - - first_block = blocks.label2block(lbl_real_start) - - assignblk_head = AssignBlock([ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)), - ExprAff( - ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) - ], first_block.lines[0]) + lbl_real_start = ir_arch.loc_db.get_offset_location(addr) + lbl_head = ir_arch.loc_db.get_or_create_name_location("start") + + first_block = asmcfg.label2block(lbl_real_start) + + assignblk_head = AssignBlock( + [ + ExprAff(ir_arch.IRDst, ExprLoc(lbl_real_start, ir_arch.IRDst.size)), + ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) + ], + first_block.lines[0] + ) irb_head = IRBlock(lbl_head, [assignblk_head]) - ir_arch.blocks[lbl_head] = irb_head - ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) + ircfg.blocks[lbl_head] = irb_head + ircfg.add_uniq_edge(lbl_head, lbl_real_start) state = TypePropagationEngine.StateEngine(infos_types) states = {lbl_head: state} @@ -330,24 +331,24 @@ def analyse_function(): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec_engine = TypePropagationEngine(ir_arch, 
types_mngr, state) - addr = symbexec_engine.run_block_at(lbl) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) - ir_arch._graph = None - sons = ir_arch.graph.successors(lbl) + sons = ircfg.successors(lbl) for son in sons: - add_state(ir_arch, todo, states, son, - symbexec_engine.get_state()) + add_state( + ircfg, todo, states, son, + symbexec_engine.get_state() + ) for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state, cst_propag_link) - addr = symbexec_engine.run_block_at(lbl) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index 5342313a..297877a1 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -19,16 +19,18 @@ from utils import guess_machine class depGraphSettingsForm(ida_kernwin.Form): - def __init__(self, ira): + def __init__(self, ira, ircfg): self.ira = ira + self.ircfg = ircfg self.stk_args = {'ARG%d' % i:i for i in xrange(10)} self.stk_unalias_force = False self.address = idc.ScreenEA() cur_block = None - for block in ira.getby_offset(self.address): - if block.label.offset is not None: + for block in ircfg.getby_offset(self.address): + offset = self.ircfg.loc_db.get_location_offset(block.loc_key) + if offset is not None: # Only one block non-generated assert cur_block is None cur_block = block @@ -38,8 +40,8 @@ class depGraphSettingsForm(ida_kernwin.Form): if assignblk.instr.offset == self.address: break assert line_nb is not None - cur_label = str(cur_block.label) - labels = sorted(map(str, ira.blocks.keys())) + cur_loc_key = str(cur_block.loc_key) + loc_keys = sorted(map(str, ircfg.blocks.keys())) regs = sorted(ira.arch.regs.all_regs_ids_byname.keys()) regs += self.stk_args.keys() reg_default = regs[0] @@ -85,21 +87,21 @@ Method to use: 
tp=ida_kernwin.Form.FT_RAWHEX, value=line_nb), 'cbBBL': ida_kernwin.Form.DropdownListControl( - items=labels, + items=loc_keys, readonly=False, - selval=cur_label), + selval=cur_loc_key), 'cColor': ida_kernwin.Form.ColorInput(value=0xc0c020), }) self.Compile() @property - def label(self): + def loc_key(self): value = self.cbBBL.value - for real_label in self.ira.blocks: - if str(real_label) == value: - return real_label - raise ValueError("Bad label") + for real_loc_key in self.ircfg.blocks: + if str(real_loc_key) == value: + return real_loc_key + raise ValueError("Bad loc_key") @property def line_nb(self): @@ -110,13 +112,13 @@ Method to use: elif mode == 1: return value + 1 else: - return len(self.ira.blocks[self.label]) + return len(self.ircfg.blocks[self.loc_key]) @property def elements(self): value = self.cbReg.value if value in self.stk_args: - line = self.ira.blocks[self.label][self.line_nb].instr + line = self.ircfg.blocks[self.loc_key][self.line_nb].instr arg_num = self.stk_args[value] stk_high = m2_expr.ExprInt(idc.GetSpd(line.offset), ir_arch.sp.size) stk_off = m2_expr.ExprInt(self.ira.sp.size/8 * arg_num, ir_arch.sp.size) @@ -134,7 +136,7 @@ Method to use: @property def depgraph(self): value = self.cMethod.value - return DependencyGraph(self.ira, + return DependencyGraph(self.ircfg, implicit=value & 4, follow_mem=value & 1, follow_call=value & 2) @@ -174,7 +176,7 @@ def treat_element(): for node in graph.relevant_nodes: try: - offset = ir_arch.blocks[node.label][node.line_nb].instr.offset + offset = ir_arch.blocks[node.loc_key][node.line_nb].instr.offset except IndexError: print "Unable to highlight %s" % node continue @@ -184,7 +186,7 @@ def treat_element(): if graph.has_loop: print 'Graph has dependency loop: symbolic execution is inexact' else: - print "Possible value: %s" % graph.emul().values()[0] + print "Possible value: %s" % graph.emul(self.ira).values()[0] for offset, elements in comments.iteritems(): idc.MakeComm(offset, ", ".join(map(str, 
elements))) @@ -197,38 +199,39 @@ def next_element(): def launch_depgraph(): global graphs, comments, sol_nb, settings, addr, ir_arch + # Get the current function + addr = idc.ScreenEA() + func = ida_funcs.get_func(addr) + # Init - machine = guess_machine() + machine = guess_machine(addr=func.startEA) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) - ir_arch = ira(mdis.symbol_pool) + ir_arch = ira(mdis.loc_db) # Populate symbols with ida names for ad, name in idautils.Names(): if name is None: continue - mdis.symbol_pool.add_label(name, ad) + mdis.loc_db.add_location(name, ad) - # Get the current function - addr = idc.ScreenEA() - func = ida_funcs.get_func(addr) - blocks = mdis.dis_multiblock(func.startEA) + asmcfg = mdis.dis_multiblock(func.startEA) # Generate IR - for block in blocks: - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Get settings - settings = depGraphSettingsForm(ir_arch) + settings = depGraphSettingsForm(ir_arch, ircfg) settings.Execute() - label, elements, line_nb = settings.label, settings.elements, settings.line_nb + loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb # Simplify affectations for irb in ir_arch.blocks.values(): irs = [] - fix_stack = irb.label.offset is not None and settings.unalias_stack + offset = ir_arch.loc_db.get_location_offset(irb.loc_key) + fix_stack = offset is not None and settings.unalias_stack for assignblk in irb: if fix_stack: stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset), ir_arch.sp.size) @@ -243,12 +246,12 @@ def launch_depgraph(): dst, src = expr_simp(dst), expr_simp(src) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) - ir_arch.blocks[irb.label] = IRBlock(irb.label, irs) + ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) # Get dependency graphs dg = settings.depgraph - graphs = dg.get(label, elements, 
line_nb, - set([ir_arch.symbol_pool.getby_offset(func.startEA)])) + graphs = dg.get(loc_key, elements, line_nb, + set([ir_arch.loc_db.get_offset_location(func.startEA)])) # Display the result comments = {} diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 6dfa1f7d..afd00d5c 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -6,7 +6,7 @@ import idc import idautils from miasm2.core.bin_stream_ida import bin_stream_ida -from miasm2.core.asmblock import expr_is_label, AsmLabel, is_int +from miasm2.core.asmblock import is_int from miasm2.expression.simplifications import expr_simp from miasm2.analysis.data_flow import dead_simp from miasm2.ir.ir import AssignBlock, IRBlock @@ -33,17 +33,15 @@ def label_str(self): else: return "%s:%s" % (self.name, str(self.offset)) -AsmLabel.__init__ = label_init -AsmLabel.__str__ = label_str def color_irblock(irblock, ir_arch): out = [] - lbl = idaapi.COLSTR(str(irblock.label), idaapi.SCOLOR_INSN) + lbl = idaapi.COLSTR(ir_arch.loc_db.pretty_str(irblock.loc_key), idaapi.SCOLOR_INSN) out.append(lbl) for assignblk in irblock: for dst, src in sorted(assignblk.iteritems()): - dst_f = expr2colorstr(ir_arch.arch.regs.all_regs_ids, dst) - src_f = expr2colorstr(ir_arch.arch.regs.all_regs_ids, src) + dst_f = expr2colorstr(dst, loc_db=ir_arch.loc_db) + src_f = expr2colorstr(src, loc_db=ir_arch.loc_db) line = idaapi.COLSTR("%s = %s" % (dst_f, src_f), idaapi.SCOLOR_INSN) out.append(' %s' % line) out.append("") @@ -56,31 +54,29 @@ def color_irblock(irblock, ir_arch): class GraphMiasmIR(idaapi.GraphViewer): - def __init__(self, ir_arch, title, result): + def __init__(self, ircfg, title, result): idaapi.GraphViewer.__init__(self, title) - self.ir_arch = ir_arch + self.ircfg = ircfg self.result = result self.names = {} def OnRefresh(self): self.Clear() addr_id = {} - for irblock in self.ir_arch.blocks.values(): - id_irblock = self.AddNode(color_irblock(irblock, self.ir_arch)) + for irblock in 
self.ircfg.blocks.values(): + id_irblock = self.AddNode(color_irblock(irblock, self.ircfg)) addr_id[irblock] = id_irblock - for irblock in self.ir_arch.blocks.values(): + for irblock in self.ircfg.blocks.values(): if not irblock: continue - all_dst = self.ir_arch.dst_trackback(irblock) + all_dst = self.ircfg.dst_trackback(irblock) for dst in all_dst: - if not expr_is_label(dst): + if not dst.is_loc(): continue - - dst = dst.name - if not dst in self.ir_arch.blocks: + if not dst.loc_key in self.ircfg.blocks: continue - dst_block = self.ir_arch.blocks[dst] + dst_block = self.ircfg.blocks[dst.loc_key] node1 = addr_id[irblock] node2 = addr_id[dst_block] self.AddEdge(node1, node2) @@ -102,7 +98,9 @@ class GraphMiasmIR(idaapi.GraphViewer): def build_graph(verbose=False, simplify=False): - machine = guess_machine() + start_addr = idc.ScreenEA() + + machine = guess_machine(addr=start_addr) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira if verbose: @@ -114,43 +112,37 @@ def build_graph(verbose=False, simplify=False): bs = bin_stream_ida() mdis = dis_engine(bs) - ir_arch = ira(mdis.symbol_pool) + ir_arch = ira(mdis.loc_db) # populate symbols with ida names for addr, name in idautils.Names(): - # print hex(ad), repr(name) if name is None: continue - if (mdis.symbol_pool.getby_offset(addr) or - mdis.symbol_pool.getby_name(name)): + if (mdis.loc_db.get_offset_location(addr) or + mdis.loc_db.get_name_location(name)): # Symbol alias continue - mdis.symbol_pool.add_label(name, addr) + mdis.loc_db.add_location(name, addr) if verbose: print "start disasm" - addr = idc.ScreenEA() if verbose: print hex(addr) - blocks = mdis.dis_multiblock(addr) + asmcfg = mdis.dis_multiblock(start_addr) if verbose: print "generating graph" - open('asm_flow.dot', 'w').write(blocks.dot()) + open('asm_flow.dot', 'w').write(asmcfg.dot()) - print "generating IR... %x" % addr + print "generating IR... 
%x" % start_addr - for block in blocks: - if verbose: - print 'ADD' - print block - ir_arch.add_block(block) + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) if verbose: - print "IR ok... %x" % addr + print "IR ok... %x" % start_addr - for irb in ir_arch.blocks.itervalues(): + for irb in ircfg.blocks.itervalues(): irs = [] for assignblk in irb: new_assignblk = { @@ -158,27 +150,27 @@ def build_graph(verbose=False, simplify=False): for dst, src in assignblk.iteritems() } irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) - ir_arch.blocks[irb.label] = IRBlock(irb.label, irs) + ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) if verbose: - out = ir_arch.graph.dot() + out = ircfg.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) title = "Miasm IR graph" if simplify: - dead_simp(ir_arch) + dead_simp(ir_arch, ircfg) - ir_arch.simplify(expr_simp) + ircfg.simplify(expr_simp) modified = True while modified: modified = False - modified |= dead_simp(ir_arch) - modified |= ir_arch.remove_empty_assignblks() - modified |= ir_arch.remove_jmp_blocks() - modified |= ir_arch.merge_blocks() + modified |= dead_simp(ir_arch, ircfg) + modified |= ircfg.remove_empty_assignblks() + modified |= ircfg.remove_jmp_blocks() + modified |= ircfg.merge_blocks() title += " (simplified)" - g = GraphMiasmIR(ir_arch, title, None) + g = GraphMiasmIR(ircfg, title, None) g.Show() diff --git a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index f019f77d..ffaa9b27 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -34,8 +34,16 @@ class ActionHandlerTranslate(ActionHandler): class symbolicexec_t(idaapi.simplecustviewer_t): def add(self, key, value): - self.AddLine("%s = %s" % (expr2colorstr(self.machine.mn.regs.all_regs_ids, key), - expr2colorstr(self.machine.mn.regs.all_regs_ids, value))) + self.AddLine("%s = %s" % ( + expr2colorstr( + key, + loc_db=self.loc_db + ), + expr2colorstr( + value, + loc_db=self.loc_db + ) + )) def 
expand(self, linenum): element = self.line2eq[linenum] @@ -61,11 +69,12 @@ class symbolicexec_t(idaapi.simplecustviewer_t): form.Compile() form.Execute() - def Create(self, equations, machine, *args, **kwargs): + def Create(self, equations, machine, loc_db, *args, **kwargs): if not super(symbolicexec_t, self).Create(*args, **kwargs): return False self.machine = machine + self.loc_db = loc_db self.line2eq = sorted(equations.items(), key=operator.itemgetter(0)) self.lines_expanded = set() @@ -119,21 +128,25 @@ def symbolic_exec(): from utils import guess_machine + start, end = idc.SelStart(), idc.SelEnd() + bs = bin_stream_ida() - machine = guess_machine() + machine = guess_machine(addr=start) mdis = machine.dis_engine(bs) - start, end = idc.SelStart(), idc.SelEnd() + + if start == idc.BADADDR and end == idc.BADADDR: + start = idc.ScreenEA() + end = idc.next_head(start) # Get next instruction address mdis.dont_dis = [end] - blocks = mdis.dis_multiblock(start) - ira = machine.ira() - for block in blocks: - ira.add_block(block) + asmcfg = mdis.dis_multiblock(start) + ira = machine.ira(loc_db=mdis.loc_db) + ircfg = ira.new_ircfg_from_asmcfg(asmcfg) print "Run symbolic execution..." 
sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) - sb.run_at(start) + sb.run_at(ircfg, start) modified = {} for dst, src in sb.modified(init_state=machine.mn.regs.regs_init): @@ -141,8 +154,9 @@ def symbolic_exec(): view = symbolicexec_t() all_views.append(view) - if not view.Create(modified, machine, - "Symbolic Execution - 0x%x to 0x%x" % (start, end)): + if not view.Create(modified, machine, mdis.loc_db, + "Symbolic Execution - 0x%x to 0x%x" + % (start, idc.prev_head(end))): return view.Show() diff --git a/example/ida/utils.py b/example/ida/utils.py index e026f2fc..c66475f2 100644 --- a/example/ida/utils.py +++ b/example/ida/utils.py @@ -5,7 +5,7 @@ from miasm2.analysis.machine import Machine from miasm2.ir.translators import Translator import miasm2.expression.expression as m2_expr -def guess_machine(): +def guess_machine(addr=None): "Return an instance of Machine corresponding to the IDA guessed processor" processor_name = GetLongPrm(INF_PROCNAME) @@ -39,7 +39,14 @@ def guess_machine(): (False, 64, True): "aarch64b", (False, 64, False): "aarch64l", } - is_armt = globals().get('armt', False) + + # Get T reg to detect arm/thumb function + # Default is arm + is_armt = False + if addr is not None: + t_reg = GetReg(addr, "T") + is_armt = t_reg == 1 + is_bigendian = info.is_be() infos = (is_armt, size, is_bigendian) if not infos in info2machine: @@ -72,22 +79,29 @@ class TranslatorIDA(Translator): # Implemented language __LANG__ = "ida_w_color" - def __init__(self, regs_ids=None, **kwargs): + def __init__(self, loc_db=None, **kwargs): super(TranslatorIDA, self).__init__(**kwargs) - if regs_ids is None: - regs_ids = {} - self.regs_ids = regs_ids + self.loc_db = loc_db def str_protected_child(self, child, parent): - return ("(%s)" % self.from_expr(child)) if m2_expr.should_parenthesize_child(child, parent) else self.from_expr(child) + return ("(%s)" % ( + self.from_expr(child)) if m2_expr.should_parenthesize_child(child, parent) + else 
self.from_expr(child) + ) def from_ExprInt(self, expr): return idaapi.COLSTR(str(expr), idaapi.SCOLOR_NUMBER) def from_ExprId(self, expr): - out = str(expr) - if expr in self.regs_ids: - out = idaapi.COLSTR(out, idaapi.SCOLOR_REG) + out = idaapi.COLSTR(str(expr), idaapi.SCOLOR_REG) + return out + + def from_ExprLoc(self, expr): + if self.loc_db is not None: + out = self.loc_db.pretty_str(expr.loc_key) + else: + out = str(expr) + out = idaapi.COLSTR(out, idaapi.SCOLOR_REG) return out def from_ExprMem(self, expr): @@ -126,20 +140,23 @@ class TranslatorIDA(Translator): return (' ' + expr._op + ' ').join([self.str_protected_child(arg, expr) for arg in expr._args]) return (expr._op + '(' + - ', '.join([self.from_expr(arg) for arg in expr._args]) + ')') + ', '.join( + self.from_expr(arg) + for arg in expr._args + ) + ')') def from_ExprAff(self, expr): return "%s = %s" % tuple(map(expr.from_expr, (expr.dst, expr.src))) -def expr2colorstr(regs_ids, expr): +def expr2colorstr(expr, loc_db): """Colorize an Expr instance for IDA - @regs_ids: list of ExprId corresponding to available registers @expr: Expr instance to colorize + @loc_db: LocationDB instance """ - translator = TranslatorIDA(regs_ids) + translator = TranslatorIDA(loc_db=loc_db) return translator.from_expr(expr) diff --git a/example/jitter/mips32.py b/example/jitter/mips32.py index c5b2f7f5..31ab03c8 100755 --- a/example/jitter/mips32.py +++ b/example/jitter/mips32.py @@ -5,16 +5,11 @@ from miasm2.analysis import debugging from miasm2.jitter.csts import * from miasm2.analysis.machine import Machine -from pdb import pm - parser = ArgumentParser( description="""Sandbox raw binary with mips32 engine (ex: jit_mips32.py example/mips32_sc_l.bin 0)""") -parser.add_argument("-r", "--log-regs", - help="Log registers value for each instruction", - action="store_true") -parser.add_argument("-m", "--log-mn", - help="Log desassembly conversion for each instruction", +parser.add_argument("-t", "--trace", + help="Log 
instructions/registers values", action="store_true") parser.add_argument("-n", "--log-newbloc", help="Log basic blocks processed by the Jitter", @@ -43,9 +38,11 @@ def jit_mips32_binary(args): myjit.init_stack() # Log level (if available with jitter engine) - myjit.jit.log_regs = args.log_regs - myjit.jit.log_mn = args.log_mn - myjit.jit.log_newbloc = args.log_newbloc + myjit.set_trace_log( + trace_instr=args.trace, + trace_regs=args.trace, + trace_new_blocks=args.log_newbloc + ) myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath).read()) myjit.add_breakpoint(0x1337BEEF, code_sentinelle) diff --git a/example/jitter/msp430.py b/example/jitter/msp430.py index 6dd67542..2f9b8649 100755 --- a/example/jitter/msp430.py +++ b/example/jitter/msp430.py @@ -8,11 +8,8 @@ from miasm2.analysis.machine import Machine parser = ArgumentParser( description="""Sandbox raw binary with msp430 engine (ex: jit_msp430.py example/msp430_sc.bin 0)""") -parser.add_argument("-r", "--log-regs", - help="Log registers value for each instruction", - action="store_true") -parser.add_argument("-m", "--log-mn", - help="Log desassembly conversion for each instruction", +parser.add_argument("-t", "--trace", + help="Log instructions/registers values", action="store_true") parser.add_argument("-n", "--log-newbloc", help="Log basic blocks processed by the Jitter", @@ -36,9 +33,11 @@ def jit_msp430_binary(args): myjit.init_stack() # Log level (if available with jitter engine) - myjit.jit.log_regs = args.log_regs - myjit.jit.log_mn = args.log_mn - myjit.jit.log_newbloc = args.log_newbloc + myjit.set_trace_log( + trace_instr=args.trace, + trace_regs=args.trace, + trace_new_blocks=args.log_newbloc + ) myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath, "rb").read()) myjit.add_breakpoint(0x1337, lambda _: exit(0)) diff --git a/example/jitter/sandbox_call.py b/example/jitter/sandbox_call.py index dc64af15..3eb0b86e 100644 --- a/example/jitter/sandbox_call.py +++ 
b/example/jitter/sandbox_call.py @@ -15,7 +15,8 @@ sb = Sandbox_Linux_arml(options.filename, options, globals()) with open(options.filename, "rb") as fdesc: cont = Container.from_stream(fdesc) - addr_to_call = cont.symbol_pool.getby_name("md5_starts").offset + loc_key = cont.loc_db.get_name_location("md5_starts") + addr_to_call = cont.loc_db.get_location_offset(loc_key) # Calling md5_starts(malloc(0x64)) addr = linobjs.heap.alloc(sb.jitter, 0x64) diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index f9b0aed1..665fa15a 100644 --- a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -53,20 +53,21 @@ if options.verbose is True: # Ensure there is one and only one leave (for OEP discovering) mdis = sb.machine.dis_engine(sb.jitter.bs) mdis.dont_dis_nulstart_bloc = True -ab = mdis.dis_multiblock(sb.entry_point) +asmcfg = mdis.dis_multiblock(sb.entry_point) -leaves = list(ab.get_bad_blocks_predecessors()) +leaves = list(asmcfg.get_bad_blocks_predecessors()) assert(len(leaves) == 1) l = leaves.pop() logging.info(l) -end_label = l.label.offset -logging.info('final label') -logging.info(end_label) +end_offset = mdis.loc_db.get_location_offset(l) + +logging.info('final offset') +logging.info(hex(end_offset)) # Export CFG graph (dot format) if options.graph is True: - open("graph.dot", "w").write(ab.graph.dot()) + open("graph.dot", "w").write(asmcfg.dot()) if options.verbose is True: @@ -85,7 +86,7 @@ def update_binary(jitter): return False # Set callbacks -sb.jitter.add_breakpoint(end_label, update_binary) +sb.jitter.add_breakpoint(end_offset, update_binary) # Run sb.run() diff --git a/example/jitter/x86_32.py b/example/jitter/x86_32.py index 1409d7aa..5272f732 100644 --- a/example/jitter/x86_32.py +++ b/example/jitter/x86_32.py @@ -24,8 +24,7 @@ data = open(args.filename).read() run_addr = 0x40000000 myjit.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) -myjit.jit.log_regs = True -myjit.jit.log_mn = True 
+myjit.set_trace_log() myjit.push_uint32_t(0x1337beef) myjit.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/example/symbol_exec/depgraph.py b/example/symbol_exec/depgraph.py index b8d838ae..f306e6e3 100644 --- a/example/symbol_exec/depgraph.py +++ b/example/symbol_exec/depgraph.py @@ -47,7 +47,7 @@ for element in args.element: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) -ir_arch = machine.ira(mdis.symbol_pool) +ir_arch = machine.ira(mdis.loc_db) # Common argument forms init_ctx = {} @@ -59,21 +59,22 @@ if args.rename_args: init_ctx[e_mem] = ExprId("arg%d" % i, 32) # Disassemble the targeted function -blocks = mdis.dis_multiblock(int(args.func_addr, 0)) +asmcfg = mdis.dis_multiblock(int(args.func_addr, 0)) # Generate IR -for block in blocks: - ir_arch.add_block(block) +ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) # Get the instance -dg = DependencyGraph(ir_arch, implicit=args.implicit, - apply_simp=not args.do_not_simplify, - follow_mem=not args.unfollow_mem, - follow_call=not args.unfollow_call) +dg = DependencyGraph( + ircfg, implicit=args.implicit, + apply_simp=not args.do_not_simplify, + follow_mem=not args.unfollow_mem, + follow_call=not args.unfollow_call +) # Build information target_addr = int(args.target_addr, 0) -current_block = list(ir_arch.getby_offset(target_addr))[0] +current_block = list(ircfg.getby_offset(target_addr))[0] assignblk_index = 0 for assignblk_index, assignblk in enumerate(current_block): if assignblk.instr.offset == target_addr: @@ -81,12 +82,12 @@ for assignblk_index, assignblk in enumerate(current_block): # Enumerate solutions json_solutions = [] -for sol_nb, sol in enumerate(dg.get(current_block.label, elements, assignblk_index, set())): +for sol_nb, sol in enumerate(dg.get(current_block.loc_key, elements, assignblk_index, set())): fname = "sol_%d.dot" % sol_nb with open(fname, "w") as fdesc: fdesc.write(sol.graph.dot()) - results = 
sol.emul(ctx=init_ctx) + results = sol.emul(ir_arch, ctx=init_ctx) tokens = {str(k): str(v) for k, v in results.iteritems()} if not args.json: result = ", ".join("=".join(x) for x in tokens.iteritems()) diff --git a/example/symbol_exec/dse_strategies.py b/example/symbol_exec/dse_strategies.py index a981853a..5a4be321 100644 --- a/example/symbol_exec/dse_strategies.py +++ b/example/symbol_exec/dse_strategies.py @@ -67,7 +67,7 @@ jitter.init_run(run_addr) # Init a DSE instance with a given strategy dse = DSEPathConstraint(machine, produce_solution=strategy) dse.attach(jitter) -# Concretize everything exept the argument +# Concretize everything except the argument dse.update_state_from_concrete() regs = jitter.ir_arch.arch.regs arg = ExprId("ARG", 32) diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index 22a48fc6..c78f1f7f 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -2,35 +2,34 @@ from miasm2.core.bin_stream import bin_stream_str from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.analysis.machine import Machine -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB START_ADDR = 0 machine = Machine("x86_32") - -symbol_pool = AsmSymbolPool() - +loc_db = LocationDB() # Assemble and disassemble a MOV ## Ensure that attributes 'offset' and 'l' are set -line = machine.mn.fromstring("MOV EAX, EBX", symbol_pool, 32) +line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32) asm = machine.mn.asm(line)[0] # Get back block bin_stream = bin_stream_str(asm) -mdis = machine.dis_engine(bin_stream, symbol_pool=symbol_pool) +mdis = machine.dis_engine(bin_stream, loc_db=loc_db) mdis.lines_wd = 1 asm_block = mdis.dis_block(START_ADDR) # Translate ASM -> IR -ira = machine.ira(mdis.symbol_pool) -ira.add_block(asm_block) +ira = machine.ira(mdis.loc_db) +ircfg = ira.new_ircfg() +ira.add_asmblock_to_ircfg(asm_block, ircfg) # Instanciate a Symbolic 
Execution engine with default value for registers -symb = SymbolicExecutionEngine(ira, {}) +symb = SymbolicExecutionEngine(ira) # Emulate one IR basic block ## Emulation of several basic blocks can be done through .emul_ir_blocks -cur_addr = symb.run_at(START_ADDR) +cur_addr = symb.run_at(ircfg, START_ADDR) # Modified elements print 'Modified registers:' diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index 6073e126..16e573bb 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -1,8 +1,9 @@ import logging +import warnings from miasm2.core.bin_stream import bin_stream_str, bin_stream_elf, bin_stream_pe from miasm2.jitter.csts import PAGE_READ -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB log = logging.getLogger("binary") @@ -94,7 +95,7 @@ class Container(object): self._bin_stream = None self._entry_point = None self._arch = None - self._symbol_pool = AsmSymbolPool() + self._loc_db = LocationDB() # Launch parsing self.parse(*args, **kwargs) @@ -120,10 +121,15 @@ class Container(object): return self._arch @property - def symbol_pool(self): - "AsmSymbolPool instance preloaded with container symbols (if any)" - return self._symbol_pool + def loc_db(self): + "LocationDB instance preloaded with container symbols (if any)" + return self._loc_db + @property + def symbol_pool(self): + "[DEPRECATED API]" + warnings.warn("Deprecated API: use 'loc_db'") + return self.loc_db ## Format dependent classes class ContainerPE(Container): @@ -202,14 +208,16 @@ class ContainerELF(Container): offset = symb.value if offset == 0: continue + if not name: + continue try: - self._symbol_pool.add_label(name, offset) + self._loc_db.add_location(name, offset) except ValueError: # Two symbols points on the same offset log.warning("Same offset (%s) for %s and %s", (hex(offset), name, - self._symbol_pool.getby_offset(offset))) + self._loc_db.get_offset_location(offset))) continue diff --git 
a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py index 18829627..7f74324f 100644 --- a/miasm2/analysis/cst_propag.py +++ b/miasm2/analysis/cst_propag.py @@ -17,21 +17,20 @@ class SymbExecState(SymbolicExecutionEngine): """ State manager for SymbolicExecution """ - def __init__(self, ir_arch, state): + def __init__(self, ir_arch, ircfg, state): super(SymbExecState, self).__init__(ir_arch, {}) self.set_state(state) -def add_state(ir_arch, todo, states, addr, state): +def add_state(ircfg, todo, states, addr, state): """ Add or merge the computed @state for the block at @addr. Update @todo - @ir_arch: IR instance @todo: modified block set @states: dictionnary linking a label to its entering state. @addr: address of the concidered block @state: computed state """ - addr = ir_arch.get_label(addr) + addr = ircfg.get_loc_key(addr) todo.add(addr) if addr not in states: states[addr] = state @@ -67,7 +66,8 @@ class SymbExecStateFix(SymbolicExecutionEngine): # Function used to test if an Expression is considered as a constant is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) - def __init__(self, ir_arch, state, cst_propag_link): + def __init__(self, ir_arch, ircfg, state, cst_propag_link): + self.ircfg = ircfg super(SymbExecStateFix, self).__init__(ir_arch, {}) self.set_state(state) self.cst_propag_link = cst_propag_link @@ -108,14 +108,14 @@ class SymbExecStateFix(SymbolicExecutionEngine): for arg in assignblk.instr.args: new_arg = self.propag_expr_cst(arg) links[new_arg] = arg - self.cst_propag_link[(irb.label, index)] = links + self.cst_propag_link[(irb.loc_key, index)] = links self.eval_updt_assignblk(assignblk) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - self.ir_arch.blocks[irb.label] = IRBlock(irb.label, assignblks) + self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) -def compute_cst_propagation_states(ir_arch, init_addr, init_infos): +def compute_cst_propagation_states(ir_arch, ircfg, init_addr, 
init_infos): """ Propagate "constant expressions" in a function. The attribute "constant expression" is true if the expression is based on @@ -128,7 +128,7 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): done = set() state = SymbExecState.StateEngine(init_infos) - lbl = ir_arch.get_label(init_addr) + lbl = ircfg.get_loc_key(init_addr) todo = set([lbl]) states = {lbl: state} @@ -140,11 +140,11 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec_engine = SymbExecState(ir_arch, state) - addr = symbexec_engine.run_block_at(lbl) + symbexec_engine = SymbExecState(ir_arch, ircfg, state) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) for dst in possible_values(addr): @@ -153,14 +153,16 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): LOG_CST_PROPAG.warning('Bad destination: %s', value) continue elif value.is_int(): - value = ir_arch.get_label(value) - add_state(ir_arch, todo, states, value, - symbexec_engine.get_state()) + value = ircfg.get_loc_key(value) + add_state( + ircfg, todo, states, value, + symbexec_engine.get_state() + ) return states -def propagate_cst_expr(ir_arch, addr, init_infos): +def propagate_cst_expr(ir_arch, ircfg, addr, init_infos): """ Propagate "constant expressions" in a @ir_arch. The attribute "constant expression" is true if the expression is based on @@ -172,11 +174,11 @@ def propagate_cst_expr(ir_arch, addr, init_infos): Returns a mapping between replaced Expression and their new values. 
""" - states = compute_cst_propagation_states(ir_arch, addr, init_infos) + states = compute_cst_propagation_states(ir_arch, ircfg, addr, init_infos) cst_propag_link = {} for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec = SymbExecStateFix(ir_arch, state, cst_propag_link) - symbexec.eval_updt_irblock(ir_arch.blocks[lbl]) + symbexec = SymbExecStateFix(ir_arch, ircfg, state, cst_propag_link) + symbexec.eval_updt_irblock(ircfg.blocks[lbl]) return cst_propag_link diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index bceb0bd8..9c21fd51 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -4,12 +4,11 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine def get_node_name(label, i, n): - # n_name = "%s_%d_%s"%(label.name, i, n) n_name = (label, i, n) return n_name -def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): +def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes): """ Create data flow for an irbloc using raw IR expressions """ @@ -27,7 +26,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): continue for n in all_mems: - node_n_w = get_node_name(irb.label, i, n) + node_n_w = get_node_name(irb.loc_key, i, n) if not n in nodes_r: continue o_r = n.arg.get_r(mem_read=False, cst_read=True) @@ -35,7 +34,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, i, n_r) + node_n_r = get_node_name(irb.loc_key, i, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r flow_graph.add_uniq_edge(node_n_r, node_n_w) @@ -46,80 +45,40 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, i, n_r) + node_n_r = 
get_node_name(irb.loc_key, i, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) - node_n_w = get_node_name(irb.label, i + 1, node_w) + node_n_w = get_node_name(irb.loc_key, i + 1, node_w) out_nodes[node_w] = node_n_w flow_graph.add_node(node_n_w) flow_graph.add_uniq_edge(node_n_r, node_n_w) -def intra_block_flow_symbexec(ir_arch, flow_graph, irb, in_nodes, out_nodes): - """ - Create data flow for an irbloc using symbolic execution - """ - current_nodes = {} - - symbols_init = dict(ir_arch.arch.regs.regs_init) - - sb = SymbolicExecutionEngine(ir_arch, dict(symbols_init)) - sb.emulbloc(irb) - # print "*"*40 - # print irb - # print sb.dump_id() - # print sb.dump_mem() - - for n_w in sb.symbols: - # print n_w - v = sb.symbols[n_w] - if n_w in symbols_init and symbols_init[n_w] == v: - continue - read_values = v.get_r(cst_read=True) - # print n_w, v, [str(x) for x in read_values] - node_n_w = get_node_name(irb.label, len(irb), n_w) - for n_r in read_values: - if n_r in current_nodes: - node_n_r = current_nodes[n_r] - else: - node_n_r = get_node_name(irb.label, 0, n_r) - current_nodes[n_r] = node_n_r - in_nodes[n_r] = node_n_r - - out_nodes[n_w] = node_n_w - flow_graph.add_uniq_edge(node_n_r, node_n_w) - - -def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): +def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): lbl, current_nodes, exec_nodes = todo - # print 'TODO' - # print lbl - # print [(str(x[0]), str(x[1])) for x in current_nodes] current_nodes = dict(current_nodes) # link current nodes to bloc in_nodes - if not lbl in ir_arch.blocks: + if not lbl in ircfg.blocks: print "cannot find bloc!!", lbl return set() - irb = ir_arch.blocks[lbl] - # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) + irb = ircfg.blocks[lbl] to_del = set() - for n_r, node_n_r in irb_in_nodes[irb.label].items(): + for n_r, 
node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_r in current_nodes: continue - # print 'add link', current_nodes[n_r], node_n_r flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r) to_del.add(n_r) # if link exec to data, all nodes depends on exec nodes if link_exec_to_data: for n_x_r in exec_nodes: - for n_r, node_n_r in irb_in_nodes[irb.label].items(): + for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_x_r in current_nodes: continue if isinstance(n_r, ExprInt): @@ -127,18 +86,16 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r) # update current nodes using bloc out_nodes - for n_w, node_n_w in irb_out_nodes[irb.label].items(): + for n_w, node_n_w in irb_out_nodes[irb.loc_key].items(): current_nodes[n_w] = node_n_w # get nodes involved in exec flow x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.graph.successors(irb.label): + for lbl_dst in ircfg.successors(irb.loc_key): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) - # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) - return todo @@ -150,36 +107,29 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): while todo: lbl = todo.pop() irb = ir_arch.blocks[lbl] - for lbl_son in ir_arch.graph.successors(irb.label): + for lbl_son in ir_arch.graph.successors(irb.loc_key): if not lbl_son in ir_arch.blocks: print "cannot find bloc!!", lbl continue irb_son = ir_arch.blocks[lbl_son] - for n_r in irb_in_nodes[irb_son.label]: - if n_r in irb_out_nodes[irb.label]: + for n_r in irb_in_nodes[irb_son.loc_key]: + if n_r in irb_out_nodes[irb.loc_key]: continue if not isinstance(n_r, ExprId): continue - # print "###", n_r - # print "###", irb - # print "###", 'OUT', [str(x) for x in irb.out_nodes] - # print "###", irb_son - # print "###", 'IN', [str(x) for x in irb_son.in_nodes] - - node_n_w = irb.label, len(irb), n_r - 
irb_out_nodes[irb.label][n_r] = node_n_w - if not n_r in irb_in_nodes[irb.label]: - irb_in_nodes[irb.label][n_r] = irb.label, 0, n_r - node_n_r = irb_in_nodes[irb.label][n_r] - # print "###", node_n_r - for lbl_p in ir_arch.graph.predecessors(irb.label): + node_n_w = irb.loc_key, len(irb), n_r + irb_out_nodes[irb.loc_key][n_r] = node_n_w + if not n_r in irb_in_nodes[irb.loc_key]: + irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r + node_n_r = irb_in_nodes[irb.loc_key][n_r] + for lbl_p in ir_arch.graph.predecessors(irb.loc_key): todo.add(lbl_p) flow_graph.add_uniq_edge(node_n_r, node_n_w) -def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): +def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): todo = set() done = set() @@ -190,7 +140,7 @@ def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, li if state in done: continue done.add(state) - out = inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) + out = inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) todo.update(out) @@ -220,46 +170,11 @@ class symb_exec_func: b = self.ir_arch.get_block(ad) if b is None: raise ValueError("unknown bloc! 
%s" % ad) - """ - dead = b.dead[0] - for d in dead: - if d in variables: - del(variables[d]) - """ variables = variables.items() s = parent, ad, tuple(sorted(variables)) - """ - state_var = s[1] - if s in self.states_var_done: - print 'skip state' - return - if not ad in self.stateby_ad: - self.stateby_ad[ad] = set() - self.stateby_ad[ad].add(state_var) - - """ self.todo.add(s) - """ - if not ad in self.cpt: - self.cpt[ad] = 0 - """ - """ - def get_next_min(self): - state_by_ad = {} - for state in self.todo: - ad = state[1] - if not ad in state_by_ad: - state_by_ad[ad] = [] - state_by_ad[ad].append(state) - print "XX", [len(x) for x in state_by_ad.values()] - state_by_ad = state_by_ad.items() - state_by_ad.sort(key=lambda x:len(x[1])) - state_by_ad.reverse() - return state_by_ad.pop()[1][0] - """ - def get_next_state(self): state = self.todo.pop() return state @@ -273,16 +188,10 @@ class symb_exec_func: self.total_done += 1 print 'CPT', self.total_done while self.todo: - # if self.total_done>20: - # self.get_next_min() - # state = self.todo.pop() state = self.get_next_state() parent, ad, s = state self.states_done.add(state) self.states_var_done.add(state) - # if s in self.states_var_done: - # print "state done" - # continue sb = SymbolicExecutionEngine(self.ir_arch, dict(s)) diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index d9f61c56..9e5203a6 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -29,16 +29,16 @@ class ReachingDefinitions(dict): { (block, index): { lvalue: set((block, index)) } } """ - ir_a = None + ircfg = None - def __init__(self, ir_a): + def __init__(self, ircfg): super(ReachingDefinitions, self).__init__() - self.ir_a = ir_a + self.ircfg = ircfg self.compute() def get_definitions(self, block_lbl, assignblk_index): """Returns the dict { lvalue: set((def_block_lbl, def_index)) } - associated with self.ir_a.@block.assignblks[@assignblk_index] + associated with 
self.ircfg.@block.assignblks[@assignblk_index] or {} if it is not yet computed """ return self.get((block_lbl, assignblk_index), {}) @@ -48,7 +48,7 @@ class ReachingDefinitions(dict): modified = True while modified: modified = False - for block in self.ir_a.blocks.itervalues(): + for block in self.ircfg.blocks.itervalues(): modified |= self.process_block(block) def process_block(self, block): @@ -57,15 +57,15 @@ class ReachingDefinitions(dict): the assignblk in block @block. """ predecessor_state = {} - for pred_lbl in self.ir_a.graph.predecessors(block.label): - pred = self.ir_a.blocks[pred_lbl] + for pred_lbl in self.ircfg.predecessors(block.loc_key): + pred = self.ircfg.blocks[pred_lbl] for lval, definitions in self.get_definitions(pred_lbl, len(pred)).iteritems(): predecessor_state.setdefault(lval, set()).update(definitions) - modified = self.get((block.label, 0)) != predecessor_state + modified = self.get((block.loc_key, 0)) != predecessor_state if not modified: return False - self[(block.label, 0)] = predecessor_state + self[(block.loc_key, 0)] = predecessor_state for index in xrange(len(block)): modified |= self.process_assignblock(block, index) @@ -80,13 +80,13 @@ class ReachingDefinitions(dict): """ assignblk = block[assignblk_index] - defs = self.get_definitions(block.label, assignblk_index).copy() + defs = self.get_definitions(block.loc_key, assignblk_index).copy() for lval in assignblk: - defs.update({lval: set([(block.label, assignblk_index)])}) + defs.update({lval: set([(block.loc_key, assignblk_index)])}) - modified = self.get((block.label, assignblk_index + 1)) != defs + modified = self.get((block.loc_key, assignblk_index + 1)) != defs if modified: - self[(block.label, assignblk_index + 1)] = defs + self[(block.loc_key, assignblk_index + 1)] = defs return modified @@ -126,7 +126,7 @@ class DiGraphDefUse(DiGraph): # For dot display self._filter_node = None self._dot_offset = None - self._blocks = reaching_defs.ir_a.blocks + self._blocks = 
reaching_defs.ircfg.blocks super(DiGraphDefUse, self).__init__(*args, **kwargs) self._compute_def_use(reaching_defs, @@ -149,9 +149,9 @@ class DiGraphDefUse(DiGraph): def _compute_def_use_block(self, block, reaching_defs, deref_mem=False): for index, assignblk in enumerate(block): - assignblk_reaching_defs = reaching_defs.get_definitions(block.label, index) + assignblk_reaching_defs = reaching_defs.get_definitions(block.loc_key, index) for lval, expr in assignblk.iteritems(): - self.add_node(AssignblkNode(block.label, index, lval)) + self.add_node(AssignblkNode(block.loc_key, index, lval)) read_vars = expr.get_r(mem_read=deref_mem) if deref_mem and lval.is_mem(): @@ -159,7 +159,7 @@ class DiGraphDefUse(DiGraph): for read_var in read_vars: for reach in assignblk_reaching_defs.get(read_var, set()): self.add_data_edge(AssignblkNode(reach[0], reach[1], read_var), - AssignblkNode(block.label, index, lval)) + AssignblkNode(block.loc_key, index, lval)) def del_edge(self, src, dst): super(DiGraphDefUse, self).del_edge(src, dst) @@ -189,7 +189,7 @@ class DiGraphDefUse(DiGraph): yield self.DotCellDescription(text="", attr={}) -def dead_simp_useful_assignblks(defuse, reaching_defs): +def dead_simp_useful_assignblks(irarch, defuse, reaching_defs): """Mark useful statements using previous reach analysis and defuse Source : Kennedy, K. (1979). A survey of data flow analysis techniques. 
@@ -200,13 +200,13 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): PRE: compute_reach(self) """ - ir_a = reaching_defs.ir_a + ircfg = reaching_defs.ircfg useful = set() - for block_lbl, block in ir_a.blocks.iteritems(): - successors = ir_a.graph.successors(block_lbl) + for block_lbl, block in ircfg.blocks.iteritems(): + successors = ircfg.successors(block_lbl) for successor in successors: - if successor not in ir_a.blocks: + if successor not in ircfg.blocks: keep_all_definitions = True break else: @@ -217,7 +217,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): valid_definitions = reaching_defs.get_definitions(block_lbl, len(block)) for lval, definitions in valid_definitions.iteritems(): - if (lval in ir_a.get_out_regs(block) + if (lval in irarch.get_out_regs(block) or keep_all_definitions): for definition in definitions: useful.add(AssignblkNode(definition[0], definition[1], lval)) @@ -226,7 +226,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for index, assignblk in enumerate(block): for lval, rval in assignblk.iteritems(): if (lval.is_mem() - or ir_a.IRDst == lval + or irarch.IRDst == lval or rval.is_function_call()): useful.add(AssignblkNode(block_lbl, index, lval)) @@ -235,7 +235,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for parent in defuse.reachable_parents(node): yield parent -def dead_simp(ir_a): +def dead_simp(irarch, ircfg): """ Remove useless affectations. @@ -245,21 +245,21 @@ def dead_simp(ir_a): Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. 
Watson Research Division, page 43 - @ir_a: IntermediateRepresentation instance + @ircfg: IntermediateRepresentation instance """ modified = False - reaching_defs = ReachingDefinitions(ir_a) + reaching_defs = ReachingDefinitions(ircfg) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - useful = set(dead_simp_useful_assignblks(defuse, reaching_defs)) - for block in ir_a.blocks.itervalues(): + useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) + for block in ircfg.blocks.itervalues(): irs = [] for idx, assignblk in enumerate(block): new_assignblk = dict(assignblk) for lval in assignblk: - if AssignblkNode(block.label, idx, lval) not in useful: + if AssignblkNode(block.loc_key, idx, lval) not in useful: del new_assignblk[lval] modified = True irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ir_a.blocks[block.label] = IRBlock(block.label, irs) + ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) return modified diff --git a/miasm2/analysis/debugging.py b/miasm2/analysis/debugging.py index fc03eb17..6b88f00a 100644 --- a/miasm2/analysis/debugging.py +++ b/miasm2/analysis/debugging.py @@ -273,7 +273,7 @@ class DebugCmd(cmd.Cmd, object): def add_breakpoints(self, bp_addr): for addr in bp_addr: - addr = int(addr, 0) + addr = int(addr, 0) good = True for i, dbg_obj in enumerate(self.dbg.bp_list): diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index f7949c88..93b3edb5 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -1,8 +1,8 @@ """Provide dependency graph""" -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprInt, ExprLoc, ExprAff from miasm2.core.graph import DiGraph -from miasm2.core.asmblock import AsmLabel, expr_is_int_or_label, expr_is_label +from miasm2.core.locationdb import LocationDB from miasm2.expression.simplifications import expr_simp from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IRBlock, 
AssignBlock @@ -20,23 +20,23 @@ class DependencyNode(object): """Node elements of a DependencyGraph A dependency node stands for the dependency on the @element at line number - @line_nb in the IRblock named @label, *before* the evaluation of this + @line_nb in the IRblock named @loc_key, *before* the evaluation of this line. """ - __slots__ = ["_label", "_element", "_line_nb", "_hash"] + __slots__ = ["_loc_key", "_element", "_line_nb", "_hash"] - def __init__(self, label, element, line_nb): + def __init__(self, loc_key, element, line_nb): """Create a dependency node with: - @label: AsmLabel instance + @loc_key: LocKey instance @element: Expr instance @line_nb: int """ - self._label = label + self._loc_key = loc_key self._element = element self._line_nb = line_nb self._hash = hash( - (self._label, self._element, self._line_nb)) + (self._loc_key, self._element, self._line_nb)) def __hash__(self): """Returns a hash of @self to uniquely identify @self""" @@ -46,7 +46,7 @@ class DependencyNode(object): """Returns True if @self and @depnode are equals.""" if not isinstance(depnode, self.__class__): return False - return (self.label == depnode.label and + return (self.loc_key == depnode.loc_key and self.element == depnode.element and self.line_nb == depnode.line_nb) @@ -55,13 +55,13 @@ class DependencyNode(object): if not isinstance(node, self.__class__): return cmp(self.__class__, node.__class__) - return cmp((self.label, self.element, self.line_nb), - (node.label, node.element, node.line_nb)) + return cmp((self.loc_key, self.element, self.line_nb), + (node.loc_key, node.element, node.line_nb)) def __str__(self): """Returns a string representation of DependencyNode""" return "<%s %s %s %s>" % (self.__class__.__name__, - self.label.name, self.element, + self.loc_key, self.element, self.line_nb) def __repr__(self): @@ -69,9 +69,9 @@ class DependencyNode(object): return self.__str__() @property - def label(self): + def loc_key(self): "Name of the current IRBlock" - return 
self._label + return self._loc_key @property def element(self): @@ -90,9 +90,9 @@ class DependencyState(object): Store intermediate depnodes states during dependencygraph analysis """ - def __init__(self, label, pending, line_nb=None): - self.label = label - self.history = [label] + def __init__(self, loc_key, pending, line_nb=None): + self.loc_key = loc_key + self.history = [loc_key] self.pending = {k: set(v) for k, v in pending.iteritems()} self.line_nb = line_nb self.links = set() @@ -101,22 +101,22 @@ class DependencyState(object): self._graph = None def __repr__(self): - return "<State: %r (%r) (%r)>" % (self.label, + return "<State: %r (%r) (%r)>" % (self.loc_key, self.pending, self.links) - def extend(self, label): + def extend(self, loc_key): """Return a copy of itself, with itself in history - @label: AsmLabel instance for the new DependencyState's label + @loc_key: LocKey instance for the new DependencyState's loc_key """ - new_state = self.__class__(label, self.pending) + new_state = self.__class__(loc_key, self.pending) new_state.links = set(self.links) - new_state.history = self.history + [label] + new_state.history = self.history + [loc_key] return new_state def get_done_state(self): """Returns immutable object representing current state""" - return (self.label, frozenset(self.links)) + return (self.loc_key, frozenset(self.links)) def as_graph(self): """Generates a Digraph of dependencies""" @@ -157,7 +157,7 @@ class DependencyState(object): @line_nb: the element's line """ - depnode = DependencyNode(self.label, element, line_nb) + depnode = DependencyNode(self.loc_key, element, line_nb) if not self.pending[element]: # Create start node self.links.add((depnode, None)) @@ -175,14 +175,14 @@ class DependencyState(object): @future_pending: the future dependencies """ - depnode = DependencyNode(self.label, element, line_nb) + depnode = DependencyNode(self.loc_key, element, line_nb) # Update pending, add link to unfollowed nodes for dependency in 
dependencies: if not dependency.follow: # Add non followed dependencies to the dependency graph parent = DependencyNode( - self.label, dependency.element, line_nb) + self.loc_key, dependency.element, line_nb) self.links.add((parent, depnode)) continue # Create future pending between new dependency and the current @@ -194,15 +194,15 @@ class DependencyResult(DependencyState): """Container and methods for DependencyGraph results""" - def __init__(self, ira, initial_state, state, inputs): + def __init__(self, ircfg, initial_state, state, inputs): self.initial_state = initial_state - self.label = state.label + self.loc_key = state.loc_key self.history = state.history self.pending = state.pending self.line_nb = state.line_nb self.inputs = inputs self.links = state.links - self._ira = ira + self._ircfg = ircfg # Init lazy elements self._graph = None @@ -212,7 +212,7 @@ class DependencyResult(DependencyState): def unresolved(self): """Set of nodes whose dependencies weren't found""" return set(element for element in self.pending - if element != self._ira.IRDst) + if element != self._ircfg.IRDst) @property def relevant_nodes(self): @@ -225,17 +225,17 @@ class DependencyResult(DependencyState): return output @property - def relevant_labels(self): - """List of labels containing nodes influencing inputs. + def relevant_loc_keys(self): + """List of loc_keys containing nodes influencing inputs. 
The history order is preserved.""" - # Get used labels - used_labels = set(depnode.label for depnode in self.relevant_nodes) + # Get used loc_keys + used_loc_keys = set(depnode.loc_key for depnode in self.relevant_nodes) # Keep history order output = [] - for label in self.history: - if label in used_labels: - output.append(label) + for loc_key in self.history: + if loc_key in used_loc_keys: + output.append(loc_key) return output @@ -255,7 +255,7 @@ class DependencyResult(DependencyState): assignblks = [] line2elements = {} for depnode in self.relevant_nodes: - if depnode.label != irb.label: + if depnode.loc_key != irb.loc_key: continue line2elements.setdefault(depnode.line_nb, set()).add(depnode.element) @@ -266,40 +266,42 @@ class DependencyResult(DependencyState): assignmnts = {} for element in elements: if element in irb[line_nb]: - # constants, label, ... are not in destination + # constants, loc_key, ... are not in destination assignmnts[element] = irb[line_nb][element] assignblks.append(AssignBlock(assignmnts)) - return IRBlock(irb.label, assignblks) + return IRBlock(irb.loc_key, assignblks) - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): """Symbolic execution of relevant nodes according to the history Return the values of inputs nodes' elements + @ir_arch: IntermediateRepresentation instance @ctx: (optional) Initial context as dictionnary @step: (optional) Verbose execution Warning: The emulation is not sound if the inputs nodes depend on loop variant. 
""" # Init - ctx_init = self._ira.arch.regs.regs_init + ctx_init = {} if ctx is not None: ctx_init.update(ctx) assignblks = [] # Build a single affectation block according to history - last_index = len(self.relevant_labels) - for index, label in enumerate(reversed(self.relevant_labels), 1): - if index == last_index and label == self.initial_state.label: + last_index = len(self.relevant_loc_keys) + for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1): + if index == last_index and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - assignblks += self.irblock_slice(self._ira.blocks[label], + assignblks += self.irblock_slice(self._ircfg.blocks[loc_key], line_nb).assignblks # Eval the block - temp_label = AsmLabel("Temp") - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) - symb_exec.eval_updt_irblock(IRBlock(temp_label, assignblks), step=step) + loc_db = LocationDB() + temp_loc = loc_db.get_or_create_name_location("Temp") + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) + symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) # Return only inputs values (others could be wrongs) return {element: symb_exec.symbols[element] @@ -314,30 +316,31 @@ class DependencyResultImplicit(DependencyResult): # Z3 Solver instance _solver = None - unsat_expr = m2_expr.ExprAff(m2_expr.ExprInt(0, 1), - m2_expr.ExprInt(1, 1)) + unsat_expr = ExprAff(ExprInt(0, 1), ExprInt(1, 1)) def _gen_path_constraints(self, translator, expr, expected): """Generate path constraint from @expr. 
Handle special case with - generated labels + generated loc_keys """ out = [] - expected_is_label = expr_is_label(expected) + expected = self._ircfg.loc_db.canonize_to_exprloc(expected) + expected_is_loc_key = expected.is_loc() for consval in possible_values(expr): - if (expected_is_label and - consval.value != expected): + value = self._ircfg.loc_db.canonize_to_exprloc(consval.value) + if expected_is_loc_key and value != expected: continue - if (not expected_is_label and - expr_is_label(consval.value)): + if not expected_is_loc_key and value.is_loc_key(): continue conds = z3.And(*[translator.from_expr(cond.to_constraint()) for cond in consval.constraints]) - if expected != consval.value: - conds = z3.And(conds, - translator.from_expr( - m2_expr.ExprAff(consval.value, - expected))) + if expected != value: + conds = z3.And( + conds, + translator.from_expr( + ExprAff(value, + expected)) + ) out.append(conds) if out: @@ -348,35 +351,33 @@ class DependencyResultImplicit(DependencyResult): conds = translator.from_expr(self.unsat_expr) return conds - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): # Init - ctx_init = self._ira.arch.regs.regs_init + ctx_init = {} if ctx is not None: ctx_init.update(ctx) solver = z3.Solver() - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) history = self.history[::-1] history_size = len(history) translator = Translator.to_language("z3") - size = self._ira.IRDst.size + size = self._ircfg.IRDst.size - for hist_nb, label in enumerate(history, 1): - if hist_nb == history_size and label == self.initial_state.label: + for hist_nb, loc_key in enumerate(history, 1): + if hist_nb == history_size and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - irb = self.irblock_slice(self._ira.blocks[label], line_nb) + irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb) # Emul the block and get 
back destination dst = symb_exec.eval_updt_irblock(irb, step=step) # Add constraint if hist_nb < history_size: - next_label = history[hist_nb] - expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, - size)) - solver.add( - self._gen_path_constraints(translator, dst, expected)) + next_loc_key = history[hist_nb] + expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size)) + solver.add(self._gen_path_constraints(translator, dst, expected)) # Save the solver self._solver = solver @@ -412,17 +413,17 @@ class FollowExpr(object): return '%s(%r, %r)' % (self.__class__.__name__, self.follow, self.element) @staticmethod - def to_depnodes(follow_exprs, label, line): + def to_depnodes(follow_exprs, loc_key, line): """Build a set of FollowExpr(DependencyNode) from the @follow_exprs set of FollowExpr @follow_exprs: set of FollowExpr - @label: AsmLabel instance + @loc_key: LocKey instance @line: integer """ dependencies = set() for follow_expr in follow_exprs: dependencies.add(FollowExpr(follow_expr.follow, - DependencyNode(label, + DependencyNode(loc_key, follow_expr.element, line))) return dependencies @@ -446,12 +447,12 @@ class DependencyGraph(object): *explicitely* or *implicitely* involved in the equation of given element. """ - def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True, + def __init__(self, ircfg, + implicit=False, apply_simp=True, follow_mem=True, follow_call=True): - """Create a DependencyGraph linked to @ira - The IRA graph must have been computed + """Create a DependencyGraph linked to @ircfg - @ira: IRAnalysis instance + @ircfg: DiGraphIR instance @implicit: (optional) Track IRDst for each block in the resulting path Following arguments define filters used to generate dependencies @@ -460,7 +461,7 @@ class DependencyGraph(object): @follow_call: (optional) Track through "call" """ # Init - self._ira = ira + self._ircfg = ircfg self._implicit = implicit # Create callback filters. The order is relevant. 
@@ -470,7 +471,7 @@ class DependencyGraph(object): self._cb_follow.append(lambda exprs: self._follow_exprs(exprs, follow_mem, follow_call)) - self._cb_follow.append(self._follow_nolabel) + self._cb_follow.append(self._follow_no_loc_key) @staticmethod def _follow_simp_expr(exprs): @@ -491,11 +492,11 @@ class DependencyGraph(object): @follow: set of nodes to follow @nofollow: set of nodes not to follow """ - if isinstance(expr, m2_expr.ExprId): + if expr.is_id(): follow.add(expr) - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): nofollow.add(expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): follow.add(expr) return expr @@ -508,7 +509,7 @@ class DependencyGraph(object): @follow_mem: force the visit of memory sub expressions @follow_call: force the visit of call sub expressions """ - if not follow_mem and isinstance(expr, m2_expr.ExprMem): + if not follow_mem and expr.is_mem(): nofollow.add(expr) return False if not follow_call and expr.is_function_call(): @@ -530,12 +531,13 @@ class DependencyGraph(object): return follow, nofollow @staticmethod - def _follow_nolabel(exprs): - """Do not follow labels""" + def _follow_no_loc_key(exprs): + """Do not follow loc_keys""" follow = set() for expr in exprs: - if not expr_is_int_or_label(expr): - follow.add(expr) + if expr.is_int() or expr.is_loc(): + continue + follow.add(expr) return follow, set() @@ -562,7 +564,7 @@ class DependencyGraph(object): if dst not in state.pending: continue # Track IRDst in implicit mode only - if dst == self._ira.IRDst and not self._implicit: + if dst == self._ircfg.IRDst and not self._implicit: continue assert dst not in node_resolved node_resolved.add(dst) @@ -580,25 +582,25 @@ class DependencyGraph(object): """Follow dependencies tracked in @state in the current irbloc @state: instance of DependencyState""" - irb = self._ira.blocks[state.label] + irb = self._ircfg.blocks[state.loc_key] line_nb = len(irb) if state.line_nb is None else state.line_nb for 
cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): self._track_exprs(state, assignblk, cur_line_nb) - def get(self, label, elements, line_nb, heads): + def get(self, loc_key, elements, line_nb, heads): """Compute the dependencies of @elements at line number @line_nb in - the block named @label in the current IRA, before the execution of + the block named @loc_key in the current DiGraphIR, before the execution of this line. Dependency check stop if one of @heads is reached - @label: AsmLabel instance + @loc_key: LocKey instance @element: set of Expr instances @line_nb: int - @heads: set of AsmLabel instances + @heads: set of LocKey instances Return an iterator on DiGraph(DependencyNode) """ # Init the algorithm inputs = {element: set() for element in elements} - initial_state = DependencyState(label, inputs, line_nb) + initial_state = DependencyState(loc_key, inputs, line_nb) todo = set([initial_state]) done = set() dpResultcls = DependencyResultImplicit if self._implicit else DependencyResult @@ -611,27 +613,27 @@ class DependencyGraph(object): continue done.add(done_state) if (not state.pending or - state.label in heads or - not self._ira.graph.predecessors(state.label)): - yield dpResultcls(self._ira, initial_state, state, elements) + state.loc_key in heads or + not self._ircfg.predecessors(state.loc_key)): + yield dpResultcls(self._ircfg, initial_state, state, elements) if not state.pending: continue if self._implicit: # Force IRDst to be tracked, except in the input block - state.pending[self._ira.IRDst] = set() + state.pending[self._ircfg.IRDst] = set() # Propagate state to parents - for pred in self._ira.graph.predecessors_iter(state.label): + for pred in self._ircfg.predecessors_iter(state.loc_key): todo.add(state.extend(pred)) def get_from_depnodes(self, depnodes, heads): """Alias for the get() method. Use the attributes of @depnodes as argument. 
- PRE: Labels and lines of depnodes have to be equals + PRE: Loc_Keys and lines of depnodes have to be equals @depnodes: set of DependencyNode instances - @heads: set of AsmLabel instances + @heads: set of LocKey instances """ lead = list(depnodes)[0] elements = set(depnode.element for depnode in depnodes) - return self.get(lead.label, elements, lead.line_nb, heads) + return self.get(lead.loc_key, elements, lead.line_nb, heads) diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py index e759e313..d3278cb4 100644 --- a/miasm2/analysis/disasm_cb.py +++ b/miasm2/analysis/disasm_cb.py @@ -2,10 +2,9 @@ from miasm2.expression.expression import ExprInt, ExprId, ExprMem, match_expr from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmblock \ - import AsmSymbolPool, AsmConstraintNext, AsmConstraintTo +from miasm2.core.asmblock import AsmConstraintNext, AsmConstraintTo +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import upck32 -# from miasm2.core.graph import DiGraph def get_ira(mnemo, attrib): @@ -22,21 +21,19 @@ def get_ira(mnemo, attrib): def arm_guess_subcall( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): ira = get_ira(mnemo, attrib) - sp = AsmSymbolPool() + sp = LocationDB() ir_arch = ira(sp) + ircfg = ira.new_ircfg() print '###' print cur_bloc - ir_arch.add_block(cur_bloc) + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() - # flow_graph = DiGraph() + ir_blocks = ircfg.blocks.values() to_add = set() for irblock in ir_blocks: - # print 'X'*40 - # print irblock pc_val = None lr_val = None for exprs in irblock: @@ -53,43 +50,35 @@ def arm_guess_subcall( l = cur_bloc.lines[-1] if lr_val.arg != l.offset + l.l: continue - # print 'IS CALL!' 
- l = symbol_pool.getby_offset_create(int(lr_val)) + l = loc_db.get_or_create_offset_location(int(lr_val)) c = AsmConstraintNext(l) to_add.add(c) offsets_to_dis.add(int(lr_val)) - # if to_add: - # print 'R'*70 for c in to_add: - # print c cur_bloc.addto(c) def arm_guess_jump_table( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): ira = get_ira(mnemo, attrib) jra = ExprId('jra') jrb = ExprId('jrb') - sp = AsmSymbolPool() + sp = LocationDB() ir_arch = ira(sp) - ir_arch.add_block(cur_bloc) + ircfg = ira.new_ircfg() + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() + ir_blocks = ircfg.blocks.values() for irblock in ir_blocks: - # print 'X'*40 - # print irblock pc_val = None - # lr_val = None for exprs in irblock: for e in exprs: if e.dst == ir_arch.pc: pc_val = e.src - # if e.dst == mnemo.regs.LR: - # lr_val = e.src if pc_val is None: continue if not isinstance(pc_val, ExprMem): @@ -124,7 +113,7 @@ def arm_guess_jump_table( for ad in addrs: offsets_to_dis.add(ad) - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) c = AsmConstraintTo(l) cur_bloc.addto(c) @@ -132,6 +121,6 @@ guess_funcs = [] def guess_multi_cb( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): for f in guess_funcs: - f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db) diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 427a8bd0..0c01610f 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -56,15 +56,14 @@ except ImportError: z3 = None from miasm2.expression.expression import ExprMem, ExprInt, ExprCompose, \ - ExprAff, ExprId + ExprAff, ExprId, ExprLoc, LocKey from miasm2.core.bin_stream import bin_stream_vm -from miasm2.core.asmblock import expr_is_label from 
miasm2.jitter.emulatedsymbexec import EmulatedSymbExec from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals - +from miasm2.core.locationdb import LocationDB DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -72,7 +71,7 @@ class DriftException(Exception): """Raised when the emulation drift from the reference engine""" def __init__(self, info): - super(Exception, self).__init__() + super(DriftException, self).__init__() self.info = info def __str__(self): @@ -150,10 +149,12 @@ class DSEEngine(object): def __init__(self, machine): self.machine = machine + self.loc_db = LocationDB() self.handler = {} # addr -> callback(DSEEngine instance) self.instrumentation = {} # addr -> callback(DSEEngine instance) self.addr_to_cacheblocks = {} # addr -> {label -> IRBlock} - self.ir_arch = self.machine.ir() # corresponding IR + self.ir_arch = self.machine.ir(loc_db=self.loc_db) # corresponding IR + self.ircfg = self.ir_arch.new_ircfg() # corresponding IR # Defined after attachment self.jitter = None # Jitload (concrete execution) @@ -165,20 +166,24 @@ class DSEEngine(object): """Prepare the environment for attachment with a jitter""" # Disassembler self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), - lines_wd=1) + lines_wd=1, + loc_db=self.loc_db) # Symbexec engine ## Prepare symbexec engines self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm, self.ir_arch, {}) self.symb.enable_emulated_simplifications() - self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm, - self.ir_arch, {}) + self.symb_concrete = EmulatedSymbExec( + self.jitter.cpu, self.jitter.vm, + self.ir_arch, {} + ) ## Update registers value - self.symb.symbols[self.ir_arch.IRDst] = ExprInt(getattr(self.jitter.cpu, - self.ir_arch.pc.name), - self.ir_arch.IRDst.size) + 
self.symb.symbols[self.ir_arch.IRDst] = ExprInt( + getattr(self.jitter.cpu, self.ir_arch.pc.name), + self.ir_arch.IRDst.size + ) # Avoid memory write self.symb.func_write = None @@ -188,7 +193,7 @@ class DSEEngine(object): self.jitter.exec_cb = self.callback # Clean jit cache to avoid multi-line basic blocks already jitted - self.jitter.jit.lbl2jitbloc.clear() + self.jitter.jit.clear_jitted_blocks() def attach(self, emulator): """Attach the DSE to @emulator @@ -215,9 +220,9 @@ class DSEEngine(object): self.prepare() def handle(self, cur_addr): - """Handle destination + r"""Handle destination @cur_addr: Expr of the next address in concrete execution - /!\ cur_addr may be a lbl_gen + /!\ cur_addr may be a loc_key In this method, self.symb is in the "just before branching" state """ @@ -295,6 +300,9 @@ class DSEEngine(object): # Call callbacks associated to the current address cur_addr = self.jitter.pc + if isinstance(cur_addr, LocKey): + lbl = self.ir_arch.loc_db.loc_key_to_label(cur_addr) + cur_addr = lbl.offset if cur_addr in self.handler: self.handler[cur_addr](self) @@ -312,24 +320,24 @@ class DSEEngine(object): # Get IR blocks if cur_addr in self.addr_to_cacheblocks: - self.ir_arch.blocks.clear() - self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr]) + self.ircfg.blocks.clear() + self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr]) else: ## Reset cache structures - self.ir_arch.blocks.clear()# = {} + self.ircfg.blocks.clear()# = {} ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) - self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) + self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg) + self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks) # Emulate the current instruction self.symb.reset_modified() # Is the symbolic execution going (potentially) to jump on a lbl_gen? 
- if len(self.ir_arch.blocks) == 1: - next_addr = self.symb.run_at(cur_addr) + if len(self.ircfg.blocks) == 1: + self.symb.run_at(self.ircfg, cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -339,11 +347,16 @@ class DSEEngine(object): # Update the concrete execution self._update_state_from_concrete_symb(self.symb_concrete) while True: - next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) - self.symb.run_block_at(cur_addr) - if not(expr_is_label(next_addr_concrete) and - next_addr_concrete.name.offset is None): + next_addr_concrete = self.symb_concrete.run_block_at( + self.ircfg, cur_addr + ) + self.symb.run_block_at(self.ircfg, cur_addr) + + if not (isinstance(next_addr_concrete, ExprLoc) and + self.ir_arch.loc_db.get_location_offset( + next_addr_concrete.loc_key + ) is None): # Not a lbl_gen, exit break @@ -351,6 +364,7 @@ class DSEEngine(object): self.handle(next_addr_concrete) cur_addr = next_addr_concrete + # At this stage, symbolic engine is one instruction after the concrete # engine @@ -428,7 +442,7 @@ class DSEEngine(object): symbexec.symbols[reg] = value def update_state_from_concrete(self, cpu=True, mem=False): - """Update the symbolic state with concrete values from the concrete + r"""Update the symbolic state with concrete values from the concrete engine @cpu: (optional) if set, update registers' value @@ -596,13 +610,19 @@ class DSEPathConstraint(DSEEngine): self.cur_solver.add(self.z3_trans.from_expr(cons)) def handle(self, cur_addr): + cur_addr = self.ir_arch.loc_db.canonize_to_exprloc(cur_addr) symb_pc = self.eval_expr(self.ir_arch.IRDst) possibilities = possible_values(symb_pc) cur_path_constraint = set() # path_constraint for the concrete path if len(possibilities) == 1: - assert next(iter(possibilities)).value == cur_addr + dst = next(iter(possibilities)).value + dst = self.ir_arch.loc_db.canonize_to_exprloc(dst) + assert dst == cur_addr else: for 
possibility in possibilities: + target_addr = self.ir_arch.loc_db.canonize_to_exprloc( + possibility.value + ) path_constraint = set() # Set of ExprAff for the possible path # Get constraint associated to the possible path @@ -642,11 +662,11 @@ class DSEPathConstraint(DSEEngine): "address 0x%x" % address) path_constraint.add(ExprAff(expr_mem, value)) - if possibility.value == cur_addr: + if target_addr == cur_addr: # Add path constraint cur_path_constraint = path_constraint - elif self.produce_solution(possibility.value): + elif self.produce_solution(target_addr): # Looking for a new solution self.cur_solver.push() for cons in path_constraint: @@ -657,8 +677,7 @@ class DSEPathConstraint(DSEEngine): result = self.cur_solver.check() if result == z3.sat: model = self.cur_solver.model() - self.handle_solution(model, possibility.value) + self.handle_solution(model, target_addr) self.cur_solver.pop() self.handle_correct_destination(cur_addr, cur_path_constraint) - diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index e77b1669..b1147adb 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -57,16 +57,15 @@ class Sandbox(object): cls.__init__(self, **kwargs) # Logging options - if self.options.singlestep: - self.jitter.jit.log_mn = True - self.jitter.jit.log_regs = True + self.jitter.set_trace_log( + trace_instr=self.options.singlestep, + trace_regs=self.options.singlestep, + trace_new_blocks=self.options.dumpblocs + ) if not self.options.quiet_function_calls: log_func.setLevel(logging.INFO) - if self.options.dumpblocs: - self.jitter.jit.log_newbloc = True - @classmethod def parser(cls, *args, **kwargs): """ diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index 94be74fd..529621c4 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -9,7 +9,6 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import regs as regs_module from regs import * -from 
miasm2.core.asmblock import AsmLabel from miasm2.core.cpu import log as log_cpu from miasm2.expression.modint import uint32, uint64, mod_size2int from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp @@ -264,7 +263,7 @@ conds_inv_expr, _, conds_inv_info = gen_regs(CONDS_INV, {}) class aarch64_arg(m_arg): - def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): if size_hint is None: size_hint = 64 if fixed_size is None: @@ -277,25 +276,25 @@ class aarch64_arg(m_arg): if isinstance(value.name, ExprId): fixed_size.add(value.name.size) return value.name - label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + loc_key = loc_db.get_or_create_name_location(value.name) + return ExprLoc(loc_key, size_hint) if isinstance(value, AstInt): assert size_hint is not None return ExprInt(value.value, size_hint) if isinstance(value, AstOp): if value.op == "segm": - segm = self.asm_ast_to_expr(value.args[0], symbol_pool) - ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + segm = self.asm_ast_to_expr(value.args[0], loc_db) + ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) return ExprOp('segm', segm, ptr) - args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] if len(fixed_size) == 0: # No fixed size pass elif len(fixed_size) == 1: # One fixed size, regen all size = list(fixed_size)[0] - args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] else: raise ValueError("Size conflict") @@ -311,44 +310,49 @@ class instruction_aarch64(instruction): super(instruction_aarch64, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos=None): + def arg2str(expr, index=None, 
loc_db=None): wb = False - if isinstance(e, m2_expr.ExprId) or isinstance(e, m2_expr.ExprInt): - return str(e) - elif isinstance(e, m2_expr.ExprOp) and e.op in shift_expr: - op_str = shift_str[shift_expr.index(e.op)] - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "slice_at": - return "%s LSL %s" % (e.args[0], e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op in extend_lst: - op_str = e.op - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "postinc": - if e.args[1].arg != 0: - return "[%s], %s" % (e.args[0], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc_wb": - if e.args[1].arg != 0: - return "[%s, %s]!" % (e.args[0], e.args[1]) + return str(expr) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: + op_str = shift_str[shift_expr.index(expr.op)] + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "slice_at": + return "%s LSL %s" % (expr.args[0], expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in extend_lst: + op_str = expr.op + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "postinc": + if expr.args[1].arg != 0: + return "[%s], %s" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc": - if len(e.args) == 1: - return "[%s]" % (e.args[0]) - elif not isinstance(e.args[1], m2_expr.ExprInt) or e.args[1].arg != 0: - return "[%s, %s]" % (e.args[0], e.args[1]) + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc_wb": + if expr.args[1].arg != 0: + return "[%s, %s]!" 
% (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == 'segm': - arg = e.args[1] + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc": + if len(expr.args) == 1: + return "[%s]" % (expr.args[0]) + elif not isinstance(expr.args[1], m2_expr.ExprInt) or expr.args[1].arg != 0: + return "[%s, %s]" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == 'segm': + arg = expr.args[1] if isinstance(arg, m2_expr.ExprId): arg = str(arg) elif arg.op == 'LSL' and arg.args[1].arg == 0: arg = str(arg.args[0]) else: arg = "%s %s %s" % (arg.args[0], arg.op, arg.args[1]) - return '[%s, %s]' % (e.args[0], arg) + return '[%s, %s]' % (expr.args[0], arg) else: raise NotImplementedError("bad op") @@ -364,15 +368,14 @@ class instruction_aarch64(instruction): else: return 0 - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): index = self.mnemo_flow_to_dst_index(self.name) - e = self.args[index] - if not isinstance(e, m2_expr.ExprInt): + expr = self.args[index] + if not expr.is_int(): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = m2_expr.ExprId(l, e.size) - self.args[index] = s + addr = expr.arg + self.offset + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[index] = m2_expr.ExprLoc(loc_key, expr.size) def breakflow(self): return self.name in BRCOND + ["BR", "BLR", "RET", "ERET", "DRPS", "B", "BL"] @@ -380,14 +383,14 @@ class instruction_aarch64(instruction): def is_subcall(self): return self.name in ["BLR", "BL"] - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): index = self.mnemo_flow_to_dst_index(self.name) return [self.args[index]] def splitflow(self): return self.name in BRCOND + ["BLR", "BL"] - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 64 def fixDstOffset(self): 
@@ -499,7 +502,7 @@ class mn_aarch64(cls_mn): else: raise NotImplementedError('bad attrib') - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 def reset_class(self): @@ -797,8 +800,8 @@ def set_imm_to_size(size, expr): class aarch64_imm_sf(imm_noarg): parser = base_expr - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(aarch64_imm_sf, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(aarch64_imm_sf, self).fromstring(text, loc_db, parser_result) if start is None: return start, stop size = self.parent.args[0].expr.size diff --git a/miasm2/arch/aarch64/ira.py b/miasm2/arch/aarch64/ira.py index 5a89e910..a895b549 100644 --- a/miasm2/arch/aarch64/ira.py +++ b/miasm2/arch/aarch64/ira.py @@ -6,22 +6,22 @@ from miasm2.arch.aarch64.sem import ir_aarch64l, ir_aarch64b class ir_a_aarch64l_base(ir_aarch64l, ira): - def __init__(self, symbol_pool=None): - ir_aarch64l.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_aarch64l.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 class ir_a_aarch64b_base(ir_aarch64b, ira): - def __init__(self, symbol_pool=None): - ir_aarch64b.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_aarch64b.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 class ir_a_aarch64l(ir_a_aarch64l_base): - def __init__(self, symbol_pool=None): - ir_a_aarch64l_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_aarch64l_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 def get_out_regs(self, _): @@ -45,6 +45,6 @@ class ir_a_aarch64l(ir_a_aarch64l_base): class ir_a_aarch64b(ir_a_aarch64b_base, ir_a_aarch64l): - def __init__(self, symbol_pool=None): - ir_a_aarch64b_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_aarch64b_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 
diff --git a/miasm2/arch/aarch64/jit.py b/miasm2/arch/aarch64/jit.py index 31570f52..91c32c68 100644 --- a/miasm2/arch/aarch64/jit.py +++ b/miasm2/arch/aarch64/jit.py @@ -1,7 +1,7 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments -from miasm2.core import asmblock +from miasm2.jitter.jitload import Jitter, named_arguments +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck64, upck64 from miasm2.arch.aarch64.sem import ir_aarch64b, ir_aarch64l @@ -11,12 +11,11 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) -class jitter_aarch64l(jitter): +class jitter_aarch64l(Jitter): max_reg_arg = 8 def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_aarch64l(sp), *args, **kwargs) + Jitter.__init__(self, ir_aarch64l(LocationDB()), *args, **kwargs) self.vm.set_little_endian() def push_uint64_t(self, value): @@ -69,13 +68,12 @@ class jitter_aarch64l(jitter): func_prepare_systemv = func_prepare_stdcall def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc class jitter_aarch64b(jitter_aarch64l): def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_aarch64b(sp), *args, **kwargs) + Jitter.__init__(self, ir_aarch64b(LocationDB()), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index 88b0d0a7..a17c0f14 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -350,7 +350,36 @@ def csel(arg1, arg2, arg3, arg4): cond_expr = cond2expr[arg4.name] arg1 = arg2 if cond_expr else arg3 +def ccmp(ir, instr, arg1, arg2, arg3, arg4): + e = [] + if(arg2.is_int): + arg2=m2_expr.ExprInt(arg2.arg.arg,arg1.size) + default_nf = arg3[0:1] + default_zf = arg3[1:2] + default_cf = arg3[2:3] + default_of = arg3[3:4] + cond_expr = 
cond2expr[arg4.name] + res = arg1 - arg2 + new_nf = nf + new_zf = update_flag_zf(res)[0].src + new_cf = update_flag_sub_cf(arg1, arg2, res).src + new_of = update_flag_sub_of(arg1, arg2, res).src + + e.append(m2_expr.ExprAff(nf, m2_expr.ExprCond(cond_expr, + new_nf, + default_nf))) + e.append(m2_expr.ExprAff(zf, m2_expr.ExprCond(cond_expr, + new_zf, + default_zf))) + e.append(m2_expr.ExprAff(cf, m2_expr.ExprCond(cond_expr, + new_cf, + default_cf))) + e.append(m2_expr.ExprAff(of, m2_expr.ExprCond(cond_expr, + new_of, + default_of))) + return e, [] + def csinc(ir, instr, arg1, arg2, arg3, arg4): e = [] cond_expr = cond2expr[arg4.name] @@ -593,14 +622,14 @@ def udiv(arg1, arg2, arg3): @sbuild.parse def cbz(arg1, arg2): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if arg1 else arg2 + dst = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) if arg1 else arg2 PC = dst ir.IRDst = dst @sbuild.parse def cbnz(arg1, arg2): - dst = arg2 if arg1 else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg2 if arg1 else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -609,7 +638,7 @@ def cbnz(arg1, arg2): def tbz(arg1, arg2, arg3): bitmask = m2_expr.ExprInt(1, arg1.size) << arg2 dst = m2_expr.ExprId( - ir.get_next_label(instr), 64) if arg1 & bitmask else arg3 + ir.get_next_loc_key(instr), 64) if arg1 & bitmask else arg3 PC = dst ir.IRDst = dst @@ -618,21 +647,21 @@ def tbz(arg1, arg2, arg3): def tbnz(arg1, arg2, arg3): bitmask = m2_expr.ExprInt(1, arg1.size) << arg2 dst = arg3 if arg1 & bitmask else m2_expr.ExprId( - ir.get_next_label(instr), 64) + ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @sbuild.parse def b_ne(arg1): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if zf else arg1 + dst = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) if zf else arg1 PC = dst ir.IRDst = dst @sbuild.parse def b_eq(arg1): - dst = arg1 if zf else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if zf else 
m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -640,7 +669,7 @@ def b_eq(arg1): @sbuild.parse def b_ge(arg1): cond = cond2expr['GE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -648,7 +677,7 @@ def b_ge(arg1): @sbuild.parse def b_gt(arg1): cond = cond2expr['GT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -656,7 +685,7 @@ def b_gt(arg1): @sbuild.parse def b_cc(arg1): cond = cond2expr['CC'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -664,7 +693,7 @@ def b_cc(arg1): @sbuild.parse def b_cs(arg1): cond = cond2expr['CS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -672,7 +701,7 @@ def b_cs(arg1): @sbuild.parse def b_hi(arg1): cond = cond2expr['HI'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -680,7 +709,7 @@ def b_hi(arg1): @sbuild.parse def b_le(arg1): cond = cond2expr['LE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -688,7 +717,7 @@ def b_le(arg1): @sbuild.parse def b_ls(arg1): cond = cond2expr['LS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -696,7 +725,7 @@ def b_ls(arg1): @sbuild.parse def b_lt(arg1): cond = cond2expr['LT'] - dst = arg1 if cond else 
m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -732,7 +761,7 @@ def br(arg1): def blr(arg1): PC = arg1 ir.IRDst = arg1 - LR = m2_expr.ExprId(ir.get_next_label(instr), 64) + LR = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) @sbuild.parse def nop(): @@ -761,6 +790,7 @@ mnemo_func.update({ 'cmp': cmp, 'cmn': cmn, 'movk': movk, + 'ccmp': ccmp, 'csinc': csinc, 'csinv': csinv, 'csneg': csneg, @@ -831,8 +861,8 @@ class aarch64info: class ir_aarch64l(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_aarch64, "l", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_aarch64, "l", loc_db) self.pc = PC self.sp = SP self.IRDst = m2_expr.ExprId('IRDst', 64) @@ -877,7 +907,7 @@ class ir_aarch64l(IntermediateRepresentation): src = self.expr_fix_regs_for_mode(src) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(irblock.label, irs) + return IRBlock(irblock.loc_key, irs) def mod_pc(self, instr, instr_ir, extra_ir): "Replace PC by the instruction's offset" @@ -908,15 +938,15 @@ class ir_aarch64l(IntermediateRepresentation): new_dsts = {dst:src for dst, src in assignblk.iteritems() if dst not in regs_to_fix} irs.append(AssignBlock(new_dsts, assignblk.instr)) - new_irblocks.append(IRBlock(irblock.label, irs)) + new_irblocks.append(IRBlock(irblock.loc_key, irs)) return instr_ir, new_irblocks class ir_aarch64b(ir_aarch64l): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_aarch64, "b", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_aarch64, "b", loc_db) self.pc = PC self.sp = SP self.IRDst = m2_expr.ExprId('IRDst', 64) diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index b240a047..82664476 100644 --- a/miasm2/arch/arm/arch.py +++ 
b/miasm2/arch/arm/arch.py @@ -343,62 +343,67 @@ class instruction_arm(instruction): super(instruction_arm, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(expr, index=None, loc_db=None): wb = False - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - if isinstance(e, ExprOp) and e.op in expr2shift_dct: - if len(e.args) == 1: - return '%s %s' % (e.args[0], expr2shift_dct[e.op]) - elif len(e.args) == 2: - return '%s %s %s' % (e.args[0], expr2shift_dct[e.op], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + if isinstance(expr, ExprOp) and expr.op in expr2shift_dct: + if len(expr.args) == 1: + return '%s %s' % (expr.args[0], expr2shift_dct[expr.op]) + elif len(expr.args) == 2: + return '%s %s %s' % (expr.args[0], expr2shift_dct[expr.op], expr.args[1]) else: raise NotImplementedError('zarb arg2str') sb = False - if isinstance(e, ExprOp) and e.op == "sbit": + if isinstance(expr, ExprOp) and expr.op == "sbit": sb = True - e = e.args[0] - if isinstance(e, ExprOp) and e.op == "reglist": - o = [gpregs.expr.index(x) for x in e.args] + expr = expr.args[0] + if isinstance(expr, ExprOp) and expr.op == "reglist": + o = [gpregs.expr.index(x) for x in expr.args] out = reglist2str(o) if sb: out += "^" return out - if isinstance(e, ExprOp) and e.op == 'wback': + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] - if isinstance(e, ExprId): - out = str(e) + expr = expr.args[0] + if isinstance(expr, ExprId): + out = str(expr) if wb: out += "!" 
return out - if not isinstance(e, ExprMem): - return str(e) + if not isinstance(expr, ExprMem): + return str(expr) - e = e.arg - if isinstance(e, ExprOp) and e.op == 'wback': + expr = expr.arg + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] + expr = expr.args[0] - if isinstance(e, ExprId): - r, s = e, None - elif len(e.args) == 1 and isinstance(e.args[0], ExprId): - r, s = e.args[0], None - elif isinstance(e.args[0], ExprId): - r, s = e.args[0], e.args[1] + if isinstance(expr, ExprId): + r, s = expr, None + elif len(expr.args) == 1 and isinstance(expr.args[0], ExprId): + r, s = expr.args[0], None + elif isinstance(expr.args[0], ExprId): + r, s = expr.args[0], expr.args[1] else: - r, s = e.args[0].args + r, s = expr.args[0].args if isinstance(s, ExprOp) and s.op in expr2shift_dct: s = ' '.join([str(x) for x in s.args[0], expr2shift_dct[s.op], s.args[1]]) - if isinstance(e, ExprOp) and e.op == 'postinc': + if isinstance(expr, ExprOp) and expr.op == 'postinc': o = '[%s]' % r if s and not (isinstance(s, ExprInt) and s.arg == 0): o += ', %s' % s @@ -417,17 +422,16 @@ class instruction_arm(instruction): def dstflow(self): return self.name in conditional_branch + unconditional_branch - def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + def dstflow2label(self, loc_db): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + self.offset + addr = expr.arg + self.offset else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + addr = expr.arg + self.offset + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -443,7 +447,7 @@ class instruction_arm(instruction): return True return self.additional_info.lnk - def getdstflow(self, symbol_pool): + def getdstflow(self, 
loc_db): return [self.args[0]] def splitflow(self): @@ -455,7 +459,7 @@ class instruction_arm(instruction): return False return self.breakflow() and self.additional_info.cond != 14 - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -490,29 +494,31 @@ class instruction_armt(instruction_arm): return True return self.name in conditional_branch + unconditional_branch - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): if self.name in ["CBZ", "CBNZ"]: - e = self.args[1] + expr = self.args[1] else: - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + (self.offset & 0xfffffffc) + addr = expr.arg + (self.offset & 0xfffffffc) elif self.name == 'BL': - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('BP'): - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('CB'): - ad = e.arg + self.offset + self.l + 2 + addr = expr.arg + self.offset + self.l + 2 else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + addr = expr.arg + self.offset + + loc_key = loc_db.get_or_create_offset_location(addr) + dst = ExprLoc(loc_key, expr.size) + if self.name in ["CBZ", "CBNZ"]: - self.args[1] = s + self.args[1] = dst else: - self.args[0] = s + self.args[0] = dst def breakflow(self): if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: @@ -523,7 +529,7 @@ class instruction_armt(instruction_arm): return True return False - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if self.name in ['CBZ', 'CBNZ']: return [self.args[1]] return [self.args[0]] @@ -656,7 +662,7 @@ class mn_arm(cls_mn): raise NotImplementedError('bad attrib') - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, 
mode): return 32 @@ -763,28 +769,28 @@ class mn_armt(cls_mn): args = [a.expr for a in self.args] return args - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 class arm_arg(m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = loc_db.get_or_create_name_location(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -1034,16 +1040,12 @@ class arm_op2(arm_arg): shift_kind = shift & 1 shift_type = (shift >> 1) & 3 shift >>= 3 - # print self.parent.immop.value, hex(shift), hex(shift_kind), - # hex(shift_type) if shift_kind: # shift kind is reg if shift & 1: - # log.debug('error in shift1') return False rs = shift >> 1 if rs == 0xf: - # log.debug('error in shift2') return False shift_op = regs_expr[rs] else: @@ -2158,12 +2160,10 @@ class armt_rlist_pclr(armt_rlist): reg_l = list(e.args) self.parent.pclr.value = 0 if self.parent.pp.value == 0: - # print 'push' if regs_expr[14] in reg_l: reg_l.remove(regs_expr[14]) self.parent.pclr.value = 1 else: - # print 'pop', if regs_expr[15] in reg_l: reg_l.remove(regs_expr[15]) self.parent.pclr.value = 1 @@ -2821,8 +2821,8 @@ class armt_aif(reg_noarg, arm_arg): return ret return self.value != 0 - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = 
super(armt_aif, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(armt_aif, self).fromstring(text, loc_db, parser_result) if self.expr.name == "X": return None, None return start, stop diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py index 586fa903..5e21778d 100644 --- a/miasm2/arch/arm/disasm.py +++ b/miasm2/arch/arm/disasm.py @@ -2,7 +2,7 @@ from miasm2.core.asmblock import AsmConstraint, disasmEngine from miasm2.arch.arm.arch import mn_arm, mn_armt -def cb_arm_fix_call(mn, cur_bloc, symbol_pool, offsets_to_dis, *args, **kwargs): +def cb_arm_fix_call(mn, cur_bloc, loc_db, offsets_to_dis, *args, **kwargs): """ for arm: MOV LR, PC @@ -24,7 +24,8 @@ def cb_arm_fix_call(mn, cur_bloc, symbol_pool, offsets_to_dis, *args, **kwargs): return if not l2.args[1] in values: return - cur_bloc.add_cst(l1.offset + 4, AsmConstraint.c_next, symbol_pool) + loc_key_cst = loc_db.get_or_create_offset_location(l1.offset + 4) + cur_bloc.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(l1.offset + 4) cb_arm_funcs = [cb_arm_fix_call] diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py index ed96376b..7b26a6e4 100644 --- a/miasm2/arch/arm/ira.py +++ b/miasm2/arch/arm/ira.py @@ -6,20 +6,20 @@ from miasm2.expression.expression import ExprAff, ExprOp from miasm2.ir.ir import AssignBlock class ir_a_arml_base(ir_arml, ira): - def __init__(self, symbol_pool=None): - ir_arml.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_arml.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_armb_base(ir_armb, ira): - def __init__(self, symbol_pool=None): - ir_armb.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_armb.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_arml(ir_a_arml_base): - def __init__(self, symbol_pool=None): - ir_a_arml_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + 
ir_a_arml_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 def call_effects(self, ad, instr): @@ -55,17 +55,17 @@ class ir_a_arml(ir_a_arml_base): class ir_a_armb(ir_a_armb_base, ir_a_arml): - def __init__(self, symbol_pool=None): - ir_a_armb_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_armb_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_armtl(ir_armtl, ir_a_arml): - def __init__(self, symbol_pool=None): - ir_armtl.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_armtl.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_armtb(ir_a_armtl, ir_armtb, ir_a_armb): - def __init__(self, symbol_pool=None): - ir_armtb.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_armtb.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index 1a37b7f1..10a7c644 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -1,12 +1,13 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments -from miasm2.core import asmblock +from miasm2.jitter.jitload import Jitter, named_arguments +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck32, upck32 from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_inv, tab_cond from miasm2.jitter.codegen import CGen from miasm2.expression.expression import ExprId, ExprAff, ExprCond from miasm2.ir.ir import IRBlock, AssignBlock +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_arm') hnd = logging.StreamHandler() @@ -17,11 +18,6 @@ log.setLevel(logging.CRITICAL) class arm_CGen(CGen): - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = self.ir_arch.arch.regs.PC - self.init_arch_C() - def block2assignblks(self, block): """ @@ -55,12 +51,12 @@ class arm_CGen(CGen): return irblocks_list -class jitter_arml(jitter): +class jitter_arml(Jitter): C_Gen = arm_CGen 
def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_arml(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_arml(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint32_t(self, value): @@ -111,7 +107,7 @@ class jitter_arml(jitter): get_arg_n_systemv = get_arg_n_stdcall def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc @@ -119,8 +115,8 @@ class jitter_armb(jitter_arml): C_Gen = arm_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_armb(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_armb(sp), *args, **kwargs) self.vm.set_big_endian() @@ -128,6 +124,6 @@ class jitter_armtl(jitter_arml): C_Gen = arm_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_armtl(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_armtl(sp), *args, **kwargs) self.vm.set_little_endian() diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 9e4da3f6..00250157 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -441,16 +441,16 @@ def sdiv(ir, instr, a, b, c=None): if c is None: b, c = a, b - lbl_div = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_except = ExprId(ir.loc_db.add_location(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - e.append(ExprAff(ir.IRDst, ExprCond(c, lbl_div, lbl_except))) + e.append(ExprAff(ir.IRDst, ExprCond(c, loc_div, loc_except))) do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, 
[AssignBlock(do_except, instr)]) + do_except.append(ExprAff(ir.IRDst, loc_next)) + blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) @@ -461,8 +461,8 @@ def sdiv(ir, instr, a, b, c=None): if dst is not None: do_div.append(ExprAff(ir.IRDst, r)) - do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(ExprAff(ir.IRDst, loc_next)) + blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -474,16 +474,16 @@ def udiv(ir, instr, a, b, c=None): - lbl_div = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_except = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - e.append(ExprAff(ir.IRDst, ExprCond(c, lbl_div, lbl_except))) + e.append(ExprAff(ir.IRDst, ExprCond(c, loc_div, loc_except))) do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(ExprAff(ir.IRDst, loc_next)) + blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) r = ExprOp("udiv", b, c) @@ -493,8 +493,8 @@ def udiv(ir, instr, a, b, c=None): if dst is not None: do_div.append(ExprAff(ir.IRDst, r)) - do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(ExprAff(ir.IRDst, loc_next)) + blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -635,7 +635,6 @@ def st_ld_r(ir, instr, a, a2, b, store=False, size=32, s_ext=False, z_ext=False) base, off = b.args[0], b.args[1] # ExprInt(size/8, 32) else: base, off = b, ExprInt(0, 32) - # 
print a, wb, base, off, postinc if postinc: ad = base else: @@ -734,13 +733,11 @@ def ldrsh(ir, instr, a, b): def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): e = [] wb = False - # sb = False dst = None if isinstance(a, ExprOp) and a.op == 'wback': wb = True a = a.args[0] if isinstance(b, ExprOp) and b.op == 'sbit': - # sb = True b = b.args[0] regs = b.args base = a @@ -932,19 +929,20 @@ def pop(ir, instr, a): def cbz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next, b))) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + e.append(ExprAff(ir.IRDst, ExprCond(a, loc_next_expr, b))) return e, [] def cbnz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, b, lbl_next))) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + e.append(ir.IRDst, ExprCond(a, b, loc_next_expr)) return e, [] - def uxtb(ir, instr, a, b): e = [] r = b[:8].zeroExtend(32) @@ -1038,7 +1036,7 @@ def pldw(ir, instr, a): def clz(ir, instr, a, b): e = [] - e.append(ExprAff(a, ExprOp('clz', b))) + e.append(ExprAff(a, ExprOp('cntleadzeros', b))) return e, [] def uxtab(ir, instr, a, b, c): @@ -1264,10 +1262,14 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): raise ValueError('unknown condition %r' % cond) cond = tab_cond[cond] - lbl_next = ExprId(ir.get_next_label(instr), 32) - lbl_do = ExprId(ir.gen_label(), 32) - dst_cond = ExprCond(cond, lbl_do, lbl_next) + + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + loc_do = ir.loc_db.add_location() + loc_do_expr = ExprLoc(loc_do, 32) + + dst_cond = ExprCond(cond, loc_do_expr, loc_next_expr) assert(isinstance(instr_ir, list)) has_irdst = False @@ -1276,8 +1278,8 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): has_irdst = True break if not has_irdst: - 
instr_ir.append(ExprAff(ir.IRDst, lbl_next)) - e_do = IRBlock(lbl_do.name, [AssignBlock(instr_ir, instr)]) + instr_ir.append(ExprAff(ir.IRDst, loc_next_expr)) + e_do = IRBlock(loc_do, [AssignBlock(instr_ir, instr)]) e = [ExprAff(ir.IRDst, dst_cond)] return e, [e_do] + extra_ir @@ -1472,8 +1474,8 @@ class arminfo: class ir_arml(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_arm, "l", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_arm, "l", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) @@ -1527,7 +1529,7 @@ class ir_arml(IntermediateRepresentation): raise ValueError("IT name invalid %s" % instr) return out, instr.args[0] - def do_it_block(self, label, index, block, assignments, gen_pc_updt): + def do_it_block(self, loc, index, block, assignments, gen_pc_updt): instr = block.lines[index] it_hints, it_cond = self.parse_itt(instr) cond_num = cond_dct_inv[it_cond.name] @@ -1539,14 +1541,14 @@ class ir_arml(IntermediateRepresentation): ir_blocks_all = [] # Gen dummy irblock for IT instr - label_next = self.get_next_label(instr) - dst = ExprAff(self.IRDst, ExprId(label_next, 32)) + loc_next = self.get_next_loc_key(instr) + dst = ExprAff(self.IRDst, ExprId(loc_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(loc, assignments) ir_blocks_all.append([irblock]) - label = label_next + loc = loc_next assignments = [] for hint in it_hints: irblocks = [] @@ -1554,38 +1556,40 @@ class ir_arml(IntermediateRepresentation): instr = block.lines[index] # Add conditionnal jump to current irblock - label_do = self.symbol_pool.gen_label() - label_next = self.get_next_label(instr) + loc_do = self.loc_db.add_location() + loc_next = self.get_next_loc_key(instr) if hint: local_cond = ~cond_eq else: local_cond = cond_eq - dst = ExprAff(self.IRDst, ExprCond(local_cond, 
ExprId(label_do, 32), ExprId(label_next, 32))) + dst = ExprAff(self.IRDst, ExprCond(local_cond, ExprLoc(loc_do, 32), ExprLoc(loc_next, 32))) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(loc, assignments) irblocks.append(irblock) assignments = [] - label = label_do - split = self.add_instr_to_irblock(block, instr, assignments, - irblocks, gen_pc_updt) + loc = loc_do + split = self.add_instr_to_current_state( + instr, block, assignments, + irblocks, gen_pc_updt + ) if split: raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) - dst = ExprAff(self.IRDst, ExprId(label_next, 32)) + dst = ExprAff(self.IRDst, ExprId(loc_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(loc, assignments) irblocks.append(irblock) - label = label_next + loc = loc_next assignments = [] ir_blocks_all.append(irblocks) return index, ir_blocks_all - def add_block(self, block, gen_pc_updt=False): + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): """ Add a native block to the current IR @block: native assembly block @@ -1594,7 +1598,8 @@ class ir_arml(IntermediateRepresentation): it_hints = None it_cond = None - label = None + label = block.loc_key + assignments = [] ir_blocks_all = [] index = -1 while index + 1 < len(block.lines): @@ -1602,7 +1607,7 @@ class ir_arml(IntermediateRepresentation): instr = block.lines[index] if label is None: assignments = [] - label = self.get_instr_label(instr) + label = self.get_loc_key_for_instr(instr) if instr.name.startswith("IT"): index, irblocks_it = self.do_it_block(label, index, block, assignments, gen_pc_updt) for irblocks in irblocks_it: @@ -1610,8 +1615,10 @@ class ir_arml(IntermediateRepresentation): label = None continue - split = self.add_instr_to_irblock(block, instr, assignments, - ir_blocks_all, gen_pc_updt) + split = self.add_instr_to_current_state( 
+ instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) if split: ir_blocks_all.append(IRBlock(label, assignments)) label = None @@ -1619,16 +1626,16 @@ class ir_arml(IntermediateRepresentation): if label is not None: ir_blocks_all.append(IRBlock(label, assignments)) - new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) for irblock in new_ir_blocks_all: - self.blocks[irblock.label] = irblock + ircfg.add_irblock(irblock) return new_ir_blocks_all class ir_armb(ir_arml): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_arm, "b", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_arm, "b", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) @@ -1636,8 +1643,8 @@ class ir_armb(ir_arml): class ir_armtl(ir_arml): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_armt, "l", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_armt, "l", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) @@ -1662,8 +1669,8 @@ class ir_armtl(ir_arml): class ir_armtb(ir_armtl): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_armt, "b", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_armt, "b", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 15c59cf0..974644dc 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -5,7 +5,7 @@ from collections import defaultdict from pyparsing import Literal, Group, Optional -from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp +from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp, ExprLoc from miasm2.core.bin_stream import bin_stream 
import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu @@ -60,11 +60,16 @@ class instruction_mips32(cpu.instruction): @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None, loc_db=None): + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') @@ -88,23 +93,22 @@ class instruction_mips32(cpu.instruction): raise NotImplementedError("TODO %s"%self) return i - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): if self.name in ["J", 'JAL']: - e = self.args[0].arg - ad = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + e - l = symbol_pool.getby_offset_create(ad) - self.args[0] = ExprId(l, e.size) + expr = self.args[0].arg + addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) return ndx = self.get_dst_num() - e = self.args[ndx] + expr = self.args[ndx] - if not isinstance(e, ExprInt): + if not isinstance(expr, ExprInt): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[ndx] = s + addr = expr.arg + self.offset + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[ndx] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name == 'BREAK': @@ -118,7 +122,7 @@ class instruction_mips32(cpu.instruction): return True return False - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if self.name in br_0: return [self.args[0]] elif self.name in br_1: @@ -143,7 +147,7 @@ class instruction_mips32(cpu.instruction): return True return False - def 
get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -255,23 +259,23 @@ def mips32op(name, fields, args=None, alias=False): #type(name, (mn_mips32b,), dct) class mips32_arg(cpu.m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = loc_db.get_or_create_name_location(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -403,9 +407,9 @@ class mips32_dreg_imm(mips32_arg): return True @staticmethod - def arg2str(e): - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None): + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index 7aefad32..3caa8b12 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -4,19 +4,14 @@ from miasm2.expression.expression import ExprAff, ExprInt, ExprId from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b -from miasm2.core.asmblock import expr_is_int_or_label, expr_is_label class ir_a_mips32l(ir_mips32l, ira): - def __init__(self, symbol_pool=None): - ir_mips32l.__init__(self, 
symbol_pool) + def __init__(self, loc_db=None): + ir_mips32l.__init__(self, loc_db) self.ret_reg = self.arch.regs.V0 - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - # Avoid adding side effects, already done in post_add_bloc - return False - - def post_add_block(self, block, ir_blocks): - IntermediateRepresentation.post_add_block(self, block, ir_blocks) + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): + IntermediateRepresentation.post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks) new_irblocks = [] for irb in ir_blocks: pc_val = None @@ -28,14 +23,15 @@ class ir_a_mips32l(ir_mips32l, ira): if pc_val is None or lr_val is None: new_irblocks.append(irb) continue - if not expr_is_int_or_label(lr_val): - new_irblocks.append(irb) + if lr_val.is_loc(): + offset = self.loc_db.get_location_offset(lr_val.loc_key) + if offset is not None: + lr_val = ExprInt(offset, 32) + if not lr_val.is_int(): continue - if expr_is_label(lr_val): - lr_val = ExprInt(lr_val.name.offset, 32) instr = block.lines[-2] - if lr_val.arg != instr.offset + 8: + if int(lr_val) != instr.offset + 8: raise ValueError("Wrong arg") # CALL @@ -70,6 +66,6 @@ class ir_a_mips32l(ir_mips32l, ira): class ir_a_mips32b(ir_mips32b, ir_a_mips32l): - def __init__(self, symbol_pool=None): - ir_mips32b.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_mips32b.__init__(self, loc_db) self.ret_reg = self.arch.regs.V0 diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index 16d88067..a0df64d6 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -1,7 +1,7 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments -from miasm2.core import asmblock +from miasm2.jitter.jitload import Jitter, named_arguments +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck32, upck32 from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b from miasm2.jitter.codegen import CGen @@ -57,10 
+57,10 @@ class mipsCGen(CGen): self.ir_arch.pc] assignments[self.delay_slot_set] = m2_expr.ExprInt(1, 32) # Replace IRDst with next instruction - assignments[self.ir_arch.IRDst] = m2_expr.ExprId( - self.ir_arch.get_next_instr(assignblock.instr), 32) + dst_loc_key = self.ir_arch.get_next_instr(assignblock.instr) + assignments[self.ir_arch.IRDst] = m2_expr.ExprLoc(dst_loc_key, 32) irs.append(AssignBlock(assignments, assignblock.instr)) - irblocks[blk_idx] = IRBlock(irblock.label, irs) + irblocks[blk_idx] = IRBlock(irblock.loc_key, irs) return irblocks_list @@ -69,23 +69,24 @@ class mipsCGen(CGen): Generate the C code for the final block instruction """ - lbl = self.get_block_post_label(block) - out = (self.CODE_RETURN_NO_EXCEPTION % (self.label_to_jitlabel(lbl), + loc_key = self.get_block_post_label(block) + offset = self.ir_arch.loc_db.get_location_offset(loc_key) + out = (self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, m2_expr.ExprId('branch_dst_irdst', 32), m2_expr.ExprId('branch_dst_irdst', 32), - self.id_to_c(m2_expr.ExprInt(lbl.offset, 32))) + self.id_to_c(m2_expr.ExprInt(offset, 32))) ).split('\n') return out -class jitter_mips32l(jitter): +class jitter_mips32l(Jitter): C_Gen = mipsCGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_mips32l(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_mips32l(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint32_t(self, value): @@ -101,7 +102,7 @@ class jitter_mips32l(jitter): return upck32(self.vm.get_mem(self.cpu.SP + 4 * index, 4)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc # calling conventions @@ -144,6 +145,6 @@ class jitter_mips32l(jitter): class jitter_mips32b(jitter_mips32l): def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_mips32b(sp), *args, **kwargs) + sp = LocationDB() + 
Jitter.__init__(self, ir_mips32b(sp), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index 99c81a33..acf7370f 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -35,7 +35,7 @@ def jal(arg1): "Jumps to the calculated address @arg1 and stores the return address in $RA" PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size) @sbuild.parse def jalr(arg1, arg2): @@ -43,13 +43,13 @@ def jalr(arg1, arg2): address in another register @arg2""" PC = arg1 ir.IRDst = arg1 - arg2 = ExprId(ir.get_next_break_label(instr), 32) + arg2 = ExprLoc(ir.get_next_break_loc_key(instr), arg2.size) @sbuild.parse def bal(arg1): PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size) @sbuild.parse def l_b(arg1): @@ -76,7 +76,7 @@ def lb(arg1, arg2): @sbuild.parse def beq(arg1, arg2, arg3): "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1 - arg2 else arg3 + dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 - arg2 else arg3 PC = dst ir.IRDst = dst @@ -84,7 +84,7 @@ def beq(arg1, arg2, arg3): def bgez(arg1, arg2): """Branches on @arg2 if the quantities of register @arg1 is greater than or equal to zero""" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1.msb() else arg2 + dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1.msb() else arg2 PC = dst ir.IRDst = dst @@ -92,7 +92,7 @@ def bgez(arg1, arg2): def bne(arg1, arg2, arg3): """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT equal""" - dst = arg3 if arg1 - arg2 else ExprId(ir.get_next_break_label(instr), 32) + dst = arg3 if arg1 - arg2 else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst ir.IRDst = dst @@ -230,7 +230,7 @@ def 
seh(arg1, arg2): @sbuild.parse def bltz(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than zero""" - dst_o = arg2 if arg1.msb() else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1.msb() else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -238,7 +238,7 @@ def bltz(arg1, arg2): def blez(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than or equal to zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = arg2 if cond else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if cond else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -246,7 +246,7 @@ def blez(arg1, arg2): def bgtz(arg1, arg2): """Branches on @arg2 if the register @arg1 is greater than zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = ExprId(ir.get_next_break_label(instr), 32) if cond else arg2 + dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if cond else arg2 PC = dst_o ir.IRDst = dst_o @@ -346,13 +346,13 @@ def c_le_d(arg1, arg2, arg3): @sbuild.parse def bc1t(arg1, arg2): - dst_o = arg2 if arg1 else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1 else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @sbuild.parse def bc1f(arg1, arg2): - dst_o = ExprId(ir.get_next_break_label(instr), 32) if arg1 else arg2 + dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 else arg2 PC = dst_o ir.IRDst = dst_o @@ -415,22 +415,22 @@ def ehb(arg1): def teq(ir, instr, arg1, arg2): e = [] - lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) - lbl_next = ir.get_next_label(instr) - lbl_next_expr = m2_expr.ExprId(lbl_next, ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) do_except = [] 
do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) - blk_except = IRBlock(lbl_except, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except.index, [AssignBlock(do_except, instr)]) cond = arg1 - arg2 e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(cond, lbl_next_expr, lbl_except_expr))) + m2_expr.ExprCond(cond, loc_next_expr, loc_except_expr))) return e, [blk_except] @@ -469,8 +469,8 @@ def get_mnemo_expr(ir, instr, *args): class ir_mips32l(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_mips32, 'l', symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_mips32, 'l', loc_db) self.pc = mn_mips32.getpc() self.sp = mn_mips32.getsp() self.IRDst = m2_expr.ExprId('IRDst', 32) @@ -490,14 +490,14 @@ class ir_mips32l(IntermediateRepresentation): return instr_ir, new_extra_ir def get_next_instr(self, instr): - return self.symbol_pool.getby_offset_create(instr.offset + 4) + return self.loc_db.get_or_create_offset_location(instr.offset + 4) - def get_next_break_label(self, instr): - return self.symbol_pool.getby_offset_create(instr.offset + 8) + def get_next_break_loc_key(self, instr): + return self.loc_db.get_or_create_offset_location(instr.offset + 8) class ir_mips32b(ir_mips32l): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_mips32, 'b', symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_mips32, 'b', loc_db) self.pc = mn_mips32.getpc() self.sp = mn_mips32.getsp() self.IRDst = m2_expr.ExprId('IRDst', 32) diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index e4d03edb..ecf4cb13 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -59,7 +59,7 
@@ sreg_p = (deref_pinc | deref_nooff | deref_off | base_expr).setParseAction(cb_ex class msp430_arg(m_arg): - def asm_ast_to_expr(self, value, symbol_pool): + def asm_ast_to_expr(self, value, loc_db): if isinstance(value, AstId): name = value.name if isinstance(name, Expr): @@ -69,17 +69,17 @@ class msp430_arg(m_arg): index = gpregs.str.index(name) reg = gpregs.expr[index] return reg - label = symbol_pool.getby_name_create(value.name) - return ExprId(label, 16) + loc_key = loc_db.get_or_create_name_location(value.name) + return ExprLoc(loc_key, 16) if isinstance(value, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in value.args] if None in args: return None return ExprOp(value.op, *args) if isinstance(value, AstInt): return ExprInt(value.value, 16) if isinstance(value, AstMem): - ptr = self.asm_ast_to_expr(value.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(value.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, value.size) @@ -102,40 +102,44 @@ class instruction_msp430(instruction): return self.name in ['call'] @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId): - o = str(e) - elif isinstance(e, ExprInt): - o = str(e) - elif isinstance(e, ExprOp) and e.op == "autoinc": - o = "@%s+" % str(e.args[0]) - elif isinstance(e, ExprMem): - if isinstance(e.arg, ExprId): - if pos == 0: - o = "@%s" % e.arg + def arg2str(expr, index=None, loc_db=None): + if isinstance(expr, ExprId): + o = str(expr) + elif isinstance(expr, ExprInt): + o = str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + elif isinstance(expr, ExprOp) and expr.op == "autoinc": + o = "@%s+" % str(expr.args[0]) + elif isinstance(expr, ExprMem): + if isinstance(expr.arg, ExprId): + if index == 0: + o = "@%s" % expr.arg else: - o = "0x0(%s)" % e.arg - elif isinstance(e.arg, ExprInt): - o = "@%s" % e.arg - elif 
isinstance(e.arg, ExprOp): - o = "%s(%s)" % (e.arg.args[1], e.arg.args[0]) + o = "0x0(%s)" % expr.arg + elif isinstance(expr.arg, ExprInt): + o = "@%s" % expr.arg + elif isinstance(expr.arg, ExprOp): + o = "%s(%s)" % (expr.arg.args[1], expr.arg.args[0]) else: - raise NotImplementedError('unknown instance e = %s' % type(e)) + raise NotImplementedError('unknown instance expr = %s' % type(expr)) return o - def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + def dstflow2label(self, loc_db): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == "call": - ad = e.arg + addr = expr.arg else: - ad = e.arg + int(self.offset) + addr = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -161,10 +165,10 @@ class instruction_msp430(instruction): def is_subcall(self): return self.name in ['call'] - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0]] - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 16 def fixDstOffset(self): @@ -285,7 +289,7 @@ class mn_msp430(cls_mn): def reset_class(self): super(mn_msp430, self).reset_class() - def getnextflow(self, symbol_pool): + def getnextflow(self, loc_db): raise NotImplementedError('not fully functional') diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py index 0f88facc..2a850d82 100644 --- a/miasm2/arch/msp430/ira.py +++ b/miasm2/arch/msp430/ira.py @@ -6,15 +6,15 @@ from miasm2.arch.msp430.sem import ir_msp430 class ir_a_msp430_base(ir_msp430, ira): - def __init__(self, symbol_pool=None): - ir_msp430.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_msp430.__init__(self, loc_db) self.ret_reg = self.arch.regs.R15 
class ir_a_msp430(ir_a_msp430_base): - def __init__(self, symbol_pool=None): - ir_a_msp430_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_msp430_base.__init__(self, loc_db) def get_out_regs(self, _): return set([self.ret_reg, self.sp]) diff --git a/miasm2/arch/msp430/jit.py b/miasm2/arch/msp430/jit.py index dd5fe94e..9fbbc639 100644 --- a/miasm2/arch/msp430/jit.py +++ b/miasm2/arch/msp430/jit.py @@ -1,5 +1,5 @@ -from miasm2.jitter.jitload import jitter -from miasm2.core import asmblock +from miasm2.jitter.jitload import Jitter +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck16, upck16 from miasm2.arch.msp430.sem import ir_msp430 @@ -11,11 +11,11 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) -class jitter_msp430(jitter): +class jitter_msp430(Jitter): def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_msp430(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_msp430(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint16_t(self, value): @@ -37,6 +37,6 @@ class jitter_msp430(jitter): return value def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index dd24abb1..191abe75 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -238,8 +238,11 @@ def push_w(ir, instr, a): def call(ir, instr, a): e, a, dummy = mng_autoinc(a, None, 16) - n = ExprId(ir.get_next_label(instr), 16) - e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), n)) + + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + + e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), loc_next_expr)) e.append(ExprAff(SP, SP - ExprInt(2, 16))) e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) @@ -272,50 
+275,56 @@ def cmp_b(ir, instr, a, b): def jz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, a, n))) + e.append(ExprAff(PC, ExprCond(zf, a, loc_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, a, loc_next_expr))) return e, [] def jnz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, n, a))) + e.append(ExprAff(PC, ExprCond(zf, loc_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, loc_next_expr, a))) return e, [] def jl(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, n))) + e.append(ExprAff(PC, ExprCond(nf ^ of, a, loc_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, loc_next_expr))) return e, [] def jc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, a, n))) + e.append(ExprAff(PC, ExprCond(cf, a, loc_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, a, loc_next_expr))) return e, [] def jnc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, n, a))) + e.append(ExprAff(PC, ExprCond(cf, loc_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, loc_next_expr, a))) return e, [] def jge(ir, instr, a): - n = 
ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, n, a))) + e.append(ExprAff(PC, ExprCond(nf ^ of, loc_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, loc_next_expr, a))) return e, [] @@ -414,8 +423,8 @@ def ComposeExprAff(dst, src): class ir_msp430(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_msp430, None, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_msp430, None, loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 16) @@ -425,7 +434,6 @@ class ir_msp430(IntermediateRepresentation): pass def get_ir(self, instr): - # print instr#, args args = instr.args instr_ir, extra_ir = mnemo_func[instr.name](self, instr, *args) self.mod_sr(instr, instr_ir, extra_ir) diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 945824a0..c100cde3 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -5,7 +5,6 @@ from miasm2.expression.expression import * from miasm2.core.cpu import * from collections import defaultdict from miasm2.core.bin_stream import bin_stream -from miasm2.core.asmblock import asm_label import miasm2.arch.ppc.regs as regs_module from miasm2.arch.ppc.regs import * from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp @@ -35,23 +34,23 @@ deref = deref_reg | deref_reg_disp class ppc_arg(m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = loc_db.get_or_create_name_location(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, 
symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -74,7 +73,7 @@ class instruction_ppc(instruction): super(instruction_ppc, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(e, pos = None, loc_db=None): if isinstance(e, ExprId) or isinstance(e, ExprInt): return str(e) elif isinstance(e, ExprMem): @@ -110,7 +109,7 @@ class instruction_ppc(instruction): name[-3:] != 'CTR' and name[-4:] != 'CTRL') - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): name = self.name if name[-1] == '+' or name[-1] == '-': name = name[:-1] @@ -132,8 +131,8 @@ class instruction_ppc(instruction): ad = e.arg + self.offset else: ad = e.arg - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + loc_key = loc_db.get_or_create_offset_location(ad) + s = ExprLoc(loc_key, e.size) self.args[address_index] = s def breakflow(self): @@ -145,7 +144,7 @@ class instruction_ppc(instruction): name = name[0:-1] return name[0] == 'B' and (name[-1] == 'L' or name[-2:-1] == 'LA') - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if 'LR' in self.name: return [ LR ] elif 'CTR' in self.name: @@ -164,7 +163,7 @@ class instruction_ppc(instruction): ret = ret or self.is_subcall() return ret - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -280,7 +279,7 @@ class mn_ppc(cls_mn): else: raise NotImplementedError("bad attrib") - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 diff --git a/miasm2/arch/ppc/ira.py b/miasm2/arch/ppc/ira.py 
index 76a979ae..a30f972d 100644 --- a/miasm2/arch/ppc/ira.py +++ b/miasm2/arch/ppc/ira.py @@ -35,14 +35,30 @@ class ir_a_ppc32b(ir_ppc32b, ira): instr )] - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_update): - """Replace function call with corresponding call effects, - inside the IR block""" - if not instr.is_subcall(): - return False - call_effects = self.call_effects(instr.getdstflow(None)[0], instr) - assignments+= call_effects - return True + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_effects = self.call_effects(instr.getdstflow(None)[0], instr) + assignments+= call_effects + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False def sizeof_char(self): return 8 diff --git a/miasm2/arch/ppc/jit.py b/miasm2/arch/ppc/jit.py index 9134e032..14c203a9 100644 --- a/miasm2/arch/ppc/jit.py +++ b/miasm2/arch/ppc/jit.py @@ -1,5 +1,5 @@ -from miasm2.jitter.jitload import jitter, named_arguments -from miasm2.core import asmblock +from miasm2.jitter.jitload import Jitter, named_arguments +from miasm2.core.locationdb import LocationDB from miasm2.arch.ppc.sem import ir_ppc32b import struct @@ -11,11 +11,11 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) -class jitter_ppc32b(jitter): +class jitter_ppc32b(Jitter): max_reg_arg = 8 def __init__(self, *args, **kwargs): - super(jitter_ppc32b, 
self).__init__(ir_ppc32b(asmblock.AsmSymbolPool()), + super(jitter_ppc32b, self).__init__(ir_ppc32b(LocationDB()), *args, **kwargs) self.vm.set_big_endian() @@ -66,5 +66,5 @@ class jitter_ppc32b(jitter): def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc diff --git a/miasm2/arch/ppc/sem.py b/miasm2/arch/ppc/sem.py index 741ae24b..678ab041 100644 --- a/miasm2/arch/ppc/sem.py +++ b/miasm2/arch/ppc/sem.py @@ -606,21 +606,21 @@ def mn_do_store(ir, instr, arg1, arg2, arg3=None): ret.append(ExprAff(arg2, address)) if is_stwcx: - lbl_do = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_dont = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_do = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_dont = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) flags = [ ExprAff(CR0_LT, ExprInt(0,1)), ExprAff(CR0_GT, ExprInt(0,1)), ExprAff(CR0_SO, XER_SO)] ret += flags ret.append(ExprAff(CR0_EQ, ExprInt(1,1))) - ret.append(ExprAff(ir.IRDst, lbl_next)) + ret.append(ExprAff(ir.IRDst, loc_next)) dont = flags + [ ExprAff(CR0_EQ, ExprInt(0,1)), - ExprAff(ir.IRDst, lbl_next) ] - additional_ir = [ IRBlock(lbl_do.name, [ AssignBlock(ret) ]), - IRBlock(lbl_dont.name, [ AssignBlock(dont) ]) ] + ExprAff(ir.IRDst, loc_next) ] + additional_ir = [ IRBlock(loc_do, [ AssignBlock(ret) ]), + IRBlock(loc_dont, [ AssignBlock(dont) ]) ] ret = [ ExprAff(reserve, ExprInt(0, 1)), - ExprAff(ir.IRDst, ExprCond(reserve, lbl_do, lbl_dont)) ] + ExprAff(ir.IRDst, ExprCond(reserve, loc_do, loc_dont)) ] return ret, additional_ir @@ -690,7 +690,8 @@ def mn_b(ir, instr, arg1, arg2 = None): def mn_bl(ir, instr, arg1, arg2 = None): if arg2 is not None: arg1 = arg2 - return [ ExprAff(LR, ExprId(ir.get_next_instr(instr), 32)), + dst = ir.get_next_instr(instr) + return [ ExprAff(LR, ExprLoc(dst, 32)), ExprAff(PC, 
arg1), ExprAff(ir.IRDst, arg1) ], [] @@ -726,13 +727,15 @@ def mn_do_cond_branch(ir, instr, dest): condition = condition & cond_cond else: condition = cond_cond + dst = ir.get_next_instr(instr) dest_expr = ExprCond(condition, dest, - ExprId(ir.get_next_instr(instr), 32)) + ExprLoc(dst, 32)) else: dest_expr = dest if instr.name[-1] == 'L' or instr.name[-2:-1] == 'LA': - ret.append(ExprAff(LR, ExprId(ir.get_next_instr(instr), 32))) + dst = ir.get_next_instr(instr) + ret.append(ExprAff(LR, ExprLoc(dst, 32))) ret.append(ExprAff(PC, dest_expr)) ret.append(ExprAff(ir.IRDst, dest_expr)) @@ -839,8 +842,8 @@ sem_dir = { class ir_ppc32b(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - super(ir_ppc32b, self).__init__(mn_ppc, 'b', symbol_pool) + def __init__(self, loc_db=None): + super(ir_ppc32b, self).__init__(mn_ppc, 'b', loc_db) self.pc = mn_ppc.getpc() self.sp = mn_ppc.getsp() self.IRDst = expr.ExprId('IRDst', 32) @@ -913,9 +916,9 @@ class ir_ppc32b(IntermediateRepresentation): return instr_ir, extra_ir def get_next_instr(self, instr): - l = self.symbol_pool.getby_offset_create(instr.offset + 4) + l = self.loc_db.get_or_create_offset_location(instr.offset + 4) return l - def get_next_break_label(self, instr): - l = self.symbol_pool.getby_offset_create(instr.offset + 4) + def get_next_break_loc_key(self, instr): + l = self.loc_db.get_or_create_offset_location(instr.offset + 4) return l diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index 14f46265..d5e9820e 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -96,23 +96,23 @@ dgbr_reg = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + gpregs.parser + RPAR class sh4_arg(m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = 
loc_db.get_or_create_name_location(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -165,8 +165,8 @@ class sh4_freg(sh4_reg): class sh4_dgpreg(sh4_arg): parser = dgpregs_base - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(sh4_dgpreg, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(sh4_dgpreg, self).fromstring(text, loc_db, parser_result) if start is None or self.expr == [None]: return start, stop self.expr = ExprMem(self.expr.arg, self.sz) @@ -191,8 +191,8 @@ class sh4_dgpreg(sh4_arg): class sh4_dgpregpinc(sh4_arg): parser = dgpregs_p - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(sh4_dgpregpinc, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(sh4_dgpregpinc, self).fromstring(text, loc_db, parser_result) if self.expr == [None]: return None, None if not isinstance(self.expr.arg, ExprOp): @@ -406,31 +406,36 @@ class instruction_sh4(instruction): return self.name.startswith('J') @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - e = e.arg - - if isinstance(e, ExprOp): - if e.op == "predec": - s = '-%s' % e.args[0] - elif e.op == "postinc": - s = '%s+' % e.args[0] + def arg2str(expr, index=None, loc_db=None): + if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + return str(expr) + 
elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + expr = expr.arg + + if isinstance(expr, ExprOp): + if expr.op == "predec": + s = '-%s' % expr.args[0] + elif expr.op == "postinc": + s = '%s+' % expr.args[0] else: s = ','.join([str(x).replace('(', '').replace(')', '') - for x in e.args]) + for x in expr.args]) s = "(%s)"%s s = "@%s" % s - elif isinstance(e, ExprId): - s = "@%s" % e + elif isinstance(expr, ExprId): + s = "@%s" % expr else: raise NotImplementedError('zarb arg2str') return s """ - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): e = self.args[0] if not isinstance(e, ExprInt): return @@ -438,7 +443,7 @@ class instruction_sh4(instruction): ad = e.arg+8+self.offset else: ad = e.arg+8+self.offset - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) s = ExprId(l, e.size) self.args[0] = s """ @@ -451,13 +456,13 @@ class instruction_sh4(instruction): def is_subcall(self): return self.name == 'JSR' - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0]] def splitflow(self): return self.name == 'JSR' - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -818,10 +823,10 @@ addop("bf", [bs('10001011'), s08imm]) return True def dstflow(self): return True - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): e = self.args[0].expr ad = e.arg*2+4+self.offset - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) s = ExprId(l, e.size) self.args[0].expr = s """ @@ -841,10 +846,10 @@ addop("bra", [bs('1010'), s12imm]) return True def dstflow(self): return True - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): e = self.args[0].expr ad = e.arg*2+4+self.offset - l = symbol_pool.getby_offset_create(ad) + l = 
loc_db.get_or_create_offset_location(ad) s = ExprId(l, e.size) self.args[0].expr = s """ diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index fc3a5882..815eaee6 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -7,7 +7,6 @@ from miasm2.core.cpu import * from collections import defaultdict import miasm2.arch.x86.regs as regs_module from miasm2.arch.x86.regs import * -from miasm2.core.asmblock import AsmLabel from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp @@ -255,7 +254,7 @@ cl_or_imm |= base_expr class x86_arg(m_arg): - def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): if size_hint is None: size_hint = self.parent.v_opmode() if fixed_size is None: @@ -273,22 +272,22 @@ class x86_arg(m_arg): if value.name in ["FAR"]: return None - label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + loc_key = loc_db.get_or_create_name_location(value.name) + return ExprLoc(loc_key, size_hint) if isinstance(value, AstOp): # First pass to retreive fixed_size if value.op == "segm": - segm = self.asm_ast_to_expr(value.args[0], symbol_pool) - ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + segm = self.asm_ast_to_expr(value.args[0], loc_db) + ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) return ExprOp('segm', segm, ptr) - args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] if len(fixed_size) == 0: # No fixed size pass elif len(fixed_size) == 1: # One fixed size, regen all size = list(fixed_size)[0] - args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] else: raise ValueError("Size conflict") if None in args: @@ 
-300,7 +299,7 @@ class x86_arg(m_arg): return ExprInt(value.value, size_hint) if isinstance(value, AstMem): fixed_size.add(value.size) - ptr = self.asm_ast_to_expr(value.ptr, symbol_pool, None, set()) + ptr = self.asm_ast_to_expr(value.ptr, loc_db, None, set()) if ptr is None: return None return ExprMem(ptr, value.size) @@ -470,20 +469,15 @@ class instruction_x86(instruction): return True return self.name in ['CALL'] - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return expr = self.args[0] - if isinstance(expr, ExprId): - if not isinstance(expr.name, AsmLabel) and expr not in all_regs_ids: - raise ValueError("ExprId must be a label or a register") - elif isinstance(expr, ExprInt): - ad = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, expr.size) - self.args[0] = s - else: + if not expr.is_int(): return + addr = expr.arg + int(self.offset) + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -517,15 +511,14 @@ class instruction_x86(instruction): def is_subcall(self): return self.name in ['CALL'] - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if self.additional_info.g1.value & 6 and self.name in repeat_mn: - ad = int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, self.v_opmode()) - return [s] + addr = int(self.offset) + loc_key = loc_db.get_or_create_offset_location(addr) + return [ExprLoc(loc_key, self.v_opmode())] return [self.args[0]] - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return self.mode def fixDstOffset(self): @@ -566,9 +559,14 @@ class instruction_x86(instruction): return args @staticmethod - def arg2str(expr, pos=None): - if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + def arg2str(expr, 
index=None, loc_db=None): + if expr.is_id() or expr.is_int(): o = str(expr) + elif expr.is_loc(): + if loc_db is not None: + o = loc_db.pretty_str(expr.loc_key) + else: + o = str(expr) elif ((isinstance(expr, ExprOp) and expr.op == 'far' and isinstance(expr.args[0], ExprMem)) or isinstance(expr, ExprMem)): @@ -670,7 +668,7 @@ class mn_x86(cls_mn): return [(subcls, name, bases, dct, fields)] @classmethod - def fromstring(cls, text, symbol_pool, mode): + def fromstring(cls, text, loc_db, mode): pref = 0 prefix, new_s = get_prefix(text) if prefix == "LOCK": @@ -682,7 +680,7 @@ class mn_x86(cls_mn): elif prefix == "REPE": pref |= 4 text = new_s - c = super(mn_x86, cls).fromstring(text, symbol_pool, mode) + c = super(mn_x86, cls).fromstring(text, loc_db, mode) c.additional_info.g1.value = pref return c @@ -879,7 +877,7 @@ class mn_x86(cls_mn): return None return prefix + v - def getnextflow(self, symbol_pool): + def getnextflow(self, loc_db): raise NotImplementedError('not fully functional') def ir_pre_instruction(self): @@ -1922,8 +1920,8 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): class x86_rm_arg(x86_arg): parser = rmarg - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(x86_rm_arg, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(x86_rm_arg, self).fromstring(text, loc_db, parser_result) p = self.parent if start is None: return None, None @@ -2058,9 +2056,9 @@ class x86_rm_arg(x86_arg): yield x class x86_rm_mem(x86_rm_arg): - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem, self).fromstring(text, symbol_pool, parser_result) + start, stop = super(x86_rm_mem, self).fromstring(text, loc_db, parser_result) if not isinstance(self.expr, ExprMem): return None, None return start, stop @@ -2068,9 +2066,9 @@ class 
x86_rm_mem(x86_rm_arg): class x86_rm_mem_far(x86_rm_arg): parser = mem_far - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem_far, self).fromstring(text, symbol_pool, parser_result) + start, stop = super(x86_rm_mem_far, self).fromstring(text, loc_db, parser_result) if not isinstance(self.expr, ExprMem): return None, None self.expr = ExprOp('far', self.expr) @@ -2440,7 +2438,7 @@ class x86_rm_reg_noarg(object): parser = gpreg - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"): self.parent.w8.value = 1 if parser_result: @@ -2457,7 +2455,7 @@ class x86_rm_reg_noarg(object): result, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - expr = self.asm_ast_to_expr(result[0], symbol_pool) + expr = self.asm_ast_to_expr(result[0], loc_db) if expr is None: return None, None @@ -2744,7 +2742,7 @@ class bs_cond_imm(bs_cond_scale, x86_arg): parser = base_expr max_size = 32 - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: @@ -2871,7 +2869,7 @@ class bs_cond_imm64(bs_cond_imm): class bs_rel_off(bs_cond_imm): parser = base_expr - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: @@ -3013,7 +3011,7 @@ class bs_moff(bsi): class bs_movoff(x86_arg): parser = deref_mem - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: @@ -3080,7 +3078,7 @@ class bs_movoff(x86_arg): class 
bs_msegoff(x86_arg): parser = deref_ptr - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py index d0bebfb6..be10213e 100644 --- a/miasm2/arch/x86/ira.py +++ b/miasm2/arch/x86/ira.py @@ -8,8 +8,8 @@ from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 class ir_a_x86_16(ir_x86_16, ira): - def __init__(self, symbol_pool=None): - ir_x86_16.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_x86_16.__init__(self, loc_db) self.ret_reg = self.arch.regs.AX def get_out_regs(self, _): @@ -17,8 +17,8 @@ class ir_a_x86_16(ir_x86_16, ira): class ir_a_x86_32(ir_x86_32, ir_a_x86_16): - def __init__(self, symbol_pool=None): - ir_x86_32.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_x86_32.__init__(self, loc_db) self.ret_reg = self.arch.regs.EAX def sizeof_char(self): @@ -39,8 +39,8 @@ class ir_a_x86_32(ir_x86_32, ir_a_x86_16): class ir_a_x86_64(ir_x86_64, ir_a_x86_16): - def __init__(self, symbol_pool=None): - ir_x86_64.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_x86_64.__init__(self, loc_db) self.ret_reg = self.arch.regs.RAX def call_effects(self, ad, instr): diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index 50501060..bf74051d 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -1,10 +1,11 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments -from miasm2.core import asmblock +from miasm2.jitter.jitload import Jitter, named_arguments from miasm2.core.utils import pck16, pck32, pck64, upck16, upck32, upck64 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.jitter.codegen import CGen +from miasm2.core.locationdb import LocationDB +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_x86') hnd = 
logging.StreamHandler() @@ -17,6 +18,7 @@ class x86_32_CGen(CGen): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.arch.regs.RIP + self.translator = TranslatorC(self.ir_arch.loc_db) self.init_arch_C() def gen_post_code(self, attrib): @@ -32,13 +34,13 @@ class x86_64_CGen(x86_32_CGen): out.append('dump_gpregs_64(jitcpu->cpu);') return out -class jitter_x86_16(jitter): +class jitter_x86_16(Jitter): C_Gen = x86_32_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode @@ -60,17 +62,17 @@ class jitter_x86_16(jitter): return upck16(self.vm.get_mem(self.cpu.SP + 4 * index, 4)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.IP = self.pc -class jitter_x86_32(jitter): +class jitter_x86_32(Jitter): C_Gen = x86_32_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False @@ -102,7 +104,7 @@ class jitter_x86_32(jitter): return upck32(self.vm.get_mem(self.cpu.ESP + 4 * index, 4)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.EIP = self.pc # calling conventions @@ -178,15 +180,15 @@ class jitter_x86_32(jitter): -class jitter_x86_64(jitter): +class jitter_x86_64(Jitter): C_Gen = x86_64_CGen args_regs_systemv = ['RDI', 'RSI', 'RDX', 'RCX', 'R8', 'R9'] args_regs_stdcall = ['RCX', 'RDX', 'R8', 'R9'] def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_x86_64(sp), 
*args, **kwargs) + sp = LocationDB() + Jitter.__init__(self, ir_x86_64(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False @@ -209,7 +211,7 @@ class jitter_x86_64(jitter): return upck64(self.vm.get_mem(self.cpu.RSP + 8 * index, 8)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.RIP = self.pc # calling conventions diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 276b796f..f3ca3a62 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -240,11 +240,13 @@ def gen_jcc(ir, instr, cond, dst, jmp_if): e = [] meip = mRIP[ir.IRDst.size] - next_lbl = m2_expr.ExprId(ir.get_next_label(instr), dst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, dst.size) + if jmp_if: - dstA, dstB = dst, next_lbl + dstA, dstB = dst, loc_next_expr else: - dstA, dstB = next_lbl, dst + dstA, dstB = loc_next_expr, dst mn_dst = m2_expr.ExprCond(cond, dstA.zeroExtend(ir.IRDst.size), dstB.zeroExtend(ir.IRDst.size)) @@ -260,17 +262,18 @@ def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = loc_do_expr, loc_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = loc_skip_expr, loc_do_expr e = [] e_do, extra_irs = [m2_expr.ExprAff(arg1, arg2)], [] - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(loc_do, 
[AssignBlock(e_do, instr)])] def gen_cmov(ir, instr, cond, dst, src, mov_if): @@ -280,17 +283,18 @@ def gen_cmov(ir, instr, cond, dst, src, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = loc_do_expr, loc_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = loc_skip_expr, loc_do_expr e = [] e_do, extra_irs = mov(ir, instr, dst, src) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] def mov(_, instr, dst, src): @@ -504,12 +508,14 @@ def _rotate_tpl(ir, instr, dst, src, op, left=False): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) + return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) def l_rol(ir, instr, dst, src): @@ -551,12 +557,14 @@ def rotate_with_carry_tpl(ir, instr, op, dst, src): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), 
ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) + return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) def rcl(ir, instr, dst, src): return rotate_with_carry_tpl(ir, instr, '<<<', dst, src) @@ -638,12 +646,13 @@ def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False, return [], [] e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) - e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, - lbl_skip))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, + loc_skip_expr))) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] def sar(ir, instr, dst, src): @@ -767,7 +776,7 @@ def pop_gen(ir, instr, src, size): e.append(m2_expr.ExprAff(sp, new_sp)) # XXX FIX XXX for pop [esp] if isinstance(src, m2_expr.ExprMem): - src = src.replace_expr({sp: new_sp}) + src = expr_simp(src.replace_expr({sp: new_sp})) result = sp if ir.do_stk_segm: result = ir.gen_segm_expr(SS, result) @@ -973,9 +982,9 @@ def bswap(_, instr, dst): def cmps(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), 
ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) src1 = mRSI[instr.mode][:instr.v_admode()] src2 = mRDI[instr.mode][:instr.v_admode()] @@ -999,24 +1008,24 @@ def cmps(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src1, src1 + offset)) e0.append(m2_expr.ExprAff(src2, src2 + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src1, src1 - offset)) e1.append(m2_expr.ExprAff(src2, src2 - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] def scas(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) src = mRDI[instr.mode][:instr.v_admode()] @@ -1036,16 +1045,16 @@ def scas(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, 
loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] @@ -1185,7 +1194,7 @@ def call(ir, instr, dst): meip = mRIP[ir.IRDst.size] opmode, admode = s, instr.v_admode() myesp = mRSP[instr.mode][:opmode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) if isinstance(dst, m2_expr.ExprOp): if dst.op == "segm": @@ -1229,8 +1238,6 @@ def call(ir, instr, dst): e.append(m2_expr.ExprAff(ir.ExprMem(c, size=s), n)) e.append(m2_expr.ExprAff(meip, dst.zeroExtend(ir.IRDst.size))) e.append(m2_expr.ExprAff(ir.IRDst, dst.zeroExtend(ir.IRDst.size))) - # if not expr_is_int_or_label(dst): - # dst = meip return e, [] @@ -1432,7 +1439,7 @@ def loop(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) c = myecx - m2_expr.ExprInt(1, myecx.size) dst_o = m2_expr.ExprCond(c, dst.zeroExtend(ir.IRDst.size), @@ -1449,7 +1456,7 @@ def loopne(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), @@ -1471,7 +1478,7 @@ def loope(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 
ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), m2_expr.ExprInt(0, 1)) @@ -1508,24 +1515,25 @@ def div(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, loc_div_expr, loc_except_expr))) return e, [blk_div, blk_except] @@ -1554,24 +1562,25 @@ def idiv(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = 
ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, loc_div_expr, loc_except_expr))) return e, [blk_div, blk_except] @@ -1713,9 +1722,9 @@ def cqo(_, instr): def stos(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) addr_o = mRDI[instr.mode][:instr.v_admode()] addr = addr_o @@ -1732,25 +1741,25 @@ def stos(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, 
instr)]) e = [] e.append(m2_expr.ExprAff(ir.ExprMem(addr, size), b)) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] def lods(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) e = [] addr_o = mRSI[instr.mode][:instr.v_admode()] @@ -1768,13 +1777,13 @@ def lods(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e = [] if instr.mode == 64 and b.size == 32: @@ -1784,14 +1793,14 @@ def lods(ir, instr, size): e.append(m2_expr.ExprAff(b, ir.ExprMem(addr, size))) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] def movs(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 
ir.IRDst.size) dst = mRDI[instr.mode][:instr.v_admode()] src = mRSI[instr.mode][:instr.v_admode()] @@ -1815,17 +1824,17 @@ def movs(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) e0.append(m2_expr.ExprAff(dst, dst + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) e1.append(m2_expr.ExprAff(dst, dst - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] @@ -2876,14 +2885,15 @@ def bsr_bsf(ir, instr, dst, src, op_func): ZF = 0 DEST = @op_func(SRC) """ - lbl_src_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_src_not_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_src_null, loc_src_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_src_not_null, loc_src_not_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - aff_dst = m2_expr.ExprAff(ir.IRDst, lbl_next) + aff_dst = m2_expr.ExprAff(ir.IRDst, loc_next_expr) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(src, - lbl_src_not_null, - lbl_src_null))] + loc_src_not_null_expr, + loc_src_null_expr))] e_src_null = [] e_src_null.append(m2_expr.ExprAff(zf, m2_expr.ExprInt(1, zf.size))) # XXX destination is undefined @@ -2894,8 +2904,8 @@ def bsr_bsf(ir, instr, dst, src, op_func): e_src_not_null.append(m2_expr.ExprAff(dst, op_func(src))) e_src_not_null.append(aff_dst) - return e, 
[IRBlock(lbl_src_null.name, [AssignBlock(e_src_null, instr)]), - IRBlock(lbl_src_not_null.name, [AssignBlock(e_src_not_null, instr)])] + return e, [IRBlock(loc_src_null, [AssignBlock(e_src_null, instr)]), + IRBlock(loc_src_not_null, [AssignBlock(e_src_not_null, instr)])] def bsf(ir, instr, dst, src): @@ -3925,9 +3935,10 @@ def pshufd(_, instr, dst, src, imm): def ps_rl_ll(ir, instr, dst, src, op, size): - lbl_zero = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_zero, loc_zero_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) if src.size == 8: count = src.zeroExtend(dst.size) @@ -3940,8 +3951,8 @@ def ps_rl_ll(ir, instr, dst, src, op, size): test = expr_simp(count & m2_expr.ExprInt( ((1 << dst.size) - 1) ^ mask, dst.size)) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(test, - lbl_zero, - lbl_do))] + loc_zero_expr, + loc_do_expr))] slices = [] for i in xrange(0, dst.size, size): @@ -3954,12 +3965,12 @@ def ps_rl_ll(ir, instr, dst, src, op, size): return [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size))], [] e_zero = [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size)), - m2_expr.ExprAff(ir.IRDst, lbl_next)] + m2_expr.ExprAff(ir.IRDst, loc_next_expr)] e_do = [] e.append(m2_expr.ExprAff(dst[0:dst.size], m2_expr.ExprCompose(*slices))) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]), - IRBlock(lbl_zero.name, [AssignBlock(e_zero, instr)])] + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)]), + IRBlock(loc_zero, [AssignBlock(e_zero, instr)])] def psrlw(ir, instr, dst, src): @@ -4463,7 +4474,8 @@ paddsw = vec_vertical_instr('+', 16, _saturation_add_signed) # 
Others SSE operations def maskmovq(ir, instr, src, mask): - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) blks = [] # For each possibility, check if a write is necessary @@ -4477,32 +4489,32 @@ def maskmovq(ir, instr, src, mask): for i, start in enumerate(xrange(0, mask.size, 8)): bit = mask[start + 7: start + 8] cur_label = check_labels[i] - next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr write_label = write_labels[i] check = m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(bit, write_label, next_check_label)) - blks.append(IRBlock(cur_label.name, [AssignBlock([check], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([check], instr)])) # Build write blocks dst_addr = mRDI[instr.mode] for i, start in enumerate(xrange(0, mask.size, 8)): bit = mask[start + 7: start + 8] cur_label = write_labels[i] - next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr write_addr = dst_addr + m2_expr.ExprInt(i, dst_addr.size) # @8[DI/EDI/RDI + i] = src[byte i] write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8), src[start: start + 8]) jump = m2_expr.ExprAff(ir.IRDst, next_check_label) - blks.append(IRBlock(cur_label.name, [AssignBlock([write_mem, jump], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([write_mem, jump], instr)])) # If mask is null, bypass all e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(mask, check_labels[0], - lbl_next))] + loc_next_expr))] return e, blks @@ -5054,8 +5066,8 @@ mnemo_func = {'mov': mov, class ir_x86_16(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_x86, 16, symbol_pool) + def 
__init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 16, loc_db) self.do_stk_segm = False self.do_ds_segm = False self.do_str_segm = False @@ -5145,13 +5157,15 @@ class ir_x86_16(IntermediateRepresentation): c_cond = cond_dec | (zf ^ m2_expr.ExprInt(1, 1)) # gen while - lbl_do = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_end = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_skip = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - lbl_next = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - - fix_next_lbl = {lbl_next: lbl_end} - new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_lbl)) + loc_do, loc_do_expr = self.gen_loc_key_and_expr(self.IRDst.size) + loc_end, loc_end_expr = self.gen_loc_key_and_expr(self.IRDst.size) + loc_skip = self.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, self.IRDst.size) + loc_next = self.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, self.IRDst.size) + + fix_next_loc = {loc_next_expr: loc_end_expr} + new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_loc)) for irblock in extra_ir] cond_bloc = [] @@ -5159,14 +5173,14 @@ class ir_x86_16(IntermediateRepresentation): c_reg - m2_expr.ExprInt(1, c_reg.size))) cond_bloc.append(m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_cond, - lbl_skip, - lbl_do))) - cond_bloc = IRBlock(lbl_end.name, [AssignBlock(cond_bloc, instr)]) + loc_skip_expr, + loc_do_expr))) + cond_bloc = IRBlock(loc_end, [AssignBlock(cond_bloc, instr)]) e_do = instr_ir - c = IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]) - e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, lbl_do, - lbl_skip))] + c = IRBlock(loc_do, [AssignBlock(e_do, instr)]) + e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, loc_do_expr, + loc_skip_expr))] return e_n, [cond_bloc, c] + new_extra_ir def expr_fix_regs_for_mode(self, e, mode=64): @@ -5195,13 +5209,13 
@@ class ir_x86_16(IntermediateRepresentation): src = self.expr_fix_regs_for_mode(src, mode) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(irblock.label, irs) + return IRBlock(irblock.loc_key, irs) class ir_x86_32(ir_x86_16): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_x86, 32, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 32, loc_db) self.do_stk_segm = False self.do_ds_segm = False self.do_str_segm = False @@ -5214,8 +5228,8 @@ class ir_x86_32(ir_x86_16): class ir_x86_64(ir_x86_16): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_x86, 64, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 64, loc_db) self.do_stk_segm = False self.do_ds_segm = False self.do_str_segm = False diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 8740aeb7..c8af4056 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,16 +1,18 @@ #-*- coding:utf-8 -*- import logging -import inspect import warnings from collections import namedtuple -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \ + get_expr_locs +from miasm2.expression.expression import LocKey from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck from miasm2.core.graph import DiGraph, DiGraphSimplifier, MatchGraphJoker from miasm2.core.interval import interval +from miasm2.core.locationdb import LocationDB log_asmblock = logging.getLogger("asmblock") @@ -25,49 +27,6 @@ def is_int(a): isinstance(a, moduint) or isinstance(a, modint) -def expr_is_label(e): - return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel) - - -def expr_is_int_or_label(e): - return isinstance(e, m2_expr.ExprInt) 
or \ - (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)) - - -class AsmLabel(object): - - "Stand for an assembly label" - - def __init__(self, name="", offset=None): - self.fixedblocs = False - if is_int(name): - name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) - self.name = name - self.attrib = None - if offset is None: - self.offset = None - else: - self.offset = int(offset) - - def __str__(self): - if isinstance(self.offset, (int, long)): - return "%s:0x%08x" % (self.name, self.offset) - else: - return "%s:%s" % (self.name, str(self.offset)) - - def __repr__(self): - rep = '<%s ' % self.__class__.__name__ - if self.name: - rep += repr(self.name) + ' ' - rep += '>' - return rep - - -class asm_label(AsmLabel): - - def __init__(self, name="", offset=None): - warnings.warn('DEPRECATION WARNING: use "AsmLabel" instead of "asm_label"') - super(asm_label, self).__init__(name, offset) class AsmRaw(object): @@ -77,6 +36,9 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) + def to_string(self, loc_db): + return str(self) + class asm_raw(AsmRaw): @@ -89,76 +51,115 @@ class AsmConstraint(object): c_to = "c_to" c_next = "c_next" - def __init__(self, label, c_t=c_to): + def __init__(self, loc_key, c_t=c_to): # Sanity check - assert isinstance(label, AsmLabel) + assert isinstance(loc_key, LocKey) - self.label = label + self.loc_key = loc_key self.c_t = c_t + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + def set_label(self, loc_key): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + self.loc_key = loc_key + + label = property(get_label, set_label) + + def to_string(self, loc_db=None): + if loc_db is None: + return "%s:%s" % (self.c_t, self.loc_key) + else: + return "%s:%s" % ( + self.c_t, + loc_db.pretty_str(self.loc_key) + ) + def __str__(self): - return "%s:%s" % (str(self.c_t), str(self.label)) + return self.to_string() class 
asm_constraint(AsmConstraint): - def __init__(self, label, c_t=AsmConstraint.c_to): + def __init__(self, loc_key, c_t=AsmConstraint.c_to): warnings.warn('DEPRECATION WARNING: use "AsmConstraint" instead of "asm_constraint"') - super(asm_constraint, self).__init__(label, c_t) + super(asm_constraint, self).__init__(loc_key, c_t) class AsmConstraintNext(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): super(AsmConstraintNext, self).__init__( - label, c_t=AsmConstraint.c_next) + loc_key, + c_t=AsmConstraint.c_next + ) class asm_constraint_next(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): warnings.warn('DEPRECATION WARNING: use "AsmConstraintNext" instead of "asm_constraint_next"') - super(asm_constraint_next, self).__init__(label) + super(asm_constraint_next, self).__init__(loc_key) class AsmConstraintTo(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): super(AsmConstraintTo, self).__init__( - label, c_t=AsmConstraint.c_to) + loc_key, + c_t=AsmConstraint.c_to + ) class asm_constraint_to(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): warnings.warn('DEPRECATION WARNING: use "AsmConstraintTo" instead of "asm_constraint_to"') - super(asm_constraint_to, self).__init__(label) + super(asm_constraint_to, self).__init__(loc_key) class AsmBlock(object): - def __init__(self, label, alignment=1): - assert isinstance(label, AsmLabel) + def __init__(self, loc_key, alignment=1): + assert isinstance(loc_key, LocKey) + self.bto = set() self.lines = [] - self.label = label + self._loc_key = loc_key self.alignment = alignment - def __str__(self): + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + loc_key = property(lambda self:self._loc_key) + label = property(get_label) + + + def to_string(self, loc_db=None): out = [] - out.append(str(self.label)) - for l in self.lines: - out.append(str(l)) + if loc_db 
is None: + out.append(str(self.loc_key)) + else: + out.append(loc_db.pretty_str(self.loc_key)) + + for instr in self.lines: + out.append(instr.to_string(loc_db)) if self.bto: lbls = ["->"] - for l in self.bto: - if l is None: + for dst in self.bto: + if dst is None: lbls.append("Unknown? ") else: - lbls.append(str(l) + " ") + lbls.append(dst.to_string(loc_db) + " ") lbls = '\t'.join(lbls) out.append(lbls) return '\n'.join(out) + def __str__(self): + return self.to_string() + def addline(self, l): self.lines.append(l) @@ -166,23 +167,25 @@ class AsmBlock(object): assert isinstance(self.bto, set) self.bto.add(c) - def split(self, offset, l): + def split(self, loc_db, offset): + loc_key = loc_db.get_or_create_offset_location(offset) log_asmblock.debug('split at %x', offset) i = -1 offsets = [x.offset for x in self.lines] - if not l.offset in offsets: + offset = loc_db.get_location_offset(loc_key) + if offset not in offsets: log_asmblock.warning( 'cannot split bloc at %X ' % offset + 'middle instruction? 
default middle') offsets.sort() return None - new_bloc = AsmBlock(l) + new_bloc = AsmBlock(loc_key) i = offsets.index(offset) self.lines, new_bloc.lines = self.lines[:i], self.lines[i:] flow_mod_instr = self.get_flow_instr() log_asmblock.debug('flow mod %r', flow_mod_instr) - c = AsmConstraint(l, AsmConstraint.c_next) + c = AsmConstraint(loc_key, AsmConstraint.c_next) # move dst if flowgraph modifier was in original bloc # (usecase: split delayslot bloc) if flow_mod_instr: @@ -209,16 +212,14 @@ class AsmBlock(object): def get_offsets(self): return [x.offset for x in self.lines] - def add_cst(self, offset, c_t, symbol_pool): - if isinstance(offset, (int, long)): - l = symbol_pool.getby_offset_create(offset) - elif isinstance(offset, str): - l = symbol_pool.getby_name_create(offset) - elif isinstance(offset, AsmLabel): - l = offset - else: - raise ValueError('unknown offset type %r' % offset) - c = AsmConstraint(l, c_t) + def add_cst(self, loc_key, constraint_type): + """ + Add constraint between current block and block at @loc_key + @loc_key: LocKey instance of constraint target + @constraint_type: AsmConstraint c_to/c_next + """ + assert isinstance(loc_key, LocKey) + c = AsmConstraint(loc_key, constraint_type) self.bto.add(c) def get_flow_instr(self): @@ -244,9 +245,9 @@ class AsmBlock(object): return None def get_next(self): - for x in self.bto: - if x.c_t == AsmConstraint.c_next: - return x.label + for constraint in self.bto: + if constraint.c_t == AsmConstraint.c_next: + return constraint.loc_key return None @staticmethod @@ -279,7 +280,7 @@ class AsmBlock(object): # destination -> associated constraints dests = {} for constraint in self.bto: - dests.setdefault(constraint.label, set()).add(constraint) + dests.setdefault(constraint.loc_key, set()).add(constraint) self.bto = set(self._filter_constraint(constraints) for constraints in dests.itervalues()) @@ -287,9 +288,9 @@ class AsmBlock(object): class asm_bloc(object): - def __init__(self, label, alignment=1): + 
def __init__(self, loc_key, alignment=1): warnings.warn('DEPRECATION WARNING: use "AsmBlock" instead of "asm_bloc"') - super(asm_bloc, self).__init__(label, alignment) + super(asm_bloc, self).__init__(loc_key, alignment) class AsmBlockBad(AsmBlock): @@ -297,23 +298,35 @@ class AsmBlockBad(AsmBlock): """Stand for a *bad* ASM block (malformed, unreachable, not disassembled, ...)""" - ERROR_TYPES = {-1: "Unknown error", - 0: "Unable to disassemble", - 1: "Null starting block", - 2: "Address forbidden by dont_dis", - } - def __init__(self, label=None, alignment=1, errno=-1, *args, **kwargs): + ERROR_UNKNOWN = -1 + ERROR_CANNOT_DISASM = 0 + ERROR_NULL_STARTING_BLOCK = 1 + ERROR_FORBIDDEN = 2 + ERROR_IO = 3 + + + ERROR_TYPES = { + ERROR_UNKNOWN: "Unknown error", + ERROR_CANNOT_DISASM: "Unable to disassemble", + ERROR_NULL_STARTING_BLOCK: "Null starting block", + ERROR_FORBIDDEN: "Address forbidden by dont_dis", + ERROR_IO: "IOError", + } + + def __init__(self, loc_key=None, alignment=1, errno=ERROR_UNKNOWN, *args, **kwargs): """Instanciate an AsmBlock_bad. 
- @label, @alignement: same as AsmBlock.__init__ + @loc_key, @alignement: same as AsmBlock.__init__ @errno: (optional) specify a error type associated with the block """ - super(AsmBlockBad, self).__init__(label, alignment, *args, **kwargs) + super(AsmBlockBad, self).__init__(loc_key, alignment, *args, **kwargs) self._errno = errno + errno = property(lambda self: self._errno) + def __str__(self): error_txt = self.ERROR_TYPES.get(self._errno, self._errno) - return "\n".join([str(self.label), + return "\n".join([str(self.loc_key), "\tBad block: %s" % error_txt]) def addline(self, *args, **kwargs): @@ -328,157 +341,21 @@ class AsmBlockBad(AsmBlock): class asm_block_bad(AsmBlockBad): - def __init__(self, label=None, alignment=1, errno=-1, *args, **kwargs): + def __init__(self, loc_key=None, alignment=1, errno=-1, *args, **kwargs): warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"') - super(asm_block_bad, self).__init__(label, alignment, *args, **kwargs) - - -class AsmSymbolPool(object): - """ - Store symbols. + super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs) - A symbol links a name to an (optional) offset - - Rules and limitations: - - two different symbols cannot have the same offset - - two different symbols cannot have the same name - - symbols manipulation (comparison, creation ...) 
can only be done on - symbols generated by the same symbol pool - """ - - def __init__(self): - self._labels = set() - self._name2label = {} - self._offset2label = {} - self._label_num = 0 - - def add_label(self, name, offset=None): - """ - Create and add a label to the symbol_pool - @name: label's name - @offset: (optional) label's offset - """ - label = AsmLabel(name, offset) - - # Test for collisions - if (label.offset in self._offset2label and - label != self._offset2label[label.offset]): - raise ValueError('symbol %s has same offset as %s' % - (label, self._offset2label[label.offset])) - if (label.name in self._name2label and - label != self._name2label[label.name]): - raise ValueError('symbol %s has same name as %s' % - (label, self._name2label[label.name])) - - self._labels.add(label) - if label.offset is not None: - self._offset2label[label.offset] = label - if label.name != "": - self._name2label[label.name] = label - return label - - def remove_label(self, label): - """ - Delete a @label - """ - self._name2label.pop(label.name, None) - self._offset2label.pop(label.offset, None) - if label in self._labels: - self._labels.remove(label) - - def del_label_offset(self, label): - """Unpin the @label from its offset""" - self._offset2label.pop(label.offset, None) - label.offset = None - - def getby_offset(self, offset): - """Retrieve label using its @offset""" - return self._offset2label.get(offset, None) - - def getby_name(self, name): - """Retrieve label using its @name""" - return self._name2label.get(name, None) - - def getby_name_create(self, name): - """Get a label from its @name, create it if it doesn't exist""" - label = self.getby_name(name) - if label is None: - label = self.add_label(name) - return label - - def getby_offset_create(self, offset): - """Get a label from its @offset, create it if it doesn't exist""" - label = self.getby_offset(offset) - if label is None: - label = self.add_label(offset, offset) - return label - - def rename_label(self, 
label, newname): - """Rename the @label name to @newname""" - if newname in self._name2label: - raise ValueError('Symbol already known') - self._name2label.pop(label.name, None) - label.name = newname - self._name2label[label.name] = label - - def set_offset(self, label, offset): - """Pin the @label from at @offset - Note that there is a special case when the offset is a list - it happens when offsets are recomputed in resolve_symbol* - """ - if label is None: - raise ValueError('label should not be None') - if not label.name in self._name2label: - raise ValueError('label %s not in symbol pool' % label) - if offset is not None and offset in self._offset2label: - raise ValueError('Conflict in label %s' % label) - self._offset2label.pop(label.offset, None) - label.offset = offset - if is_int(label.offset): - self._offset2label[label.offset] = label - - @property - def labels(self): - """Return all labels""" - return self._labels - - @property - def items(self): - """Return all labels""" - warnings.warn('DEPRECATION WARNING: use "labels" instead of "items"') - return list(self._labels) - - - def __str__(self): - return reduce(lambda x, y: x + str(y) + '\n', self._labels, "") - - def __getitem__(self, item): - if item in self._name2label: - return self._name2label[item] - if item in self._offset2label: - return self._offset2label[item] - raise KeyError('unknown symbol %r' % item) - - def __contains__(self, item): - return item in self._name2label or item in self._offset2label - - def merge(self, symbol_pool): - """Merge with another @symbol_pool""" - self._labels.update(symbol_pool.labels) - self._name2label.update(symbol_pool._name2label) - self._offset2label.update(symbol_pool._offset2label) - - def gen_label(self): - """Generate a new unpinned label""" - label = self.add_label("lbl_gen_%.8X" % (self._label_num)) - self._label_num += 1 - return label +class AsmSymbolPool(LocationDB): + """[DEPRECATED API] use 'LocationDB' instead""" + def __init__(self, *args, 
**kwargs): + warnings.warn("Deprecated API, use 'LocationDB' instead") + super(AsmSymbolPool, self).__init__(*args, **kwargs) class asm_symbol_pool(AsmSymbolPool): def __init__(self): - warnings.warn('DEPRECATION WARNING: use "AsmSymbolPool" instead of "asm_symbol_pool"') + warnings.warn('DEPRECATION WARNING: use "LocationDB" instead of "asm_symbol_pool"') super(asm_symbol_pool, self).__init__() @@ -491,7 +368,7 @@ class AsmCFG(DiGraph): Specialized the .dot export and force the relation between block to be uniq, and associated with a constraint. - Offer helpers on AsmCFG management, such as research by label, sanity + Offer helpers on AsmCFG management, such as research by loc_key, sanity checking and mnemonic size guessing. """ @@ -499,14 +376,23 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, *args, **kwargs): + def __init__(self, loc_db=None, *args, **kwargs): super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} - # Expected AsmLabel -> set( (src, dst), constraint ) + # Expected LocKey -> set( (src, dst), constraint ) self._pendings = {} - # Label2block built on the fly - self._label2block = {} + # Loc_Key2block built on the fly + self._loc_key_to_block = {} + # loc_db + self.loc_db = loc_db + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.loc_db) + return graph + self + # Compatibility with old list API def append(self, *args, **kwargs): @@ -518,121 +404,170 @@ class AsmCFG(DiGraph): def __getitem__(self, *args, **kwargs): raise DeprecationWarning("Order of AsmCFG elements is not reliable") + def __contains__(self, _): + """ + DEPRECATED. Use: + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") + def __iter__(self): - """Iterator on AsmBlock composing the current graph""" - return iter(self._nodes) + """ + DEPRECATED. 
Use: + - AsmCFG.blocks() to iter on blocks + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") def __len__(self): """Return the number of blocks in AsmCFG""" return len(self._nodes) + blocks = property(lambda x:x._loc_key_to_block.itervalues()) + # Manage graph with associated constraints def add_edge(self, src, dst, constraint): """Add an edge to the graph - @src: AsmBlock instance, source - @dst: AsmBlock instance, destination + @src: LocKey instance, source + @dst: LocKey instance, destination @constraint: constraint associated to this edge """ # Sanity check - assert (src, dst) not in self.edges2constraint + assert isinstance(src, LocKey) + assert isinstance(dst, LocKey) + known_cst = self.edges2constraint.get((src, dst), None) + if known_cst is not None: + assert known_cst == constraint + return # Add the edge to src.bto if needed - if dst.label not in [cons.label for cons in src.bto]: - src.bto.add(AsmConstraint(dst.label, constraint)) + block_src = self.loc_key_to_block(src) + if block_src: + if dst not in [cons.loc_key for cons in block_src.bto]: + block_src.bto.add(AsmConstraint(dst, constraint)) # Add edge self.edges2constraint[(src, dst)] = constraint super(AsmCFG, self).add_edge(src, dst) def add_uniq_edge(self, src, dst, constraint): - """Add an edge from @src to @dst if it doesn't already exist""" - if (src not in self._nodes_succ or - dst not in self._nodes_succ[src]): - self.add_edge(src, dst, constraint) + """ + Synonym for `add_edge` + """ + self.add_edge(src, dst, constraint) def del_edge(self, src, dst): """Delete the edge @src->@dst and its associated constraint""" + src_blk = self.loc_key_to_block(src) + dst_blk = self.loc_key_to_block(dst) + assert src_blk is not None + assert dst_blk is not None # Delete from src.bto - to_remove = [cons for cons in src.bto if cons.label == dst.label] + to_remove = [cons for cons in src_blk.bto if cons.loc_key == dst] if to_remove: assert len(to_remove) == 1 - 
src.bto.remove(to_remove[0]) + src_blk.bto.remove(to_remove[0]) # Del edge del self.edges2constraint[(src, dst)] super(AsmCFG, self).del_edge(src, dst) - def add_node(self, block): - """Add the block @block to the current instance, if it is not already in + def del_block(self, block): + super(AsmCFG, self).del_node(block.loc_key) + del self._loc_key_to_block[block.loc_key] + + + def add_node(self, node): + assert isinstance(node, LocKey) + return super(AsmCFG, self).add_node(node) + + def add_block(self, block): + """ + Add the block @block to the current instance, if it is not already in @block: AsmBlock instance Edges will be created for @block.bto, if destinations are already in this instance. If not, they will be resolved when adding these aforementionned destinations. `self.pendings` indicates which blocks are not yet resolved. + """ - status = super(AsmCFG, self).add_node(block) + status = super(AsmCFG, self).add_node(block.loc_key) + if not status: return status # Update waiters - if block.label in self._pendings: - for bblpend in self._pendings[block.label]: - self.add_edge(bblpend.waiter, block, bblpend.constraint) - del self._pendings[block.label] + if block.loc_key in self._pendings: + for bblpend in self._pendings[block.loc_key]: + self.add_edge(bblpend.waiter.loc_key, block.loc_key, bblpend.constraint) + del self._pendings[block.loc_key] # Synchronize edges with block destinations - self._label2block[block.label] = block + self._loc_key_to_block[block.loc_key] = block + for constraint in block.bto: - dst = self._label2block.get(constraint.label, - None) + dst = self._loc_key_to_block.get(constraint.loc_key, + None) if dst is None: # Block is yet unknown, add it to pendings to_add = self.AsmCFGPending(waiter=block, constraint=constraint.c_t) - self._pendings.setdefault(constraint.label, + self._pendings.setdefault(constraint.loc_key, set()).add(to_add) else: # Block is already in known nodes - self.add_edge(block, dst, constraint.c_t) + 
self.add_edge(block.loc_key, dst.loc_key, constraint.c_t) return status - def del_node(self, block): - super(AsmCFG, self).del_node(block) - del self._label2block[block.label] - def merge(self, graph): """Merge with @graph, taking in account constraints""" - # -> add_edge(x, y, constraint) - for node in graph._nodes: + # Add known blocks + for block in graph.blocks: + self.add_block(block) + # Add nodes not already in it (ie. not linked to a block) + for node in graph.nodes(): self.add_node(node) + # -> add_edge(x, y, constraint) for edge in graph._edges: - # Use "_uniq_" beacause the edge can already exist due to add_node - self.add_uniq_edge(*edge, constraint=graph.edges2constraint[edge]) + # May fail if there is an incompatibility in edges constraints + # between the two graphs + self.add_edge(*edge, constraint=graph.edges2constraint[edge]) + def node2lines(self, node): - yield self.DotCellDescription(text=str(node.label.name), + if self.loc_db is None: + loc_key_name = str(node) + else: + loc_key_name = self.loc_db.pretty_str(node) + yield self.DotCellDescription(text=loc_key_name, attr={'align': 'center', 'colspan': 2, 'bgcolor': 'grey'}) - - if isinstance(node, AsmBlockBad): - yield [self.DotCellDescription( - text=node.ERROR_TYPES.get(node._errno, - node._errno), - attr={})] + block = self._loc_key_to_block.get(node, None) + if block is None: raise StopIteration - for line in node.lines: + if isinstance(block, AsmBlockBad): + yield [ + self.DotCellDescription( + text=block.ERROR_TYPES.get(block._errno, + block._errno + ), + attr={}) + ] + raise StopIteration + for line in block.lines: if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=str(line), attr={})] + self.DotCellDescription(text=line.to_string(self.loc_db), attr={})] else: - yield self.DotCellDescription(text=str(line), attr={}) + yield self.DotCellDescription(text=line.to_string(self.loc_db), attr={}) def node_attr(self, node): - if 
isinstance(node, AsmBlockBad): + block = self._loc_key_to_block.get(node, None) + if isinstance(block, AsmBlockBad): return {'style': 'filled', 'fillcolor': 'red'} return {} @@ -658,22 +593,19 @@ class AsmCFG(DiGraph): # Helpers @property def pendings(self): - """Dictionary of label -> set(AsmCFGPending instance) indicating - which label are missing in the current instance. - A label is missing if a block which is already in nodes has constraints + """Dictionary of loc_key -> set(AsmCFGPending instance) indicating + which loc_key are missing in the current instance. + A loc_key is missing if a block which is already in nodes has constraints with him (thanks to its .bto) and the corresponding block is not yet in nodes """ return self._pendings - def _build_label2block(self): - self._label2block = {block.label: block - for block in self._nodes} - - def label2block(self, label): - """Return the block corresponding to label @label - @label: AsmLabel instance or ExprId(AsmLabel) instance""" - return self._label2block[label] + def label2block(self, loc_key): + """Return the block corresponding to loc_key @loc_key + @loc_key: LocKey instance""" + warnings.warn('DEPRECATION WARNING: use "loc_key_to_block" instead of "label2block"') + return self.loc_key_to_block(loc_key) def rebuild_edges(self): """Consider blocks '.bto' and rebuild edges according to them, ie: @@ -684,20 +616,25 @@ class AsmCFG(DiGraph): This method should be called if a block's '.bto' in nodes have been modified without notifying this instance to resynchronize edges. 
""" - self._build_label2block() - for block in self._nodes: + for block in self.blocks: edges = [] # Rebuild edges from bto for constraint in block.bto: - dst = self._label2block.get(constraint.label, - None) + dst = self._loc_key_to_block.get(constraint.loc_key, + None) if dst is None: # Missing destination, add to pendings - self._pendings.setdefault(constraint.label, - set()).add(self.AsmCFGPending(block, - constraint.c_t)) + self._pendings.setdefault( + constraint.loc_key, + set() + ).add( + self.AsmCFGPending( + block, + constraint.c_t + ) + ) continue - edge = (block, dst) + edge = (block.loc_key, dst.loc_key) edges.append(edge) if edge in self._edges: # Already known edge, constraint may have changed @@ -707,43 +644,52 @@ class AsmCFG(DiGraph): self.add_edge(edge[0], edge[1], constraint.c_t) # Remove useless edges - for succ in self.successors(block): - edge = (block, succ) + for succ in self.successors(block.loc_key): + edge = (block.loc_key, succ) if edge not in edges: self.del_edge(*edge) def get_bad_blocks(self): """Iterator on AsmBlockBad elements""" # A bad asm block is always a leaf - for block in self.leaves(): + for loc_key in self.leaves(): + block = self._loc_key_to_block.get(loc_key, None) if isinstance(block, AsmBlockBad): yield block def get_bad_blocks_predecessors(self, strict=False): - """Iterator on block with an AsmBlockBad destination - @strict: (optional) if set, return block with only bad + """Iterator on loc_keys with an AsmBlockBad destination + @strict: (optional) if set, return loc_key with only bad successors """ # Avoid returning the same block done = set() for badblock in self.get_bad_blocks(): - for predecessor in self.predecessors_iter(badblock): + for predecessor in self.predecessors_iter(badblock.loc_key): if predecessor not in done: if (strict and - not all(isinstance(block, AsmBlockBad) + not all(isinstance(self._loc_key_to_block.get(block, None), AsmBlockBad) for block in self.successors_iter(predecessor))): continue yield 
predecessor done.add(predecessor) def getby_offset(self, offset): - """Return block containing @offset""" - for block in self: + """Return asmblock containing @offset""" + for block in self.blocks: if block.lines[0].offset <= offset < \ (block.lines[-1].offset + block.lines[-1].l): return block return None + def loc_key_to_block(self, loc_key): + """ + Return the asmblock corresponding to loc_key @loc_key, None if unknown + loc_key + @loc_key: LocKey instance + """ + return self._loc_key_to_block.get(loc_key, None) + def sanity_check(self): """Do sanity checks on blocks' constraints: * no pendings @@ -752,33 +698,37 @@ class AsmCFG(DiGraph): """ if len(self._pendings) != 0: - raise RuntimeError("Some blocks are missing: %s" % map(str, - self._pendings.keys())) + raise RuntimeError("Some blocks are missing: %s" % map( + str, + self._pendings.keys() + )) next_edges = {edge: constraint for edge, constraint in self.edges2constraint.iteritems() if constraint == AsmConstraint.c_next} - for block in self._nodes: + for loc_key in self._nodes: + if loc_key not in self._loc_key_to_block: + raise RuntimeError("Not supported yet: every node must have a corresponding AsmBlock") # No next constraint to self - if (block, block) in next_edges: + if (loc_key, loc_key) in next_edges: raise RuntimeError('Bad constraint: self in next') # No multiple next constraint to same block - pred_next = list(pblock - for (pblock, dblock) in next_edges - if dblock == block) + pred_next = list(ploc_key + for (ploc_key, dloc_key) in next_edges + if dloc_key == loc_key) if len(pred_next) > 1: raise RuntimeError("Too many next constraints for bloc %r" - "(%s)" % (block.label, - map(lambda x: x.label, pred_next))) + "(%s)" % (loc_key, + pred_next)) def guess_blocks_size(self, mnemo): """Asm and compute max block size Add a 'size' and 'max_size' attribute on each block @mnemo: metamn instance""" - for block in self._nodes: + for block in self.blocks: size = 0 for instr in block.lines: if 
isinstance(instr, AsmRaw): @@ -812,24 +762,26 @@ class AsmCFG(DiGraph): block.max_size = size log_asmblock.info("size: %d max: %d", block.size, block.max_size) - def apply_splitting(self, symbol_pool, dis_block_callback=None, **kwargs): + def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs): """Consider @self' bto destinations and split block in @self if one of these destinations jumps in the middle of this block. - In order to work, they must be only one block in @self per label in - @symbol_pool (which is true if @self come from the same disasmEngine). + In order to work, they must be only one block in @self per loc_key in + @loc_db (which is true if @self come from the same disasmEngine). - @symbol_pool: AsmSymbolPool instance associated with @self'labels + @loc_db: LocationDB instance associated with @self'loc_keys @dis_block_callback: (optional) if set, this callback will be called on new block destinations @kwargs: (optional) named arguments to pass to dis_block_callback """ # Get all possible destinations not yet resolved, with a resolved # offset - block_dst = [label.offset - for label in self.pendings - if label.offset is not None] + block_dst = [] + for loc_key in self.pendings: + offset = loc_db.get_location_offset(loc_key) + if offset is not None: + block_dst.append(offset) - todo = self.nodes().copy() + todo = set(self.blocks) rebuild_needed = False while todo: @@ -841,9 +793,9 @@ class AsmCFG(DiGraph): if not (off > range_start and off < range_stop): continue - # `cur_block` must be splitted at offset `off` - label = symbol_pool.getby_offset_create(off) - new_b = cur_block.split(off, label) + # `cur_block` must be splitted at offset `off`from miasm2.core.locationdb import LocationDB + + new_b = cur_block.split(loc_db, off) log_asmblock.debug("Split block %x", off) if new_b is None: log_asmblock.error("Cannot split %x!!", off) @@ -852,22 +804,24 @@ class AsmCFG(DiGraph): # Remove pending from cur_block # Links from new_b will be 
generated in rebuild_edges for dst in new_b.bto: - if dst.label not in self.pendings: + if dst.loc_key not in self.pendings: continue - self.pendings[dst.label] = set(pending for pending in self.pendings[dst.label] - if pending.waiter != cur_block) + self.pendings[dst.loc_key] = set(pending for pending in self.pendings[dst.loc_key] + if pending.waiter != cur_block) # The new block destinations may need to be disassembled if dis_block_callback: - offsets_to_dis = set(constraint.label.offset - for constraint in new_b.bto) + offsets_to_dis = set( + self.loc_db.get_location_offset(constraint.loc_key) + for constraint in new_b.bto + ) dis_block_callback(cur_bloc=new_b, offsets_to_dis=offsets_to_dis, - symbol_pool=symbol_pool, **kwargs) + loc_db=loc_db, **kwargs) # Update structure rebuild_needed = True - self.add_node(new_b) + self.add_block(new_b) # The new block must be considered todo.add(new_b) @@ -879,18 +833,18 @@ class AsmCFG(DiGraph): def __str__(self): out = [] - for node in self.nodes(): - out.append(str(node)) - for nodeA, nodeB in self.edges(): - out.append("%s -> %s" % (nodeA.label, nodeB.label)) + for block in self.blocks: + out.append(str(block)) + for loc_key_a, loc_key_b in self.edges(): + out.append("%s -> %s" % (loc_key_a, loc_key_b)) return '\n'.join(out) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, hex(id(self))) # Out of _merge_blocks to be computed only once -_acceptable_block = lambda block: (not isinstance(block, AsmBlockBad) and - len(block.lines) > 0) +_acceptable_block = lambda graph, loc_key: (not isinstance(graph.loc_key_to_block(loc_key), AsmBlockBad) and + len(graph.loc_key_to_block(loc_key).lines) > 0) _parent = MatchGraphJoker(restrict_in=False, filt=_acceptable_block) _son = MatchGraphJoker(restrict_out=False, filt=_acceptable_block) _expgraph = _parent >> _son @@ -906,7 +860,9 @@ def _merge_blocks(dg, graph): for match in _expgraph.match(graph): # Get matching blocks - block, succ = match[_parent], match[_son] + 
lbl_block, lbl_succ = match[_parent], match[_son] + block = graph.loc_key_to_block(lbl_block) + succ = graph.loc_key_to_block(lbl_succ) # Ignore already deleted blocks if (block in to_ignore or @@ -926,11 +882,11 @@ def _merge_blocks(dg, graph): # Merge block block.lines += succ.lines - for nextb in graph.successors_iter(succ): - graph.add_edge(block, nextb, graph.edges2constraint[(succ, nextb)]) + for nextb in graph.successors_iter(lbl_succ): + graph.add_edge(lbl_block, nextb, graph.edges2constraint[(lbl_succ, nextb)]) - graph.del_node(succ) - to_ignore.add(succ) + graph.del_block(succ) + to_ignore.add(lbl_succ) bbl_simplifier = DiGraphSimplifier() @@ -959,34 +915,40 @@ def conservative_asm(mnemo, instr, symbols, conservative): def fix_expr_val(expr, symbols): """Resolve an expression @expr using @symbols""" def expr_calc(e): - if isinstance(e, m2_expr.ExprId): - s = symbols._name2label[e.name] - e = m2_expr.ExprInt(s.offset, e.size) + if isinstance(e, ExprId): + # Example: + # toto: + # .dword label + loc_key = symbols.get_name_location(e.name) + offset = symbols.get_location_offset(loc_key) + e = ExprInt(offset, e.size) return e result = expr.visit(expr_calc) result = expr_simp(result) - if not isinstance(result, m2_expr.ExprInt): + if not isinstance(result, ExprInt): raise RuntimeError('Cannot resolve symbol %s' % expr) return result -def fix_label_offset(symbol_pool, label, offset, modified): - """Fix the @label offset to @offset. If the @offset has changed, add @label +def fix_loc_offset(loc_db, loc_key, offset, modified): + """ + Fix the @loc_key offset to @offset. 
If the @offset has changed, add @loc_key to @modified - @symbol_pool: current symbol_pool + @loc_db: current loc_db """ - if label.offset == offset: + loc_offset = loc_db.get_location_offset(loc_key) + if loc_offset == offset: return - symbol_pool.set_offset(label, offset) - modified.add(label) + loc_db.set_location_offset(loc_key, offset, force=True) + modified.add(loc_key) class BlockChain(object): """Manage blocks linked with an asm_constraint_next""" - def __init__(self, symbol_pool, blocks): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, blocks): + self.loc_db = loc_db self.blocks = blocks self.place() @@ -998,7 +960,8 @@ class BlockChain(object): def _set_pinned_block_idx(self): self.pinned_block_idx = None for i, block in enumerate(self.blocks): - if is_int(block.label.offset): + loc_key = block.loc_key + if self.loc_db.get_location_offset(loc_key) is not None: if self.pinned_block_idx is not None: raise ValueError("Multiples pinned block detected") self.pinned_block_idx = i @@ -1016,7 +979,8 @@ class BlockChain(object): if not self.pinned: return - offset_base = self.blocks[self.pinned_block_idx].label.offset + loc = self.blocks[self.pinned_block_idx].loc_key + offset_base = self.loc_db.get_location_offset(loc) assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) self.offset_min = offset_base @@ -1036,48 +1000,48 @@ class BlockChain(object): self.place() return [self] - def fix_blocks(self, modified_labels): + def fix_blocks(self, modified_loc_keys): """Propagate a pinned to its blocks' neighbour - @modified_labels: store new pinned labels""" + @modified_loc_keys: store new pinned loc_keys""" if not self.pinned: raise ValueError('Trying to fix unpinned block') # Propagate offset to blocks before pinned block pinned_block = self.blocks[self.pinned_block_idx] - offset = pinned_block.label.offset + offset = self.loc_db.get_location_offset(pinned_block.loc_key) if offset % pinned_block.alignment != 0: raise RuntimeError('Bad 
alignment') for block in self.blocks[:self.pinned_block_idx - 1:-1]: new_offset = offset - block.size new_offset = new_offset - new_offset % pinned_block.alignment - fix_label_offset(self.symbol_pool, - block.label, - new_offset, - modified_labels) + fix_loc_offset(self.loc_db, + block.loc_key, + new_offset, + modified_loc_keys) # Propagate offset to blocks after pinned block - offset = pinned_block.label.offset + pinned_block.size + offset = self.loc_db.get_location_offset(pinned_block.loc_key) + pinned_block.size last_block = pinned_block for block in self.blocks[self.pinned_block_idx + 1:]: offset += (- offset) % last_block.alignment - fix_label_offset(self.symbol_pool, - block.label, - offset, - modified_labels) + fix_loc_offset(self.loc_db, + block.loc_key, + offset, + modified_loc_keys) offset += block.size last_block = block - return modified_labels + return modified_loc_keys class BlockChainWedge(object): """Stand for wedges between blocks""" - def __init__(self, symbol_pool, offset, size): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, offset, size): + self.loc_db = loc_db self.offset = offset self.max_size = size self.offset_min = offset @@ -1086,23 +1050,22 @@ class BlockChainWedge(object): def merge(self, chain): """Best effort merge two block chains Return the list of resulting blockchains""" - self.symbol_pool.set_offset(chain.blocks[0].label, self.offset_max) + self.loc_db.set_location_offset(chain.blocks[0].loc_key, self.offset_max) chain.place() return [self, chain] -def group_constrained_blocks(symbol_pool, blocks): +def group_constrained_blocks(loc_db, asmcfg): """ - Return the BlockChains list built from grouped asm blocks linked by + Return the BlockChains list built from grouped blocks in asmcfg linked by asm_constraint_next - @blocks: a list of asm block + @asmcfg: an AsmCfg instance """ log_asmblock.info('group_constrained_blocks') - # Group adjacent blocks - remaining_blocks = list(blocks) + # Group adjacent asmcfg + 
remaining_blocks = list(asmcfg.blocks) known_block_chains = {} - lbl2block = {block.label: block for block in blocks} while remaining_blocks: # Create a new block chain @@ -1111,10 +1074,10 @@ def group_constrained_blocks(symbol_pool, blocks): # Find sons in remainings blocks linked with a next constraint while True: # Get next block - next_label = block_list[-1].get_next() - if next_label is None or next_label not in lbl2block: + next_loc_key = block_list[-1].get_next() + if next_loc_key is None or asmcfg.loc_key_to_block(next_loc_key) is None: break - next_block = lbl2block[next_label] + next_block = asmcfg.loc_key_to_block(next_loc_key) # Add the block at the end of the current chain if next_block not in remaining_blocks: @@ -1123,15 +1086,15 @@ def group_constrained_blocks(symbol_pool, blocks): remaining_blocks.remove(next_block) # Check if son is in a known block group - if next_label is not None and next_label in known_block_chains: - block_list += known_block_chains[next_label] - del known_block_chains[next_label] + if next_loc_key is not None and next_loc_key in known_block_chains: + block_list += known_block_chains[next_loc_key] + del known_block_chains[next_loc_key] - known_block_chains[block_list[0].label] = block_list + known_block_chains[block_list[0].loc_key] = block_list out_block_chains = [] - for label in known_block_chains: - chain = BlockChain(symbol_pool, known_block_chains[label]) + for loc_key in known_block_chains: + chain = BlockChain(loc_db, known_block_chains[loc_key]) out_block_chains.append(chain) return out_block_chains @@ -1151,7 +1114,7 @@ def get_blockchains_address_interval(blockChains, dst_interval): return allocated_interval -def resolve_symbol(blockChains, symbol_pool, dst_interval=None): +def resolve_symbol(blockChains, loc_db, dst_interval=None): """Place @blockChains in the @dst_interval""" log_asmblock.info('resolve_symbol') @@ -1169,7 +1132,7 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): # Add wedge in 
forbidden intervals for start, stop in forbidden_interval.intervals: wedge = BlockChainWedge( - symbol_pool, offset=start, size=stop + 1 - start) + loc_db, offset=start, size=stop + 1 - start) pinned_chains.append(wedge) # Try to place bigger blockChains first @@ -1198,28 +1161,22 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): return [chain for chain in fixed_chains if isinstance(chain, BlockChain)] -def filter_exprid_label(exprs): - """Extract labels from list of ExprId @exprs""" - return set(expr.name for expr in exprs if isinstance(expr.name, AsmLabel)) - - -def get_block_labels(block): - """Extract labels used by @block""" +def get_block_loc_keys(block): + """Extract loc_keys used by @block""" symbols = set() for instr in block.lines: if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): for expr in instr.raw: - symbols.update(m2_expr.get_expr_ids(expr)) + symbols.update(get_expr_locs(expr)) else: for arg in instr.args: - symbols.update(m2_expr.get_expr_ids(arg)) - labels = filter_exprid_label(symbols) - return labels + symbols.update(get_expr_locs(arg)) + return symbols -def assemble_block(mnemo, block, symbol_pool, conservative=False): - """Assemble a @block using @symbol_pool +def assemble_block(mnemo, block, loc_db, conservative=False): + """Assemble a @block using @loc_db @conservative: (optional) use original bytes when possible """ offset_i = 0 @@ -1230,7 +1187,7 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Fix special AsmRaw data = "" for expr in instr.raw: - expr_int = fix_expr_val(expr, symbol_pool) + expr_int = fix_expr_val(expr, loc_db) data += pck[expr_int.size](expr_int.arg) instr.data = data @@ -1240,16 +1197,16 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Assemble an instruction saved_args = list(instr.args) - instr.offset = block.label.offset + offset_i + instr.offset = loc_db.get_location_offset(block.loc_key) + offset_i # Replace instruction's arguments by 
resolved ones - instr.args = instr.resolve_args_with_symbols(symbol_pool) + instr.args = instr.resolve_args_with_symbols(loc_db) if instr.dstflow(): instr.fixDstOffset() old_l = instr.l - cached_candidate, _ = conservative_asm(mnemo, instr, symbol_pool, + cached_candidate, _ = conservative_asm(mnemo, instr, loc_db, conservative) # Restore original arguments @@ -1263,19 +1220,19 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): offset_i += instr.l -def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" log_asmblock.debug("asmbloc_final") # Init structures - lbl2block = {block.label: block for block in blocks} - blocks_using_label = {} - for block in blocks: - labels = get_block_labels(block) - for label in labels: - blocks_using_label.setdefault(label, set()).add(block) + blocks_using_loc_key = {} + for block in asmcfg.blocks: + exprlocs = get_block_loc_keys(block) + loc_keys = set(expr.loc_key for expr in exprlocs) + for loc_key in loc_keys: + blocks_using_loc_key.setdefault(loc_key, set()).add(block) block2chain = {} for chain in blockChains: @@ -1283,25 +1240,26 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): block2chain[block] = chain # Init worklist - blocks_to_rework = set(blocks) + blocks_to_rework = set(asmcfg.blocks) # Fix and re-assemble blocks until fixed point is reached while True: # Propagate pinned blocks into chains - modified_labels = set() + modified_loc_keys = set() for chain in blockChains: - chain.fix_blocks(modified_labels) + chain.fix_blocks(modified_loc_keys) - for label in modified_labels: + for loc_key in modified_loc_keys: # Retrive block with modified reference - if label in lbl2block: - 
blocks_to_rework.add(lbl2block[label]) + mod_block = asmcfg.loc_key_to_block(loc_key) + if mod_block is not None: + blocks_to_rework.add(mod_block) - # Enqueue blocks referencing a modified label - if label not in blocks_using_label: + # Enqueue blocks referencing a modified loc_key + if loc_key not in blocks_using_loc_key: continue - for block in blocks_using_label[label]: + for block in blocks_using_loc_key[loc_key]: blocks_to_rework.add(block) # No more work @@ -1310,33 +1268,36 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): while blocks_to_rework: block = blocks_to_rework.pop() - assemble_block(mnemo, block, symbol_pool, conservative) + assemble_block(mnemo, block, loc_db, conservative) -def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmbloc_final(mnemo, blocks, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"') - asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative) + asmblock_final(mnemo, blocks, blockChains, loc_db, conservative) -def asm_resolve_final(mnemo, blocks, symbol_pool, dst_interval=None): - """Resolve and assemble @blocks using @symbol_pool into interval +def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None): + """Resolve and assemble @asmcfg using @loc_db into interval @dst_interval""" - blocks.sanity_check() + asmcfg.sanity_check() - blocks.guess_blocks_size(mnemo) - blockChains = group_constrained_blocks(symbol_pool, blocks) + asmcfg.guess_blocks_size(mnemo) + blockChains = group_constrained_blocks(loc_db, asmcfg) resolved_blockChains = resolve_symbol( - blockChains, symbol_pool, dst_interval) + blockChains, + loc_db, + dst_interval + ) - asmblock_final(mnemo, blocks, resolved_blockChains, symbol_pool) + 
asmblock_final(mnemo, asmcfg, resolved_blockChains, loc_db) patches = {} output_interval = interval() - for block in blocks: - offset = block.label.offset + for block in asmcfg.blocks: + offset = loc_db.get_location_offset(block.loc_key) for instr in block.lines: if not instr.data: # Empty line @@ -1375,7 +1336,7 @@ class disasmEngine(object): - blocs_wd: maximum number of distinct disassembled block + callback(arch, attrib, pool_bin, cur_bloc, offsets_to_dis, - symbol_pool) + loc_db) - dis_block_callback: callback after each new disassembled block """ @@ -1389,7 +1350,7 @@ class disasmEngine(object): self.arch = arch self.attrib = attrib self.bin_stream = bin_stream - self.symbol_pool = AsmSymbolPool() + self.loc_db = LocationDB() # Setup options self.dont_dis = [] @@ -1421,6 +1382,10 @@ class disasmEngine(object): warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") self.dis_block_callback = function + @property + def symbol_pool(self): + warnings.warn("""DEPRECATION WARNING: use 'loc_db'""") + return self.loc_db # Deprecated job_done = property(get_job_done, set_job_done) @@ -1439,8 +1404,8 @@ class disasmEngine(object): delayslot_count = self.arch.delayslot offsets_to_dis = set() add_next_offset = False - label = self.symbol_pool.getby_offset_create(offset) - cur_block = AsmBlock(label) + loc_key = self.loc_db.get_or_create_offset_location(offset) + cur_block = AsmBlock(loc_key) log_asmblock.debug("dis at %X", int(offset)) while not in_delayslot or delayslot_count > 0: if in_delayslot: @@ -1450,17 +1415,17 @@ class disasmEngine(object): if not cur_block.lines: job_done.add(offset) # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=2) + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_FORBIDDEN) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = 
self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break if lines_cpt > 0 and offset in self.split_dis: - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) break @@ -1470,28 +1435,35 @@ class disasmEngine(object): break if offset in job_done: - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break off_i = offset + error = None try: instr = self.arch.dis(self.bin_stream, self.attrib, offset) - except (Disasm_Exception, IOError), e: + except Disasm_Exception as e: + log_asmblock.warning(e) + instr = None + error = AsmBlockBad.ERROR_CANNOT_DISASM + except IOError as e: log_asmblock.warning(e) instr = None + error = AsmBlockBad.ERROR_IO + if instr is None: log_asmblock.warning("cannot disasm at %X", int(off_i)) if not cur_block.lines: job_done.add(offset) # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=0) + cur_block = AsmBlockBad(loc_key, errno=error) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(off_i, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break # XXX TODO nul start block option @@ -1499,12 +1471,12 @@ class disasmEngine(object): log_asmblock.warning("reach nul instr at %X", int(off_i)) if not cur_block.lines: # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=1) + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(off_i, AsmConstraint.c_next, - 
self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break # special case: flow graph modificator in delayslot @@ -1525,31 +1497,32 @@ class disasmEngine(object): # test split if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True - pass if instr.dstflow(): - instr.dstflow2label(self.symbol_pool) - dst = instr.getdstflow(self.symbol_pool) - dstn = [] - for d in dst: - if isinstance(d, m2_expr.ExprId) and \ - isinstance(d.name, AsmLabel): - dstn.append(d.name) - if d.name.offset in self.dont_dis_retcall_funcs: - add_next_offset = False - dst = dstn + instr.dstflow2label(self.loc_db) + destinations = instr.getdstflow(self.loc_db) + known_dsts = [] + for dst in destinations: + if not dst.is_loc(): + continue + loc_key = dst.loc_key + loc_key_offset = self.loc_db.get_location_offset(loc_key) + known_dsts.append(loc_key) + if loc_key_offset in self.dont_dis_retcall_funcs: + add_next_offset = False if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update( - [AsmConstraint(x, AsmConstraint.c_to) for x in dst]) + cur_block.bto.update([AsmConstraint(loc_key, AsmConstraint.c_to) for loc_key in known_dsts]) # get in delayslot mode in_delayslot = True delayslot_count = instr.delayslot for c in cur_block.bto: - offsets_to_dis.add(c.label.offset) + loc_key_offset = self.loc_db.get_location_offset(c.loc_key) + offsets_to_dis.add(loc_key_offset) if add_next_offset: - cur_block.add_cst(offset, AsmConstraint.c_next, self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) # Fix multiple constraints @@ -1559,7 +1532,9 @@ class disasmEngine(object): self.dis_block_callback(mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream, cur_bloc=cur_block, offsets_to_dis=offsets_to_dis, - symbol_pool=self.symbol_pool) + loc_db=self.loc_db, + 
# Deprecated API + symbol_pool=self.loc_db) return cur_block, offsets_to_dis def dis_block(self, offset): @@ -1589,7 +1564,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG() + blocks = AsmCFG(self.loc_db) todo = [offset] bloc_cpt = 0 @@ -1605,9 +1580,9 @@ class disasmEngine(object): continue cur_block, nexts = self._dis_block(target_offset, job_done) todo += nexts - blocks.add_node(cur_block) + blocks.add_block(cur_block) - blocks.apply_splitting(self.symbol_pool, + blocks.apply_splitting(self.loc_db, dis_block_callback=self.dis_block_callback, mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 6c3de8a7..1326d08b 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -8,13 +8,12 @@ from collections import defaultdict import pyparsing import miasm2.expression.expression as m2_expr -from miasm2.core import asmblock from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -672,7 +671,7 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -682,11 +681,11 @@ class m_arg(object): except StopIteration: return None, None arg = v[0] - expr = self.asm_ast_to_expr(arg, symbol_pool) + expr = self.asm_ast_to_expr(arg, loc_db) self.expr = expr return start, stop - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): raise NotImplementedError("Virtual") @@ -709,7 +708,7 @@ class 
reg_noarg(object): reg_info = None parser = None - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -719,7 +718,7 @@ class reg_noarg(object): except StopIteration: return None, None arg = v[0] - expr = self.parses_to_expr(arg, symbol_pool) + expr = self.parses_to_expr(arg, loc_db) self.expr = expr return start, stop @@ -985,18 +984,24 @@ class instruction(object): self.mode = mode self.args = args self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None def gen_args(self, args): out = ', '.join([str(x) for x in args]) return out def __str__(self): + return self.to_string() + + def to_string(self, loc_db=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, pos = i) + x = self.arg2str(arg, i, loc_db) args.append(x) o += self.gen_args(args) return o @@ -1011,40 +1016,40 @@ class instruction(object): if symbols is None: symbols = {} args_out = [] - for a in self.args: - e = a + for expr in self.args: # try to resolve symbols using symbols (0 for default value) - ids = m2_expr.get_expr_ids(e) - fixed_ids = {} - for x in ids: - if isinstance(x.name, asmblock.AsmLabel): - name = x.name.name - # special symbol $ - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue - if name == '_': - fixed_ids[x] = self.get_asm_next_offset(x) - continue - if not name in symbols: - raise ValueError('unresolved symbol! 
%r' % x) - else: - name = x.name - if not name in symbols: + loc_keys = m2_expr.get_expr_locs(expr) + fixed_expr = {} + for exprloc in loc_keys: + loc_key = exprloc.loc_key + names = symbols.get_location_names(loc_key) + # special symbols + if '$' in names: + fixed_expr[exprloc] = self.get_asm_offset(exprloc) continue - if symbols[name].offset is None: - raise ValueError('The offset of label "%s" cannot be ' - 'determined' % name) + if '_' in names: + fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) + continue + if not names: + raise ValueError('Unresolved symbol: %r' % exprloc) + + offset = symbols.get_location_offset(loc_key) + if offset is None: + raise ValueError( + 'The offset of loc_key "%s" cannot be determined' % name + ) else: - size = x.size + # Fix symbol with its offset + size = exprloc.size if size is None: - default_size = self.get_symbol_size(x, symbols) + default_size = self.get_symbol_size(exprloc, symbols) size = default_size - value = m2_expr.ExprInt(symbols[name].offset, size) - fixed_ids[x] = value - e = e.replace_expr(fixed_ids) - e = expr_simp(e) - args_out.append(e) + value = m2_expr.ExprInt(offset, size) + fixed_expr[exprloc] = value + + expr = expr.replace_expr(fixed_expr) + expr = expr_simp(expr) + args_out.append(expr) return args_out def get_info(self, c): @@ -1275,7 +1280,7 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, text, symbol_pool, mode = None): + def fromstring(cls, text, loc_db, mode = None): global total_scans name = re.search('(\S+)', text).groups() if not name: @@ -1315,11 +1320,11 @@ class cls_mn(object): if start != 0: v, start, stop = [None], None, None if v != [None]: - v = f.asm_ast_to_expr(v[0], symbol_pool) + v = f.asm_ast_to_expr(v[0], loc_db) if v is None: v, start, stop = [None], None, None parsers[(i, start_i)][p] = v, start, stop - start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) + start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)]) if 
start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1524,12 +1529,12 @@ class cls_mn(object): def parse_prefix(self, v): return 0 - def set_dst_symbol(self, symbol_pool): - dst = self.getdstflow(symbol_pool) + def set_dst_symbol(self, loc_db): + dst = self.getdstflow(loc_db) args = [] for d in dst: if isinstance(d, m2_expr.ExprInt): - l = symbol_pool.getby_offset_create(int(d)) + l = loc_db.get_or_create_offset_location(int(d)) a = m2_expr.ExprId(l.name, d.size) else: @@ -1537,7 +1542,7 @@ class cls_mn(object): args.append(a) self.args_symb = args - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0].expr] @@ -1558,7 +1563,7 @@ class imm_noarg(object): return None return v - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index ce17fc75..d88f8721 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -715,11 +715,12 @@ class MatchGraphJoker(object): matched node must be the same than the joker node in the associated MatchGraph @restrict_out: (optional) counterpart of @restrict_in for successors - @filt: (optional) function(node) -> boolean for filtering candidate node + @filt: (optional) function(graph, node) -> boolean for filtering + candidate node @name: (optional) helper for displaying the current joker """ if filt is None: - filt = lambda node: True + filt = lambda graph, node: True self.filt = filt if name is None: name = str(id(self)) @@ -816,7 +817,7 @@ class MatchGraph(DiGraph): return False # Check lambda filtering - if not expected.filt(candidate): + if not expected.filt(graph, candidate): return False # Check arity diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py new file mode 100644 index 00000000..39c1c99a --- /dev/null +++ b/miasm2/core/locationdb.py @@ -0,0 +1,453 @@ 
import warnings

from miasm2.expression.expression import LocKey, ExprLoc
from miasm2.expression.modint import moduint, modint


def is_int(a):
    """Return True if @a is a native integer or a miasm modular integer"""
    return isinstance(a, (int, long, moduint, modint))


class LocationDB(object):
    """
    LocationDB is a "database" of information associated to location.

    An entry in a LocationDB is uniquely identified with a LocKey.
    Additional information which can be associated with a LocKey are:
    - an offset (unique per LocationDB)
    - several names (each is unique per LocationDB)

    As a schema:
    loc_key  1 <-> 0..1  offset
             1 <-> 0..n  name

    >>> loc_db = LocationDB()
    # Add a location with no additional information
    >>> loc_key1 = loc_db.add_location()
    # Add a location with an offset
    >>> loc_key2 = loc_db.add_location(offset=0x1234)
    # Add a location with several names
    >>> loc_key3 = loc_db.add_location(name="first_name")
    >>> loc_db.add_location_name(loc_key3, "second_name")
    # Associate an offset to an existing location
    >>> loc_db.set_location_offset(loc_key3, 0x5678)
    # Remove a name from an existing location
    >>> loc_db.remove_location_name(loc_key3, "second_name")

    # Get back offset
    >>> loc_db.get_location_offset(loc_key1)
    None
    >>> loc_db.get_location_offset(loc_key2)
    0x1234

    # Display a location
    >>> loc_db.pretty_str(loc_key1)
    loc_key_0
    >>> loc_db.pretty_str(loc_key2)
    loc_1234
    >>> loc_db.pretty_str(loc_key3)
    first_name
    """

    def __init__(self):
        # Known LocKeys
        self._loc_keys = set()

        # Association tables
        self._loc_key_to_offset = {}
        self._loc_key_to_names = {}
        self._name_to_loc_key = {}
        self._offset_to_loc_key = {}

        # Counter for new LocKey generation
        self._loc_key_num = 0

    def get_location_offset(self, loc_key):
        """
        Return the offset of @loc_key if any, None otherwise.
        @loc_key: LocKey instance
        """
        assert isinstance(loc_key, LocKey)
        return self._loc_key_to_offset.get(loc_key)

    def get_location_names(self, loc_key):
        """
        Return the frozenset of names associated to @loc_key
        @loc_key: LocKey instance
        """
        assert isinstance(loc_key, LocKey)
        return frozenset(self._loc_key_to_names.get(loc_key, set()))

    def get_name_location(self, name):
        """
        Return the LocKey of @name if any, None otherwise.
        @name: target name
        """
        return self._name_to_loc_key.get(name)

    def get_or_create_name_location(self, name):
        """
        Return the LocKey of @name if any, create one otherwise.
        @name: target name
        """
        loc_key = self._name_to_loc_key.get(name)
        if loc_key is not None:
            return loc_key
        return self.add_location(name=name)

    def get_offset_location(self, offset):
        """
        Return the LocKey of @offset if any, None otherwise.
        @offset: target offset
        """
        return self._offset_to_loc_key.get(offset)

    def get_or_create_offset_location(self, offset):
        """
        Return the LocKey of @offset if any, create one otherwise.
        @offset: target offset
        """
        loc_key = self._offset_to_loc_key.get(offset)
        if loc_key is not None:
            return loc_key
        return self.add_location(offset=offset)

    def add_location_name(self, loc_key, name):
        """Associate a name @name to a given @loc_key
        Raise a KeyError if @name already belongs to another LocKey
        @name: str instance
        @loc_key: LocKey instance
        """
        assert loc_key in self._loc_keys
        already_existing_loc = self._name_to_loc_key.get(name)
        if already_existing_loc is not None and already_existing_loc != loc_key:
            raise KeyError("%r is already associated to a different loc_key "
                           "(%r)" % (name, already_existing_loc))
        self._loc_key_to_names.setdefault(loc_key, set()).add(name)
        self._name_to_loc_key[name] = loc_key

    def remove_location_name(self, loc_key, name):
        """Disassociate a name @name from a given @loc_key
        Fail if @name is not already associated to @loc_key
        @name: str instance
        @loc_key: LocKey instance
        """
        assert loc_key in self._loc_keys
        already_existing_loc = self._name_to_loc_key.get(name)
        if already_existing_loc is None:
            raise KeyError("%r is not already associated" % name)
        if already_existing_loc != loc_key:
            raise KeyError("%r is already associated to a different loc_key "
                           "(%r)" % (name, already_existing_loc))
        del self._name_to_loc_key[name]
        self._loc_key_to_names[loc_key].remove(name)

    def set_location_offset(self, loc_key, offset, force=False):
        """Associate the offset @offset to an LocKey @loc_key

        If @force is set, override silently. Otherwise, if an offset is already
        associated to @loc_key, an error will be raised

        Raise a KeyError if @offset already belongs to another LocKey, and a
        ValueError if @loc_key already has a different offset and @force is
        not set
        @loc_key: LocKey instance
        @offset: offset to associate
        @force: (optional) replace the previous offset of @loc_key, if any
        """
        assert loc_key in self._loc_keys
        already_existing_loc = self.get_offset_location(offset)
        if already_existing_loc is not None and already_existing_loc != loc_key:
            raise KeyError("%r is already associated to a different loc_key "
                           "(%r)" % (offset, already_existing_loc))
        already_existing_off = self._loc_key_to_offset.get(loc_key)
        if (already_existing_off is not None and
                already_existing_off != offset):
            if not force:
                raise ValueError(
                    "%r already has an offset (0x%x). Use 'force=True'"
                    " for silent overriding" % (
                        loc_key, already_existing_off
                    ))
            else:
                # Drop the stale offset -> loc_key entry before rebinding
                self.unset_location_offset(loc_key)
        self._offset_to_loc_key[offset] = loc_key
        self._loc_key_to_offset[loc_key] = offset

    def unset_location_offset(self, loc_key):
        """Disassociate LocKey @loc_key's offset

        Fail if there is already no offset associate with it
        @loc_key: LocKey
        """
        assert loc_key in self._loc_keys
        already_existing_off = self._loc_key_to_offset.get(loc_key)
        if already_existing_off is None:
            raise ValueError("%r already has no offset" % (loc_key))
        del self._offset_to_loc_key[already_existing_off]
        del self._loc_key_to_offset[loc_key]

    def consistency_check(self):
        """Ensure internal structures are consistent with each others"""
        assert set(self._loc_key_to_names).issubset(self._loc_keys)
        assert set(self._loc_key_to_offset).issubset(self._loc_keys)
        assert self._loc_key_to_offset == {
            v: k for k, v in self._offset_to_loc_key.iteritems()
        }
        # Every name registered on a location must be indexed, and conversely
        all_names = set().union(*self._loc_key_to_names.itervalues())
        assert all_names == set(self._name_to_loc_key)
        for name, loc_key in self._name_to_loc_key.iteritems():
            assert name in self._loc_key_to_names[loc_key]

    def add_location(self, name=None, offset=None, strict=True):
        """Add a new location in the locationDB. Returns the corresponding LocKey.
        If @name is set, also associate a name to this new location.
        If @offset is set, also associate an offset to this new location.

        Strict mode (set by @strict, default):
          If a location with @offset or @name already exists, an error will be
        raised.
        Otherwise:
          If a location with @offset or @name already exists, the corresponding
        LocKey will be returned. A ValueError is still raised if @name is
        already known and @offset is given but differs from the offset of the
        existing location, or if @offset is already known and a @name is given.
        """

        # Deprecation handling
        if is_int(name):
            assert offset is None or offset == name
            warnings.warn("Deprecated API: use 'add_location(offset=)' instead."
                          " An additionnal 'name=' can be provided to also "
                          "associate a name (there is no more default name)")
            offset = name
            name = None

        # Argument cleaning
        offset_loc_key = None
        if offset is not None:
            offset = int(offset)
            offset_loc_key = self.get_offset_location(offset)

        # Test for collisions
        name_loc_key = None
        if name is not None:
            name_loc_key = self.get_name_location(name)

        if strict:
            if name_loc_key is not None:
                raise ValueError("An entry for %r already exists (%r), and "
                                 "strict mode is enabled" % (
                                     name, name_loc_key
                                 ))
            if offset_loc_key is not None:
                raise ValueError("An entry for 0x%x already exists (%r), and "
                                 "strict mode is enabled" % (
                                     offset, offset_loc_key
                                 ))
        else:
            # Non-strict mode
            if name_loc_key is not None:
                # Offset currently bound to the already known location
                known_offset = self.get_location_offset(name_loc_key)
                if offset is not None and known_offset != offset:
                    if known_offset is None:
                        known_str = "None"
                    else:
                        known_str = "0x%x" % known_offset
                    raise ValueError(
                        "Location with name '%s' already have an offset: %s "
                        "(!= 0x%x)" % (name, known_str, offset)
                    )
                # Name already known, no conflicting offset -> nothing to do
                return name_loc_key

            elif offset_loc_key is not None:
                if name is not None:
                    # This is an error. Check for already known name are
                    # checked above
                    raise ValueError(
                        "Location with offset 0x%x already exists. "
                        "To add a name to this location, use the dedicated "
                        "API 'add_location_name(%r, %r)'" % (
                            offset,
                            offset_loc_key,
                            name
                        ))
                # Offset already known, no name specified
                return offset_loc_key

        # No collision, this is a brand new location
        loc_key = LocKey(self._loc_key_num)
        self._loc_key_num += 1
        self._loc_keys.add(loc_key)

        if offset is not None:
            assert offset not in self._offset_to_loc_key
            self._offset_to_loc_key[offset] = loc_key
            self._loc_key_to_offset[loc_key] = offset

        if name is not None:
            self._name_to_loc_key[name] = loc_key
            self._loc_key_to_names[loc_key] = set([name])

        return loc_key

    def remove_location(self, loc_key):
        """
        Delete the location corresponding to @loc_key
        Raise a KeyError if @loc_key is unknown
        @loc_key: LocKey instance
        """
        assert isinstance(loc_key, LocKey)
        if loc_key not in self._loc_keys:
            raise KeyError("Unknown loc_key %r" % loc_key)
        names = self._loc_key_to_names.pop(loc_key, [])
        for name in names:
            del self._name_to_loc_key[name]
        offset = self._loc_key_to_offset.pop(loc_key, None)
        self._offset_to_loc_key.pop(offset, None)
        self._loc_keys.remove(loc_key)

    def pretty_str(self, loc_key):
        """Return a human readable version of @loc_key, according to information
        available in this LocationDB instance

        Names are preferred (sorted, comma separated), then the offset, then
        the raw LocKey representation
        """
        names = self.get_location_names(loc_key)
        if names:
            # Sorted for a reproducible output
            return ",".join(sorted(names))
        offset = self.get_location_offset(loc_key)
        if offset is not None:
            return "loc_%x" % offset
        return str(loc_key)

    @property
    def loc_keys(self):
        """Return all loc_keys"""
        return self._loc_keys

    @property
    def names(self):
        """Return all known names"""
        return self._name_to_loc_key.keys()

    @property
    def offsets(self):
        """Return all known offsets"""
        return self._offset_to_loc_key.keys()

    def __str__(self):
        out = []
        for loc_key in self._loc_keys:
            names = self.get_location_names(loc_key)
            offset = self.get_location_offset(loc_key)
            out.append("%s: %s - %s" % (
                loc_key,
                "0x%x" % offset if offset is not None else None,
                ",".join(sorted(names))
            ))
        return "\n".join(out)

    def merge(self, location_db):
        """Merge with another LocationDB @location_db

        WARNING: old reference to @location_db information (such as LocKeys)
        must be retrieved from the updated version of this instance. The
        dedicated "get_*" APIs may be used for this task
        """
        # A simple merge is not doable here, because LocKey will certainly
        # collides

        for foreign_loc_key in location_db.loc_keys:
            foreign_names = location_db.get_location_names(foreign_loc_key)
            foreign_offset = location_db.get_location_offset(foreign_loc_key)
            if foreign_names:
                init_name = list(foreign_names)[0]
            else:
                init_name = None
            loc_key = self.add_location(offset=foreign_offset, name=init_name,
                                        strict=False)
            cur_names = self.get_location_names(loc_key)
            for name in foreign_names:
                if name not in cur_names and name != init_name:
                    self.add_location_name(loc_key, name=name)

    def canonize_to_exprloc(self, expr):
        """
        If expr is ExprInt, return ExprLoc with corresponding loc_key
        Else, return expr

        @expr: Expr instance
        """
        if expr.is_int():
            loc_key = self.get_or_create_offset_location(int(expr))
            ret = ExprLoc(loc_key, expr.size)
            return ret
        return expr

    # Deprecated APIs
    @property
    def items(self):
        """Return all loc_keys"""
        warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"')
        return list(self._loc_keys)

    def __getitem__(self, item):
        warnings.warn('DEPRECATION WARNING: use "get_name_location" or '
                      '"get_offset_location"')
        if item in self._name_to_loc_key:
            return self._name_to_loc_key[item]
        if item in self._offset_to_loc_key:
            return self._offset_to_loc_key[item]
        raise KeyError('unknown symbol %r' % item)

    def __contains__(self, item):
        warnings.warn('DEPRECATION WARNING: use "get_name_location" or '
                      '"get_offset_location", or ".offsets" or ".names"')
        return item in self._name_to_loc_key or item in self._offset_to_loc_key

    def loc_key_to_name(self, loc_key):
        """[DEPRECATED API], see 'get_location_names'"""
        warnings.warn("Deprecated API: use 'get_location_names'")
        return sorted(self.get_location_names(loc_key))[0]

    def loc_key_to_offset(self, loc_key):
        """[DEPRECATED API], see 'get_location_offset'"""
        warnings.warn("Deprecated API: use 'get_location_offset'")
        return self.get_location_offset(loc_key)

    def remove_loc_key(self, loc_key):
        """[DEPRECATED API], see 'remove_location'"""
        warnings.warn("Deprecated API: use 'remove_location'")
        self.remove_location(loc_key)

    def del_loc_key_offset(self, loc_key):
        """[DEPRECATED API], see 'unset_location_offset'"""
        warnings.warn("Deprecated API: use 'unset_location_offset'")
        self.unset_location_offset(loc_key)

    def getby_offset(self, offset):
        """[DEPRECATED API], see 'get_offset_location'"""
        warnings.warn("Deprecated API: use 'get_offset_location'")
        return self.get_offset_location(offset)

    def getby_name(self, name):
        """[DEPRECATED API], see 'get_name_location'"""
        warnings.warn("Deprecated API: use 'get_name_location'")
        return self.get_name_location(name)

    def getby_offset_create(self, offset):
        """[DEPRECATED API], see 'get_or_create_offset_location'"""
        warnings.warn("Deprecated API: use 'get_or_create_offset_location'")
        return self.get_or_create_offset_location(offset)

    def getby_name_create(self, name):
        """[DEPRECATED API], see 'get_or_create_name_location'"""
        warnings.warn("Deprecated API: use 'get_or_create_name_location'")
        return self.get_or_create_name_location(name)

    def rename_location(self, loc_key, newname):
        """[DEPRECATED API], see 'add_location_name' and 'remove_location_name'

        Drop every name currently associated to @loc_key, then associate
        @newname to it
        """
        warnings.warn("Deprecated API: use 'add_location_name' and "
                      "'remove_location_name'")
        # get_location_names returns a frozenset copy: safe to remove names
        # while iterating
        for name in self.get_location_names(loc_key):
            self.remove_location_name(loc_key, name)
        self.add_location_name(loc_key, newname)

    def set_offset(self, loc_key, offset):
        """[DEPRECATED API], see 'set_location_offset'"""
        warnings.warn("Deprecated API: use 'set_location_offset'")
        self.set_location_offset(loc_key, offset, force=True)

    def gen_loc_key(self):
        """[DEPRECATED API], see 'add_location'"""
        warnings.warn("Deprecated API: use 'add_location'")
        return self.add_location()

    def str_loc_key(self, loc_key):
        """[DEPRECATED API], see 'pretty_str'"""
        warnings.warn("Deprecated API: use 'pretty_str'")
        return self.pretty_str(loc_key)
symbol_pool): - """Link orphan labels used by @instr to the @symbol_pool""" - - for i, arg in enumerate(instr.args): - replace_id = {} - arg.visit(lambda e: replace_expr_labels(e, - symbol_pool, - replace_id)) - instr.args[i] = instr.args[i].replace_expr(replace_id) - - STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 -def asm_ast_to_expr_with_size(arg, symbol_pool, size): +def asm_ast_to_expr_with_size(arg, loc_db, size): if isinstance(arg, AstId): - return m2_expr.ExprId(arg.name, size) + return ExprId(arg.name, size) if isinstance(arg, AstOp): - args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] - return m2_expr.ExprOp(arg.op, *args) + args = [asm_ast_to_expr_with_size(tmp, loc_db, size) for tmp in arg.args] + return ExprOp(arg.op, *args) if isinstance(arg, AstInt): - return m2_expr.ExprInt(arg.value, size) + return ExprInt(arg.value, size) return None -def parse_txt(mnemo, attrib, txt, symbol_pool=None): - """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where - blocks is a list of asm_bloc and symbol_pool the associated AsmSymbolPool +def parse_txt(mnemo, attrib, txt, loc_db=None): + """Parse an assembly listing. 
Returns a couple (asmcfg, loc_db), where + asmcfg is an AsmCfg instance and loc_db the associated LocationDB @mnemo: architecture used @attrib: architecture attribute @txt: assembly listing - @symbol_pool: (optional) the AsmSymbolPool instance used to handle labels + @loc_db: (optional) the LocationDB instance used to handle labels of the listing """ - if symbol_pool is None: - symbol_pool = asmblock.AsmSymbolPool() + if loc_db is None: + loc_db = asmblock.LocationDB() C_NEXT = asmblock.AsmConstraint.c_next C_TO = asmblock.AsmConstraint.c_to @@ -145,7 +121,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue # directive @@ -182,7 +158,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): for element in data_raw: element = element.strip() element_parsed = base_expr.parseString(element)[0] - element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size) expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) @@ -214,7 +190,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue @@ -222,22 +198,19 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, symbol_pool, attrib) - - # replace orphan AsmLabel with labels from symbol_pool - replace_orphan_labels(instr, symbol_pool) + instr = mnemo.fromstring(line, loc_db, attrib) if instr.dstflow(): - instr.dstflow2label(symbol_pool) + instr.dstflow2label(loc_db) lines.append(instr) 
asmblock.log_asmblock.info("___pre asm oki___") - # make blocks + # make asmcfg cur_block = None state = STATE_NO_BLOC i = 0 - blocks = asmblock.AsmCFG() + asmcfg = asmblock.AsmCFG(loc_db) block_to_nlink = None delayslot = 0 while i < len(lines): @@ -256,21 +229,24 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): block_to_nlink = None i += 1 continue - elif not isinstance(line, asmblock.AsmLabel): + elif not isinstance(line, LocKey): # First line must be a label. If it's not the case, generate # it. - label = guess_next_new_label(symbol_pool) - cur_block = asmblock.AsmBlock(label, alignment=mnemo.alignment) + loc = guess_next_new_label(loc_db) + cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment) else: cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment) i += 1 # Generate the current bloc - blocks.add_node(cur_block) + asmcfg.add_block(cur_block) state = STATE_IN_BLOC if block_to_nlink: block_to_nlink.addto( - asmblock.AsmConstraint(cur_block.label, - C_NEXT)) + asmblock.AsmConstraint( + cur_block.loc_key, + C_NEXT + ) + ) block_to_nlink = None continue @@ -287,10 +263,11 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): elif isinstance(line, asmblock.AsmRaw): cur_block.addline(line) block_to_nlink = cur_block - elif isinstance(line, asmblock.AsmLabel): + elif isinstance(line, LocKey): if block_to_nlink: cur_block.addto( - asmblock.AsmConstraint(line, C_NEXT)) + asmblock.AsmConstraint(line, C_NEXT) + ) block_to_nlink = None state = STATE_NO_BLOC continue @@ -304,8 +281,8 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if delayslot: raise RuntimeError("Cannot have breakflow in delayslot") if line.dstflow(): - for dst in line.getdstflow(symbol_pool): - if not isinstance(dst, m2_expr.ExprId): + for dst in line.getdstflow(loc_db): + if not isinstance(dst, ExprId): continue if dst in mnemo.regs.all_regs_ids: continue @@ -319,10 +296,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): raise RuntimeError("unknown class %s" % 
line.__class__) i += 1 - for block in blocks: + for block in asmcfg.blocks: # Fix multiple constraints block.fix_constraints() # Log block asmblock.log_asmblock.info(block) - return blocks, symbol_pool + return asmcfg, loc_db diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 8d6d3e07..ab1af953 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -139,14 +139,22 @@ class SemBuilder(object): return self._functions.copy() @staticmethod - def _create_labels(lbl_else=False): + def _create_labels(loc_else=False): """Return the AST standing for label creations - @lbl_else (optional): if set, create a label 'lbl_else'""" - lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)" - out = ast.parse(lbl_end).body - out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body - if lbl_else: - out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body + @loc_else (optional): if set, create a label 'loc_else'""" + loc_end = "loc_end = ir.get_next_loc_key(instr)" + loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" + out = ast.parse(loc_end).body + out += ast.parse(loc_end_expr).body + loc_if = "loc_if = ir.loc_db.add_location()" + loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" + out += ast.parse(loc_if).body + out += ast.parse(loc_if_expr).body + if loc_else: + loc_else = "loc_else = ir.loc_db.add_location()" + loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" + out += ast.parse(loc_else).body + out += ast.parse(loc_else_expr).body return out def _parse_body(self, body, argument_names): @@ -195,20 +203,20 @@ class SemBuilder(object): real_body.append(statement) elif isinstance(statement, ast.If): - # Create jumps : ir.IRDst = lbl_if if cond else lbl_end + # Create jumps : ir.IRDst = loc_if if cond else loc_end # if .. else .. 
are also handled cond = statement.test - real_body += self._create_labels(lbl_else=True) + real_body += self._create_labels(loc_else=True) - lbl_end = ast.Name(id='lbl_end', ctx=ast.Load()) - lbl_if = ast.Name(id='lbl_if', ctx=ast.Load()) - lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \ - if statement.orelse else lbl_end + loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load()) + loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load()) + loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \ + if statement.orelse else loc_end dst = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), args=[cond, - lbl_if, - lbl_else], + loc_if, + loc_else], keywords=[], starargs=None, kwargs=None) @@ -230,10 +238,10 @@ class SemBuilder(object): kwargs=None)) # Create the new blocks - elements = [(statement.body, 'lbl_if')] + elements = [(statement.body, 'loc_if')] if statement.orelse: - elements.append((statement.orelse, 'lbl_else')) - for content, lbl_name in elements: + elements.append((statement.orelse, 'loc_else')) + for content, loc_name in elements: sub_blocks, sub_body = self._parse_body(content, argument_names) if len(sub_blocks) > 1: @@ -242,7 +250,7 @@ class SemBuilder(object): ## Close the last block jmp_end = ast.Call(func=ast.Name(id='ExprAff', ctx=ast.Load()), - args=[IRDst, lbl_end], + args=[IRDst, loc_end], keywords=[], starargs=None, kwargs=None) @@ -261,16 +269,14 @@ class SemBuilder(object): ## Replace the block with a call to 'IRBlock' - lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name, - ctx=ast.Load()), - attr='name', ctx=ast.Load()) + loc_if_name = ast.Name(id=loc_name, ctx=ast.Load()) assignblks = ast.List(elts=[assignblk], ctx=ast.Load()) sub_blocks[-1] = ast.Call(func=ast.Name(id='IRBlock', ctx=ast.Load()), - args=[lbl_if_name, + args=[loc_if_name, assignblks], keywords=[], starargs=None, diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 54cd5a2d..8e63e6a2 100644 --- a/miasm2/expression/expression.py +++ 
b/miasm2/expression/expression.py @@ -19,6 +19,7 @@ # IR components are : # - ExprInt # - ExprId +# - ExprLoc # - ExprAff # - ExprCond # - ExprMem @@ -48,12 +49,13 @@ TOK_POS_STRICT = "Spos" # Hashing constants EXPRINT = 1 EXPRID = 2 -EXPRAFF = 3 -EXPRCOND = 4 -EXPRMEM = 5 -EXPROP = 6 -EXPRSLICE = 7 -EXPRCOMPOSE = 8 +EXPRLOC = 3 +EXPRAFF = 4 +EXPRCOND = 5 +EXPRMEM = 6 +EXPROP = 7 +EXPRSLICE = 8 +EXPRCOMPOSE = 9 priorities_list = [ @@ -115,6 +117,8 @@ class DiGraphExpr(DiGraph): return node.op elif isinstance(node, ExprId): return node.name + elif isinstance(node, ExprLoc): + return "%s" % node.loc_key elif isinstance(node, ExprMem): return "@%d" % node.size elif isinstance(node, ExprCompose): @@ -141,6 +145,32 @@ class DiGraphExpr(DiGraph): return "" + +class LocKey(object): + def __init__(self, key): + self._key = key + + key = property(lambda self: self._key) + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ is not other.__class__: + return False + return self.key == other.key + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "<%s %d>" % (self.__class__.__name__, self._key) + + def __str__(self): + return "loc_key_%d" % self.key + # IR definitions class Expr(object): @@ -383,6 +413,9 @@ class Expr(object): def is_id(self, name=None): return False + def is_loc(self, label=None): + return False + def is_aff(self): return False @@ -532,6 +565,7 @@ class ExprId(Expr): if size is None: warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') size = 32 + assert isinstance(name, str) super(ExprId, self).__init__(size) self._name = name @@ -584,6 +618,68 @@ class ExprId(Expr): return True +class ExprLoc(Expr): + + """An ExprLoc represent a Label in Miasm IR. 
+ """ + + __slots__ = Expr.__slots__ + ["_loc_key"] + + def __init__(self, loc_key, size): + """Create an identifier + @loc_key: int, label loc_key + @size: int, identifier's size + """ + assert isinstance(loc_key, LocKey) + super(ExprLoc, self).__init__(size) + self._loc_key = loc_key + + loc_key= property(lambda self: self._loc_key) + + def __reduce__(self): + state = self._loc_key, self._size + return self.__class__, state + + def __new__(cls, loc_key, size): + return Expr.get_object(cls, (loc_key, size)) + + def __str__(self): + return str(self._loc_key) + + def get_r(self, mem_read=False, cst_read=False): + return set() + + def get_w(self): + return set() + + def _exprhash(self): + return hash((EXPRLOC, self._loc_key, self._size)) + + def _exprrepr(self): + return "%s(%r, %d)" % (self.__class__.__name__, self._loc_key, self._size) + + def __contains__(self, expr): + return self == expr + + @visit_chk + def visit(self, callback, test_visit=None): + return self + + def copy(self): + return ExprLoc(self._loc_key, self._size) + + def depth(self): + return 1 + + def graph_recursive(self, graph): + graph.add_node(self) + + def is_loc(self, loc_key=None): + if loc_key is not None and self._loc_key != loc_key: + return False + return True + + class ExprAff(Expr): """An ExprAff represent an affection from an Expression to another one. 
@@ -1226,10 +1322,11 @@ class ExprCompose(Expr): # Expression order for comparaison EXPR_ORDER_DICT = {ExprId: 1, - ExprCond: 2, - ExprMem: 3, - ExprOp: 4, - ExprSlice: 5, + ExprLoc: 2, + ExprCond: 3, + ExprMem: 4, + ExprOp: 5, + ExprSlice: 6, ExprCompose: 7, ExprInt: 8, } @@ -1289,6 +1386,11 @@ def compare_exprs(expr1, expr2): if ret: return ret return cmp(expr1.size, expr2.size) + elif cls1 == ExprLoc: + ret = cmp(expr1.loc_key, expr2.loc_key) + if ret: + return ret + return cmp(expr1.size, expr2.size) elif cls1 == ExprAff: raise NotImplementedError( "Comparaison from an ExprAff not yet implemented") @@ -1379,11 +1481,19 @@ def ExprInt_from(expr, i): def get_expr_ids_visit(expr, ids): """Visitor to retrieve ExprId in @expr @expr: Expr""" - if isinstance(expr, ExprId): + if expr.is_id(): ids.add(expr) return expr +def get_expr_locs_visit(expr, locs): + """Visitor to retrieve ExprLoc in @expr + @expr: Expr""" + if expr.is_loc(): + locs.add(expr) + return expr + + def get_expr_ids(expr): """Retrieve ExprId in @expr @expr: Expr""" @@ -1392,6 +1502,14 @@ def get_expr_ids(expr): return ids +def get_expr_locs(expr): + """Retrieve ExprLoc in @expr + @expr: Expr""" + locs = set() + expr.visit(lambda x: get_expr_locs_visit(x, locs)) + return locs + + def test_set(expr, pattern, tks, result): """Test if v can correspond to e. If so, update the context in result. 
Otherwise, return False @@ -1431,6 +1549,9 @@ def match_expr(expr, pattern, tks, result=None): elif expr.is_id(): return test_set(expr, pattern, tks, result) + elif expr.is_loc(): + return test_set(expr, pattern, tks, result) + elif expr.is_op(): # expr need to be the same operation than pattern diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 722d169d..2fe5e26d 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -268,6 +268,9 @@ class Variables_Identifier(object): elif isinstance(expr, m2_expr.ExprId): pass + elif isinstance(expr, m2_expr.ExprLoc): + pass + elif isinstance(expr, m2_expr.ExprMem): self.find_variables_rec(expr.arg) @@ -552,7 +555,8 @@ def possible_values(expr): # Terminal expression if (isinstance(expr, m2_expr.ExprInt) or - isinstance(expr, m2_expr.ExprId)): + isinstance(expr, m2_expr.ExprId) or + isinstance(expr, m2_expr.ExprLoc)): consvals.add(ConstrainedValue(frozenset(), expr)) # Unary expression elif isinstance(expr, m2_expr.ExprSlice): diff --git a/miasm2/expression/expression_reduce.py b/miasm2/expression/expression_reduce.py index 45386ca2..22ac8d8d 100644 --- a/miasm2/expression/expression_reduce.py +++ b/miasm2/expression/expression_reduce.py @@ -4,8 +4,8 @@ Apply reduction rules to an Expression ast """ import logging -from miasm2.expression.expression import ExprInt, ExprId, ExprOp, ExprSlice,\ - ExprCompose, ExprMem, ExprCond +from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprOp, \ + ExprSlice, ExprCompose, ExprMem, ExprCond log_reduce = logging.getLogger("expr_reduce") console_handler = logging.StreamHandler() @@ -29,7 +29,7 @@ class ExprNode(object): expr = self.expr if self.info is not None: out = repr(self.info) - elif expr.is_int() or expr.is_id(): + elif expr.is_int() or expr.is_id() or expr.is_loc(): out = str(expr) elif expr.is_mem(): out = "@%d[%r]" % (self.expr.size, self.arg) @@ -76,7 +76,7 @@ class 
ExprReducer(object): @expr: Expression to analyze """ - if isinstance(expr, (ExprId, ExprInt)): + if isinstance(expr, (ExprId, ExprLoc, ExprInt)): node = ExprNode(expr) elif isinstance(expr, (ExprMem, ExprSlice)): son = self.expr2node(expr.arg) @@ -118,7 +118,7 @@ class ExprReducer(object): expr = node.expr log_reduce.debug("\t" * lvl + "Reduce...: %s", node.expr) - if isinstance(expr, (ExprId, ExprInt)): + if isinstance(expr, (ExprId, ExprInt, ExprLoc)): pass elif isinstance(expr, ExprMem): arg = self.categorize(node.arg, lvl=lvl + 1, **kwargs) diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py index b3f3af1c..cbfd58d0 100644 --- a/miasm2/expression/parser.py +++ b/miasm2/expression/parser.py @@ -1,6 +1,6 @@ import pyparsing -from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAff +from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprSlice, \ + ExprMem, ExprCond, ExprCompose, ExprOp, ExprAff, LocKey integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: int(t[0])) @@ -16,6 +16,7 @@ str_int = str_int_pos | str_int_neg STR_EXPRINT = pyparsing.Suppress("ExprInt") STR_EXPRID = pyparsing.Suppress("ExprId") +STR_EXPRLOC = pyparsing.Suppress("ExprLoc") STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") STR_EXPRMEM = pyparsing.Suppress("ExprMem") STR_EXPRCOND = pyparsing.Suppress("ExprCond") @@ -23,11 +24,17 @@ STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") STR_EXPROP = pyparsing.Suppress("ExprOp") STR_EXPRAFF = pyparsing.Suppress("ExprAff") +LOCKEY = pyparsing.Suppress("LocKey") + STR_COMMA = pyparsing.Suppress(",") LPARENTHESIS = pyparsing.Suppress("(") RPARENTHESIS = pyparsing.Suppress(")") +T_INF = pyparsing.Suppress("<") +T_SUP = pyparsing.Suppress(">") + + string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') @@ -36,26 +43,33 @@ string = 
string_quote | string_dquote expr = pyparsing.Forward() -expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) -expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) -expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) -expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS) - -expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \ +expr_int = STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_id = STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS +expr_loc = STR_EXPRLOC + LPARENTHESIS + T_INF + LOCKEY + str_int + T_SUP + STR_COMMA + str_int + RPARENTHESIS +expr_slice = STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_mem = STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS +expr_cond = STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS +expr_compose = STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_op = STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_aff = STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS + +expr << (expr_int | expr_id | expr_loc | expr_slice | expr_mem | expr_cond | \ expr_compose | 
expr_op | expr_aff) -expr_int.setParseAction(lambda t: ExprInt(*t[0])) -expr_id.setParseAction(lambda t: ExprId(*t[0])) -expr_slice.setParseAction(lambda t: ExprSlice(*t[0])) -expr_mem.setParseAction(lambda t: ExprMem(*t[0])) -expr_cond.setParseAction(lambda t: ExprCond(*t[0])) -expr_compose.setParseAction(lambda t: ExprCompose(*t[0])) -expr_op.setParseAction(lambda t: ExprOp(*t[0])) -expr_aff.setParseAction(lambda t: ExprAff(*t[0])) +def parse_loc_key(t): + assert len(t) == 2 + loc_key, size = LocKey(t[0]), t[1] + return ExprLoc(loc_key, size) + +expr_int.setParseAction(lambda t: ExprInt(*t)) +expr_id.setParseAction(lambda t: ExprId(*t)) +expr_loc.setParseAction(parse_loc_key) +expr_slice.setParseAction(lambda t: ExprSlice(*t)) +expr_mem.setParseAction(lambda t: ExprMem(*t)) +expr_cond.setParseAction(lambda t: ExprCond(*t)) +expr_compose.setParseAction(lambda t: ExprCompose(*t)) +expr_op.setParseAction(lambda t: ExprOp(*t)) +expr_aff.setParseAction(lambda t: ExprAff(*t)) def str_to_expr(str_in): diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py index 13b25ce2..149c5b8d 100644 --- a/miasm2/expression/simplifications_common.py +++ b/miasm2/expression/simplifications_common.py @@ -250,6 +250,26 @@ def simp_cst_propagation(e_s, expr): e_s(Y.msb()) == ExprInt(0, 1)): args = [args[0].args[0], X + Y] + # ((var >> int1) << int1) => var & mask + # ((var << int1) >> int1) => var & mask + if (op_name in ['<<', '>>'] and + args[0].is_op() and + args[0].op in ['<<', '>>'] and + op_name != args[0]): + var = args[0].args[0] + int1 = args[0].args[1] + int2 = args[1] + if int1 == int2 and int1.is_int() and int(int1) < expr.size: + if op_name == '>>': + mask = ExprInt((1 << (expr.size - int(int1))) - 1, expr.size) + else: + mask = ExprInt( + ((1 << int(int1)) - 1) ^ ((1 << expr.size) - 1), + expr.size + ) + ret = var & mask + return ret + # ((A & A.mask) if op_name == "&" and args[-1] == expr.mask: return ExprOp('&', *args[:-1]) 
diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index fc0c81c9..962b9889 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -3,11 +3,11 @@ import warnings import logging -from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IntermediateRepresentation, AssignBlock -from miasm2.expression.expression import ExprAff, ExprOp +from miasm2.expression.expression import ExprOp from miasm2.analysis.data_flow import dead_simp as new_dead_simp_imp + log = logging.getLogger("analysis") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -27,6 +27,7 @@ class ira(IntermediateRepresentation): class ira_x86_16(ir_x86_16, ira) """ + ret_reg = None def call_effects(self, addr, instr): """Default modelisation of a function call to @addr. This may be used to: @@ -44,14 +45,36 @@ class ira(IntermediateRepresentation): instr) return [assignblk] - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_update): - """Replace function call with corresponding call effects, - inside the IR block""" - if not instr.is_subcall(): - return False - call_effects = self.call_effects(instr.args[0], instr) - assignments+= call_effects - return True + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + If the instruction is a function call, replace the original IR by a + model of the sub function + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. 
+ + @instr: native instruction + @block: native block source + @assignments: current irbloc + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_effects = self.call_effects(instr.args[0], instr) + assignments+= call_effects + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False def sizeof_char(self): "Return the size of a char in bits" @@ -73,7 +96,7 @@ class ira(IntermediateRepresentation): "Return the size of a void* in bits" raise NotImplementedError("Abstract method") - def dead_simp(self): + def dead_simp(self, ircfg): """Deprecated: See miasm2.analysis.data_flow.dead_simp()""" warnings.warn('DEPRECATION WARNING: Please use miasm2.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') - new_dead_simp_imp(self) + new_dead_simp_imp(self, ircfg) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 1c6895e0..bf9b4e9a 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,11 +23,24 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \ - AsmBlock +from miasm2.core.asmblock import AsmBlock, AsmConstraint from miasm2.core.graph import DiGraph +def _expr_loc_to_symb(expr, loc_db): + if not expr.is_loc(): + return expr + if loc_db is None: + name = str(expr) + else: + names = loc_db.get_location_names(expr.loc_key) + if not names: + name = loc_db.pretty_str(expr.loc_key) + else: + # Use only one name for readability + name = sorted(names)[0] + return m2_expr.ExprId(name, expr.size) + class AssignBlock(object): """Represent parallel IR assignment, such as: EAX = EBX @@ -257,6 +270,15 @@ class 
AssignBlock(object): new_assignblk[new_dst] = new_src return AssignBlock(irs=new_assignblk, instr=self.instr) + def to_string(self, loc_db=None): + out = [] + for dst, src in self.iteritems(): + new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) + new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) + line = "%s = %s" % (new_dst, new_src) + out.append(line) + out.append("") + return "\n".join(out) class IRBlock(object): """Intermediate representation block object. @@ -264,16 +286,16 @@ class IRBlock(object): Stand for an intermediate representation basic block. """ - __slots__ = ["label", "_assignblks", "_dst", "_dst_linenb"] + __slots__ = ["_loc_key", "_assignblks", "_dst", "_dst_linenb"] - def __init__(self, label, assignblks): + def __init__(self, loc_key, assignblks): """ - @label: AsmLabel of the IR basic block + @loc_key: LocKey of the IR basic block @assignblks: list of AssignBlock """ - assert isinstance(label, AsmLabel) - self.label = label + assert isinstance(loc_key, m2_expr.LocKey) + self._loc_key = loc_key for assignblk in assignblks: assert isinstance(assignblk, AssignBlock) self._assignblks = tuple(assignblks) @@ -281,6 +303,13 @@ class IRBlock(object): self._dst_linenb = None + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + loc_key = property(lambda self:self._loc_key) + label = property(get_label) + @property def assignblks(self): return self._assignblks @@ -340,7 +369,7 @@ class IRBlock(object): else: new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(self.label, irs) + return IRBlock(self.loc_key, irs) @property def dst_linenb(self): @@ -351,7 +380,7 @@ class IRBlock(object): def __str__(self): out = [] - out.append('%s' % self.label) + out.append(str(self.loc_key)) for assignblk in self: for dst, src in assignblk.iteritems(): out.append('\t%s = %s' % (dst, src)) @@ -378,7 +407,23 @@ class 
IRBlock(object): for dst, src in assignblk.iteritems(): new_assignblk[mod_dst(dst)] = mod_src(src) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(self.label, assignblks) + return IRBlock(self.loc_key, assignblks) + + def to_string(self, loc_db=None): + out = [] + if loc_db is None: + node_name = "%s:" % self.loc_key + else: + names = loc_db.get_location_names(self.loc_key) + if not names: + node_name = "%s:" % loc_db.pretty_str(self.loc_key) + else: + node_name = "".join("%s:\n" % name for name in names) + out.append(node_name) + + for i, assignblk in enumerate(self): + out.append(assignblk.to_string(loc_db)) + return '\n'.join(out) class irbloc(IRBlock): @@ -387,33 +432,76 @@ class irbloc(IRBlock): Use IRBlock instead of irbloc """ - def __init__(self, label, irs, lines=None): + def __init__(self, loc_key, irs, lines=None): warnings.warn('DEPRECATION WARNING: use "IRBlock" instead of "irblock"') - super(irbloc, self).__init__(label, irs) + super(irbloc, self).__init__(loc_key, irs) class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, blocks, *args, **kwargs): + def __init__(self, irdst, loc_db, blocks=None, *args, **kwargs): """Instanciate a DiGraphIR + @loc_db: LocationDB instance @blocks: IR blocks """ + self.loc_db = loc_db + if blocks is None: + blocks = {} self._blocks = blocks + self._irdst = irdst super(DiGraphIR, self).__init__(*args, **kwargs) + @property + def IRDst(self): + return self._irdst + + @property + def blocks(self): + return self._blocks + + def add_irblock(self, irblock): + """ + Add the @irblock to the current DiGraphIR + @irblock: IRBlock instance + """ + self.blocks[irblock.loc_key] = irblock + self.add_node(irblock.loc_key) + + for dst in self.dst_trackback(irblock): + if dst.is_int(): + dst_loc_key = self.loc_db.get_or_create_offset_location(int(dst)) + dst = m2_expr.ExprLoc(dst_loc_key, irblock.dst.size) + if dst.is_loc(): + self.add_uniq_edge(irblock.loc_key, dst.loc_key) + def 
node2lines(self, node): - yield self.DotCellDescription(text=str(node.name), - attr={'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey'}) + if self.loc_db is None: + node_name = str(node) + else: + names = self.loc_db.get_location_names(node) + if not names: + node_name = self.loc_db.pretty_str(node) + else: + node_name = "".join("%s:\n" % name for name in names) + yield self.DotCellDescription( + text="%s" % node_name, + attr={ + 'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey', + } + ) if node not in self._blocks: yield [self.DotCellDescription(text="NOT PRESENT", attr={})] raise StopIteration for i, assignblk in enumerate(self._blocks[node]): for dst, src in assignblk.iteritems(): - line = "%s = %s" % (dst, src) + + new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) + new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) + line = "%s = %s" % (new_dst, new_src) if self._dot_offset: yield [self.DotCellDescription(text="%-4d" % i, attr={}), self.DotCellDescription(text=line, attr={})] @@ -427,11 +515,10 @@ class DiGraphIR(DiGraph): src_irdst = self._blocks[src].dst edge_color = "blue" if isinstance(src_irdst, m2_expr.ExprCond): - if (expr_is_label(src_irdst.src1) and - src_irdst.src1.name == dst): + src1, src2 = src_irdst.src1, src_irdst.src2 + if src1.is_loc(dst): edge_color = "limegreen" - elif (expr_is_label(src_irdst.src2) and - src_irdst.src2.name == dst): + elif src2.is_loc(dst): edge_color = "red" return {"color": edge_color} @@ -448,245 +535,54 @@ class DiGraphIR(DiGraph): self._dot_offset = offset return super(DiGraphIR, self).dot() + def get_loc_key(self, addr): + """Transforms an ExprId/ExprInt/loc_key/int into a loc_key + @addr: an ExprId/ExprInt/loc_key/int""" -class IntermediateRepresentation(object): - """ - Intermediate representation object - - Allow native assembly to intermediate representation traduction - """ - - def __init__(self, arch, attrib, symbol_pool=None): - if symbol_pool is None: - 
symbol_pool = AsmSymbolPool() - self.symbol_pool = symbol_pool - self.blocks = {} - self.pc = arch.getpc(attrib) - self.sp = arch.getsp(attrib) - self.arch = arch - self.attrib = attrib - # Lazy structure - self._graph = None - - @property - def blocs(self): - warnings.warn('DEPRECATION WARNING: use ".blocks" instead of ".blocs"') - return self.blocks - - def get_ir(self, instr): - raise NotImplementedError("Abstract Method") - - def instr2ir(self, instr): - ir_bloc_cur, extra_irblocks = self.get_ir(instr) - for index, irb in enumerate(extra_irblocks): - irs = [] - for assignblk in irb: - irs.append(AssignBlock(assignblk, instr)) - extra_irblocks[index] = IRBlock(irb.label, irs) - assignblk = AssignBlock(ir_bloc_cur, instr) - return assignblk, extra_irblocks - - def get_label(self, addr): - """Transforms an ExprId/ExprInt/label/int into a label - @addr: an ExprId/ExprInt/label/int""" - - if (isinstance(addr, m2_expr.ExprId) and - isinstance(addr.name, AsmLabel)): - addr = addr.name - if isinstance(addr, AsmLabel): + if isinstance(addr, m2_expr.LocKey): return addr + elif isinstance(addr, m2_expr.ExprLoc): + return addr.loc_key try: addr = int(addr) except (ValueError, TypeError): return None - return self.symbol_pool.getby_offset_create(addr) + return self.loc_db.get_offset_location(addr) - def get_block(self, addr): - """Returns the irbloc associated to an ExprId/ExprInt/label/int - @addr: an ExprId/ExprInt/label/int""" - label = self.get_label(addr) - return self.blocks.get(label, None) + def get_or_create_loc_key(self, addr): + """Transforms an ExprId/ExprInt/loc_key/int into a loc_key + If the offset @addr is not in the LocationDB, create it + @addr: an ExprId/ExprInt/loc_key/int""" - def get_bloc(self, addr): - """ - DEPRECATED function - Use get_block instead of get_block - """ - warnings.warn('DEPRECATION WARNING: use "get_block" instead of "get_bloc"') - return self.get_block(addr) + loc_key = self.get_loc_key(addr) + if loc_key is not None: + return 
loc_key - def add_instr(self, line, addr=0, gen_pc_updt=False): - block = AsmBlock(self.gen_label()) - block.lines = [line] - self.add_block(block, gen_pc_updt) + return self.loc_db.add_location(offset=int(addr)) + + def get_block(self, addr): + """Returns the irbloc associated to an ExprId/ExprInt/loc_key/int + @addr: an ExprId/ExprInt/loc_key/int""" + + loc_key = self.get_loc_key(addr) + if loc_key is None: + return None + return self.blocks.get(loc_key, None) def getby_offset(self, offset): out = set() for irb in self.blocks.values(): for assignblk in irb: instr = assignblk.instr + if instr is None: + continue if instr.offset <= offset < instr.offset + instr.l: out.add(irb) return out - def gen_pc_update(self, assignments, instr): - offset = m2_expr.ExprInt(instr.offset, self.pc.size) - assignments.append(AssignBlock({self.pc:offset}, instr)) - - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - """Function called before adding an instruction from the the native @block to - the current irbloc. - - Returns a couple. The first element is the new irblock. The second the a - bool: - * True if the current irblock must be split - * False in other cases. - - @block: native block source - @instr: native instruction - @irb_cur: current irbloc - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - - """ - - return False - - def add_instr_to_irblock(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - """ - Add the IR effects of an instruction to the current irblock. - - Returns a couple. The first element is the new irblock. The second the a - bool: - * True if the current irblock must be split - * False in other cases. 
- - @block: native block source - @instr: native instruction - @irb_cur: current irbloc - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - """ - - split = self.pre_add_instr(block, instr, assignments, ir_blocks_all, gen_pc_updt) - if split: - return True - - assignblk, ir_blocks_extra = self.instr2ir(instr) - - if gen_pc_updt is not False: - self.gen_pc_update(assignments, instr) - - assignments.append(assignblk) - ir_blocks_all += ir_blocks_extra - if ir_blocks_extra: - return True - return False - - def add_block(self, block, gen_pc_updt=False): - """ - Add a native block to the current IR - @block: native assembly block - @gen_pc_updt: insert PC update effects between instructions - """ - - label = None - ir_blocks_all = [] - for instr in block.lines: - if label is None: - assignments = [] - label = self.get_instr_label(instr) - split = self.add_instr_to_irblock(block, instr, assignments, - ir_blocks_all, gen_pc_updt) - if split: - ir_blocks_all.append(IRBlock(label, assignments)) - label = None - assignments = [] - if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) - - new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) - for irblock in new_ir_blocks_all: - self.blocks[irblock.label] = irblock - return new_ir_blocks_all - - def add_bloc(self, block, gen_pc_updt=False): - """ - DEPRECATED function - Use add_block instead of add_block - """ - warnings.warn('DEPRECATION WARNING: use "add_block" instead of "add_bloc"') - return self.add_block(block, gen_pc_updt) - - def expr_fix_regs_for_mode(self, expr, *args, **kwargs): - return expr - - def expraff_fix_regs_for_mode(self, expr, *args, **kwargs): - return expr - - def irbloc_fix_regs_for_mode(self, irblock, *args, **kwargs): - return irblock - - def is_pc_written(self, block): - """Return the first Assignblk of the @blockin which PC is written - @block: IRBlock instance""" - all_pc = self.arch.pc.values() - for assignblk in 
block: - if assignblk.dst in all_pc: - return assignblk - return None - - def set_empty_dst_to_next(self, block, ir_blocks): - for index, irblock in enumerate(ir_blocks): - if irblock.dst is not None: - continue - next_lbl = block.get_next() - if next_lbl is None: - dst = m2_expr.ExprId(self.get_next_label(block.lines[-1]), - self.pc.size) - else: - dst = m2_expr.ExprId(next_lbl, - self.pc.size) - assignblk = AssignBlock({self.IRDst: dst}, irblock[-1].instr) - ir_blocks[index] = IRBlock(irblock.label, list(irblock.assignblks) + [assignblk]) - - def post_add_block(self, block, ir_blocks): - self.set_empty_dst_to_next(block, ir_blocks) - - new_irblocks = [] - for irblock in ir_blocks: - new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) - self.blocks[irblock.label] = new_irblock - new_irblocks.append(new_irblock) - # Forget graph if any - self._graph = None - return new_irblocks - - def post_add_bloc(self, block, ir_blocks): - """ - DEPRECATED function - Use post_add_block instead of post_add_bloc - """ - warnings.warn('DEPRECATION WARNING: use "post_add_block" instead of "post_add_bloc"') - return self.post_add_block(block, ir_blocks) - - def get_instr_label(self, instr): - """Returns the label associated to an instruction - @instr: current instruction""" - - return self.symbol_pool.getby_offset_create(instr.offset) - - def gen_label(self): - # TODO: fix hardcoded offset - label = self.symbol_pool.gen_label() - return label - - def get_next_label(self, instr): - label = self.symbol_pool.getby_offset_create(instr.offset + instr.l) - return label def simplify(self, simplifier): """ @@ -694,21 +590,21 @@ class IntermediateRepresentation(object): @simplifier: ExpressionSimplifier instance """ modified = False - for label, block in self.blocks.iteritems(): + for loc_key, block in self.blocks.iteritems(): assignblks = [] for assignblk in block: new_assignblk = assignblk.simplify(simplifier) if assignblk != new_assignblk: modified = True 
assignblks.append(new_assignblk) - self.blocks[label] = IRBlock(label, assignblks) + self.blocks[loc_key] = IRBlock(loc_key, assignblks) return modified - def replace_expr_in_ir(self, bloc, rep): - for assignblk in bloc: + def replace_expr_in_ir(self, block, replaced): + for assignblk in block: for dst, src in assignblk.items(): del assignblk[dst] - assignblk[dst.replace_expr(rep)] = src.replace_expr(rep) + assignblk[dst.replace_expr(replaced)] = src.replace_expr(replaced) def get_rw(self, regs_ids=None): """ @@ -728,14 +624,14 @@ class IntermediateRepresentation(object): out = set() while todo: dst = todo.pop() - if expr_is_label(dst): + if dst.is_loc(): done.add(dst) - elif isinstance(dst, (m2_expr.ExprMem, m2_expr.ExprInt)): + elif dst.is_mem() or dst.is_int(): done.add(dst) - elif isinstance(dst, m2_expr.ExprCond): + elif dst.is_cond(): todo.add(dst.src1) todo.add(dst.src2) - elif isinstance(dst, m2_expr.ExprId): + elif dst.is_id(): out.add(dst) else: done.add(dst) @@ -765,38 +661,16 @@ class IntermediateRepresentation(object): return done - def _gen_graph(self): - """ - Gen irbloc digraph - """ - self._graph = DiGraphIR(self.blocks) - for lbl, block in self.blocks.iteritems(): - self._graph.add_node(lbl) - for dst in self.dst_trackback(block): - if dst.is_int(): - dst_lbl = self.symbol_pool.getby_offset_create(int(dst)) - dst = m2_expr.ExprId(dst_lbl, self.pc.size) - if expr_is_label(dst): - self._graph.add_edge(lbl, dst.name) - - @property - def graph(self): - """Get a DiGraph representation of current IR instance. 
- Lazy property, building the graph on-demand""" - if self._graph is None: - self._gen_graph() - return self._graph - def remove_empty_assignblks(self): modified = False - for label, block in self.blocks.iteritems(): + for loc_key, block in self.blocks.iteritems(): irs = [] for assignblk in block: if len(assignblk): irs.append(assignblk) else: modified = True - self.blocks[label] = IRBlock(label, irs) + self.blocks[loc_key] = IRBlock(loc_key, irs) return modified def remove_jmp_blocks(self): @@ -814,62 +688,62 @@ class IntermediateRepresentation(object): if len(assignblk) > 1: continue assert set(assignblk.keys()) == set([self.IRDst]) - if len(self.graph.successors(block.label)) != 1: + if len(self.successors(block.loc_key)) != 1: continue - if not expr_is_label(assignblk[self.IRDst]): + if not assignblk[self.IRDst].is_loc(): continue - dst = assignblk[self.IRDst].name - if dst == block.label: + dst = assignblk[self.IRDst].loc_key + if dst == block.loc_key: # Infinite loop block continue - jmp_blocks.add(block.label) + jmp_blocks.add(block.loc_key) # Remove them, relink graph modified = False - for label in jmp_blocks: - block = self.blocks[label] - dst_label = block.dst.name - parents = self.graph.predecessors(block.label) + for loc_key in jmp_blocks: + block = self.blocks[loc_key] + dst_loc_key = block.dst + parents = self.predecessors(block.loc_key) for lbl in parents: parent = self.blocks.get(lbl, None) if parent is None: continue dst = parent.dst - if dst.is_id(block.label): - dst = m2_expr.ExprId(dst_label, dst.size) + if dst.is_id(block.loc_key): + dst = m2_expr.ExprLoc(dst_loc_key, dst.size) - self.graph.discard_edge(lbl, block.label) - self.graph.discard_edge(block.label, dst_label) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_label) + self.add_uniq_edge(lbl, dst_loc_key) modified = True elif dst.is_cond(): src1, src2 = dst.src1, dst.src2 - if src1.is_id(block.label): - dst = 
m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2) - self.graph.discard_edge(lbl, block.label) - self.graph.discard_edge(block.label, dst_label) - self.graph.add_uniq_edge(lbl, dst_label) + if src1.is_id(block.loc_key): + dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_loc_key, dst.size), dst.src2) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True - if src2.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size)) - self.graph.discard_edge(lbl, block.label) - self.graph.discard_edge(block.label, dst_label) - self.graph.add_uniq_edge(lbl, dst_label) + if src2.is_id(block.loc_key): + dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_loc_key, dst.size)) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True if dst.src1 == dst.src2: dst = dst.src1 else: continue new_parent = parent.set_dst(dst) - self.blocks[parent.label] = new_parent + self.blocks[parent.loc_key] = new_parent # Remove unlinked useless nodes - for label in jmp_blocks: - if (len(self.graph.predecessors(label)) == 0 and - len(self.graph.successors(label)) == 0): - self.graph.del_node(label) - del self.blocks[label] + for loc_key in jmp_blocks: + if (len(self.predecessors(loc_key)) == 0 and + len(self.successors(loc_key)) == 0): + self.del_node(loc_key) + del self.blocks[loc_key] return modified def merge_blocks(self): @@ -878,21 +752,21 @@ class IntermediateRepresentation(object): parent """ modified = False - todo = set(self.graph.nodes()) + todo = set(self.nodes()) while todo: block = todo.pop() - sons = self.graph.successors(block) + sons = self.successors(block) if len(sons) != 1: continue son = list(sons)[0] - if self.graph.predecessors(son) != [block]: + if self.predecessors(son) != [block]: continue if block not in self.blocks: continue 
if son not in self.blocks: continue # Block has one son, son has one parent => merge - assignblks =[] + assignblks = [] for assignblk in self.blocks[block]: if self.IRDst not in assignblk: assignblks.append(assignblk) @@ -906,13 +780,13 @@ class IntermediateRepresentation(object): assignblks += self.blocks[son].assignblks new_block = IRBlock(block, assignblks) - self.graph.discard_edge(block, son) + self.discard_edge(block, son) - for lson in self.graph.successors(son): - self.graph.add_uniq_edge(block, lson) - self.graph.discard_edge(son, lson) + for lson in self.successors(son): + self.add_uniq_edge(block, lson) + self.discard_edge(son, lson) del self.blocks[son] - self.graph.del_node(son) + self.del_node(son) self.blocks[block] = new_block todo.add(block) @@ -920,12 +794,224 @@ class IntermediateRepresentation(object): return modified +class IntermediateRepresentation(object): + """ + Intermediate representation object + + Allow native assembly to intermediate representation traduction + """ + + def __init__(self, arch, attrib, loc_db): + self.pc = arch.getpc(attrib) + self.sp = arch.getsp(attrib) + self.arch = arch + self.attrib = attrib + self.loc_db = loc_db + self.IRDst = None + + def get_ir(self, instr): + raise NotImplementedError("Abstract Method") + + def new_ircfg(self, *args, **kwargs): + """ + Return a new instance of DiGraphIR + """ + return DiGraphIR(self.IRDst, self.loc_db, *args, **kwargs) + + def new_ircfg_from_asmcfg(self, asmcfg, *args, **kwargs): + """ + Return a new instance of DiGraphIR from an @asmcfg + @asmcfg: AsmCFG instance + """ + + ircfg = DiGraphIR(self.IRDst, self.loc_db, *args, **kwargs) + for block in asmcfg.blocks: + self.add_asmblock_to_ircfg(block, ircfg) + return ircfg + + def instr2ir(self, instr): + ir_bloc_cur, extra_irblocks = self.get_ir(instr) + for index, irb in enumerate(extra_irblocks): + irs = [] + for assignblk in irb: + irs.append(AssignBlock(assignblk, instr)) + extra_irblocks[index] = IRBlock(irb.loc_key, irs) + 
assignblk = AssignBlock(ir_bloc_cur, instr) + return assignblk, extra_irblocks + + def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False): + """ + Add the native instruction @instr to the @ircfg + @instr: instruction instance + @ircfg: IRCFG instance + @loc_key: loc_key instance of the instruction destination + @gen_pc_updt: insert PC update effects between instructions + """ + + if loc_key is None: + offset = getattr(instr, "offset", None) + loc_key = self.loc_db.add_location(offset=offset) + block = AsmBlock(loc_key) + block.lines = [instr] + self.add_asmblock_to_ircfg(block, ircfg, gen_pc_updt) + return loc_key + + def gen_pc_update(self, assignments, instr): + offset = m2_expr.ExprInt(instr.offset, self.pc.size) + assignments.append(AssignBlock({self.pc:offset}, instr)) + + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. 
+ + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False + + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): + """ + Add a native block to the current IR + @block: native assembly block + @ircfg: DiGraphIR instance + @gen_pc_updt: insert PC update effects between instructions + """ + + loc_key = block.loc_key + ir_blocks_all = [] + + assignments = [] + for instr in block.lines: + if loc_key is None: + assignments = [] + loc_key = self.get_loc_key_for_instr(instr) + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) + if split: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + loc_key = None + assignments = [] + if loc_key is not None: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) + for irblock in new_ir_blocks_all: + ircfg.add_irblock(irblock) + return new_ir_blocks_all + + def add_block(self, block, gen_pc_updt=False): + """ + DEPRECATED function + Use add_block instead of add_block + """ + warnings.warn("""DEPRECATION WARNING + ircfg is now out of IntermediateRepresentation + Use: + ircfg = ir_arch.new_ircfg() + ir_arch.add_asmblock_to_ircfg(block, ircfg) + """) + raise RuntimeError("API Deprecated") + + def add_bloc(self, block, gen_pc_updt=False): + """ + DEPRECATED function + Use add_block instead of add_block + """ + self.add_block(block, gen_pc_updt) + + def get_next_loc_key(self, instr): + loc_key = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) + return loc_key + + def 
get_loc_key_for_instr(self, instr): + """Returns the loc_key associated to an instruction + @instr: current instruction""" + return self.loc_db.get_or_create_offset_location(instr.offset) + + def gen_loc_key_and_expr(self, size): + """ + Return a loc_key and it's corresponding ExprLoc + @size: size of expression + """ + loc_key = self.loc_db.add_location() + return loc_key, m2_expr.ExprLoc(loc_key, size) + + def expr_fix_regs_for_mode(self, expr, *args, **kwargs): + return expr + + def expraff_fix_regs_for_mode(self, expr, *args, **kwargs): + return expr + + def irbloc_fix_regs_for_mode(self, irblock, *args, **kwargs): + return irblock + + def is_pc_written(self, block): + """Return the first Assignblk of the @blockin which PC is written + @block: IRBlock instance""" + all_pc = self.arch.pc.values() + for assignblk in block: + if assignblk.dst in all_pc: + return assignblk + return None + + def set_empty_dst_to_next(self, block, ir_blocks): + for index, irblock in enumerate(ir_blocks): + if irblock.dst is not None: + continue + next_loc_key = block.get_next() + if next_loc_key is None: + loc_key = None + if block.lines: + line = block.lines[-1] + if line.offset is not None: + loc_key = self.loc_db.get_or_create_offset_location(line.offset + line.l) + if loc_key is None: + loc_key = self.loc_db.add_location() + block.add_cst(loc_key, AsmConstraint.c_next) + else: + loc_key = next_loc_key + dst = m2_expr.ExprLoc(loc_key, self.pc.size) + if irblock.assignblks: + instr = irblock.assignblks[-1].instr + else: + instr = None + assignblk = AssignBlock({self.IRDst: dst}, instr) + ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) + + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): + self.set_empty_dst_to_next(block, ir_blocks) + + new_irblocks = [] + for irblock in ir_blocks: + new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) + ircfg.add_irblock(new_irblock) + new_irblocks.append(new_irblock) + return 
new_irblocks + + class ir(IntermediateRepresentation): """ DEPRECATED object Use IntermediateRepresentation instead of ir """ - def __init__(self, label, irs, lines=None): + def __init__(self, loc_key, irs, lines=None): warnings.warn('DEPRECATION WARNING: use "IntermediateRepresentation" instead of "ir"') - super(ir, self).__init__(label, irs, lines) + super(ir, self).__init__(loc_key, irs, lines) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 4070f261..9ab455da 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -2,10 +2,9 @@ import warnings import logging from collections import MutableMapping -from miasm2.expression.expression import ExprOp, ExprId, ExprInt, ExprMem, \ - ExprCompose, ExprSlice, ExprCond, ExprAff +from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCompose, ExprSlice, ExprCond from miasm2.expression.simplifications import expr_simp -from miasm2.core import asmblock from miasm2.ir.ir import AssignBlock log = logging.getLogger("symbexec") @@ -15,15 +14,16 @@ log.addHandler(console_handler) log.setLevel(logging.INFO) -def get_block(ir_arch, mdis, addr): +def get_block(ir_arch, ircfg, mdis, addr): """Get IRBlock at address @addr""" - lbl = ir_arch.get_label(addr) - if not lbl in ir_arch.blocks: - block = mdis.dis_block(lbl.offset) - ir_arch.add_block(block) - irblock = ir_arch.get_block(lbl) + loc_key = ircfg.get_or_create_loc_key(addr) + if not loc_key in ircfg.blocks: + offset = mdis.loc_db.get_location_offset(loc_key) + block = mdis.dis_block(offset) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irblock = ircfg.get_block(loc_key) if irblock is None: - raise LookupError('No block found at that address: %s' % lbl) + raise LookupError('No block found at that address: %s' % ir_arch.loc_db.pretty_str(loc_key)) return irblock @@ -804,7 +804,7 @@ class SymbolicExecutionEngine(object): StateEngine = SymbolicState - def __init__(self, ir_arch, state, + def __init__(self, ir_arch, 
state=None, func_read=None, func_write=None, sb_expr_simp=expr_simp): @@ -812,6 +812,7 @@ class SymbolicExecutionEngine(object): self.expr_to_visitor = { ExprInt: self.eval_exprint, ExprId: self.eval_exprid, + ExprLoc: self.eval_exprloc, ExprMem: self.eval_exprmem, ExprSlice: self.eval_exprslice, ExprCond: self.eval_exprcond, @@ -819,6 +820,9 @@ class SymbolicExecutionEngine(object): ExprCompose: self.eval_exprcompose, } + if state is None: + state = {} + self.symbols = SymbolMngr(addrsize=ir_arch.addrsize, expr_simp=expr_simp) for dst, src in state.iteritems(): @@ -885,10 +889,16 @@ class SymbolicExecutionEngine(object): def eval_exprid(self, expr, **kwargs): """[DEV]: Evaluate an ExprId using the current state""" - if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: - ret = ExprInt(expr.name.offset, expr.size) + ret = self.symbols.read(expr) + return ret + + def eval_exprloc(self, expr, **kwargs): + """[DEV]: Evaluate an ExprLoc using the current state""" + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) + if offset is not None: + ret = ExprInt(offset, expr.size) else: - ret = self.symbols.read(expr) + ret = expr return ret def eval_exprmem(self, expr, **kwargs): @@ -953,7 +963,7 @@ class SymbolicExecutionEngine(object): @mems: track mems only """ if init_state is None: - init_state = self.ir_arch.arch.regs.regs_init + init_state = {} if ids: for variable, value in self.symbols.symbols_id.iteritems(): if variable in init_state and init_state[variable] == value: @@ -1040,31 +1050,33 @@ class SymbolicExecutionEngine(object): self.dump(mems=False) self.dump(ids=False) print '_' * 80 - return self.eval_expr(self.ir_arch.IRDst) + dst = self.eval_expr(self.ir_arch.IRDst) + + return dst - def run_block_at(self, addr, step=False): + def run_block_at(self, ircfg, addr, step=False): """ Symbolic execution of the block at @addr @addr: address to execute (int or ExprInt or label) @step: display intermediate steps """ - irblock = 
self.ir_arch.get_block(addr) + irblock = ircfg.get_block(addr) if irblock is not None: addr = self.eval_updt_irblock(irblock, step=step) return addr - def run_at(self, addr, lbl_stop=None, step=False): + def run_at(self, ircfg, addr, lbl_stop=None, step=False): """ Symbolic execution starting at @addr @addr: address to execute (int or ExprInt or label) - @lbl_stop: AsmLabel to stop execution on + @lbl_stop: LocKey to stop execution on @step: display intermediate steps """ while True: - irblock = self.ir_arch.get_block(addr) + irblock = ircfg.get_block(addr) if irblock is None: break - if irblock.label == lbl_stop: + if irblock.loc_key == lbl_stop: break addr = self.eval_updt_irblock(irblock, step=step) return addr diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 1e1e76e9..f5ecb566 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -2,7 +2,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine from miasm2.expression.simplifications import expr_simp from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ ExprMem, ExprCond, ExprCompose, ExprOp -from miasm2.core import asmblock TOPSTR = "TOP" @@ -121,14 +120,20 @@ class SymbExecTopNoMem(SymbolicExecutionEngine): def eval_exprid(self, expr, **kwargs): """[DEV]: Evaluate an ExprId using the current state""" - if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: - ret = ExprInt(expr.name.offset, expr.size) - elif expr in self.regstop: + if expr in self.regstop: ret = exprid_top(expr) else: ret = self.symbols.read(expr) return ret + def eval_exprloc(self, expr, **kwargs): + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) + if offset is not None: + ret = ExprInt(offset, expr.size) + else: + ret = expr + return ret + def eval_exprcond(self, expr, **kwargs): """[DEV]: Evaluate an ExprCond using the current state""" cond = self.eval_expr_visitor(expr.cond, **kwargs) diff --git a/miasm2/ir/symbexec_types.py 
b/miasm2/ir/symbexec_types.py index fedd25bc..349d55a6 100644 --- a/miasm2/ir/symbexec_types.py +++ b/miasm2/ir/symbexec_types.py @@ -1,9 +1,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine from miasm2.expression.simplifications import expr_simp -from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ - ExprMem, ExprCond, ExprCompose, ExprOp - -from miasm2.core.ctypesmngr import CTypeId +from miasm2.expression.expression import ExprId, ExprMem class SymbolicStateCTypes(StateEngine): @@ -88,7 +85,6 @@ class SymbExecCType(SymbolicExecutionEngine): @assignblk: AssignBlock instance """ pool_out = {} - eval_cache = {} for dst, src in assignblk.iteritems(): objcs = self.chandler.expr_to_types(src, self.symbols) if isinstance(dst, ExprMem): diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index bcffc364..cafec7c8 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,44 +18,82 @@ class TranslatorC(Translator): '>>>': 'rot_right', } + def __init__(self, loc_db=None, **kwargs): + """Instance a C translator + @loc_db: LocationDB instance + """ + super(TranslatorC, self).__init__(**kwargs) + # symbol pool + self.loc_db = loc_db + + def _size2mask(self, size): + """Return a C string corresponding to the size2mask operation, with support for + @size <= 128""" + mask = size2mask(size) + if size > 64: + # Avoid "integer constant is too large for its type" error + return "(0x%x | ((uint128_t) 0x%x << 64))" % ( + mask & 0xFFFFFFFFFFFFFFFF, + (mask >> 64) & 0xFFFFFFFFFFFFFFFF, + ) + return "0x%x" % mask def from_ExprId(self, expr): - if isinstance(expr.name, asmblock.AsmLabel): - return "0x%x" % expr.name.offset return str(expr) def from_ExprInt(self, expr): + if expr.size == 128: + # Avoid "integer constant is too large for its type" error + return "(0x%x | ((uint128_t) 0x%x << 64))" % ( + int(expr) & 0xFFFFFFFFFFFFFFFF, + (int(expr) >> 64) & 0xFFFFFFFFFFFFFFFF, + ) return "0x%x" % expr.arg.arg + def 
from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.loc_db is None: + return str(loc_key) + + offset = self.loc_db.get_location_offset(loc_key) + if offset is None: + return str(loc_key) + + return "0x%x" % offset + def from_ExprAff(self, expr): - return "%s = %s" % tuple(map(self.from_expr, (expr.dst, expr.src))) + new_dst = self.from_expr(expr.dst) + new_src = self.from_expr(expr.src) + return "%s = %s" % (new_dst, new_src) def from_ExprCond(self, expr): - return "(%s?%s:%s)" % tuple(map(self.from_expr, - (expr.cond, expr.src1, expr.src2))) + new_cond = self.from_expr(expr.cond) + new_src1 = self.from_expr(expr.src1) + new_src2 = self.from_expr(expr.src2) + return "(%s?%s:%s)" % (new_cond, new_src1, new_src2) def from_ExprMem(self, expr): - return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, - self.from_expr(expr.arg)) + new_ptr = self.from_expr(expr.arg) + return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, new_ptr) def from_ExprOp(self, expr): if len(expr.args) == 1: if expr.op == 'parity': - return "parity(%s&0x%x)" % (self.from_expr(expr.args[0]), - size2mask(expr.args[0].size)) + return "parity(%s&%s)" % ( + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), + ) elif expr.op in ['cntleadzeros', 'cnttrailzeros']: - return "%s(0x%x, %s)" % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0])) - elif expr.op in ['clz']: - return "%s(%s)" % (expr.op, - self.from_expr(expr.args[0])) + return "%s(0x%x, %s)" % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]) + ) elif expr.op == '!': - return "(~ %s)&0x%x" % (self.from_expr(expr.args[0]), - size2mask(expr.args[0].size)) - elif expr.op in ["hex2bcd", "bcd2hex"]: - return "%s_%d(%s)" % (expr.op, expr.args[0].size, - self.from_expr(expr.args[0])) + return "(~ %s)&%s" % ( + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), + ) elif (expr.op.startswith("double_to_") or expr.op.endswith("_to_double") or expr.op.startswith("access_") or @@ -63,37 +101,53 @@ 
class TranslatorC(Translator): expr.op.startswith("fxam_c") or expr.op in ["-", "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", "fchs"]): - return "%s(%s)" % (expr.op, self.from_expr(expr.args[0])) + return "%s(%s)" % ( + expr.op, + self.from_expr(expr.args[0]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) elif len(expr.args) == 2: if expr.op == "==": - return '(((%s&0x%x) == (%s&0x%x))?1:0)' % ( - self.from_expr(expr.args[0]), size2mask(expr.args[0].size), - self.from_expr(expr.args[1]), size2mask(expr.args[1].size)) + return '(((%s&%s) == (%s&%s))?1:0)' % ( + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), + self.from_expr(expr.args[1]), + self._size2mask(expr.args[1].size), + ) elif expr.op in self.dct_shift: - return 'SHIFT_%s(%d, %s, %s)' % (self.dct_shift[expr.op].upper(), - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return 'SHIFT_%s(%d, %s, %s)' % ( + self.dct_shift[expr.op].upper(), + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.is_associative() or expr.op in ["%", "/"]: - oper = ['(%s&0x%x)' % (self.from_expr(arg), size2mask(arg.size)) + oper = ['(%s&%s)' % ( + self.from_expr(arg), + self._size2mask(arg.size) + ) for arg in expr.args] oper = str(expr.op).join(oper) - return "((%s)&0x%x)" % (oper, size2mask(expr.args[0].size)) + return "((%s)&%s)" % (oper, self._size2mask(expr.args[0].size)) elif expr.op in ['-']: - return '(((%s&0x%x) %s (%s&0x%x))&0x%x)' % ( - self.from_expr(expr.args[0]), size2mask(expr.args[0].size), + return '(((%s&%s) %s (%s&%s))&%s)' % ( + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), str(expr.op), - self.from_expr(expr.args[1]), size2mask(expr.args[1].size), - size2mask(expr.args[0].size)) + self.from_expr(expr.args[1]), + self._size2mask(expr.args[1].size), + self._size2mask(expr.args[0].size) + ) elif expr.op in self.dct_rot: - return '(%s(%s, %s, %s) &0x%x)' % 
(self.dct_rot[expr.op], - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1]), - size2mask(expr.args[0].size)) + return '(%s(%s, %s, %s) &%s)' % ( + self.dct_rot[expr.op], + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]), + self._size2mask(expr.args[0].size), + ) elif expr.op == 'x86_cpuid': return "%s(%s, %s)" % (expr.op, self.from_expr(expr.args[0]), @@ -101,38 +155,53 @@ class TranslatorC(Translator): elif (expr.op.startswith("fcom") or expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", "fprem", "fprem_lsb", "fyl2x", "fpatan"]): - return "fpu_%s(%s, %s)" % (expr.op, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "fpu_%s(%s, %s)" % ( + expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op == "segm": return "segm2addr(jitcpu, %s, %s)" % ( - self.from_expr(expr.args[0]), self.from_expr(expr.args[1])) + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ['udiv', 'umod', 'idiv', 'imod']: - return '%s%d((vm_cpu_t*)jitcpu->cpu, %s, %s)' % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return '%s%d(%s, %s)' % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ["bcdadd", "bcdadd_cf"]: - return "%s_%d(%s, %s)" % (expr.op, expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "%s_%d(%s, %s)" % ( + expr.op, expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) elif len(expr.args) >= 3 and expr.is_associative(): # ????? 
- oper = ['(%s&0x%x)' % (self.from_expr(arg), size2mask(arg.size)) + oper = ['(%s&%s)' % ( + self.from_expr(arg), + self._size2mask(arg.size), + ) for arg in expr.args] oper = str(expr.op).join(oper) - return "((%s)&0x%x)" % (oper, size2mask(expr.args[0].size)) - + return "((%s)&%s)" % ( + oper, + self._size2mask(expr.args[0].size) + ) else: raise NotImplementedError('Unknown op: %s' % expr.op) def from_ExprSlice(self, expr): # XXX check mask for 64 bit & 32 bit compat - return "((%s>>%d) & 0x%X)" % (self.from_expr(expr.arg), - expr.start, - (1 << (expr.stop - expr.start)) - 1) + return "((%s>>%d) &%s)" % ( + self.from_expr(expr.arg), + expr.start, + self._size2mask(expr.stop - expr.start) + ) def from_ExprCompose(self, expr): out = [] @@ -149,10 +218,12 @@ class TranslatorC(Translator): dst_cast = "uint%d_t" % size for index, arg in expr.iter_args(): - out.append("(((%s)(%s & 0x%X)) << %d)" % (dst_cast, - self.from_expr(arg), - (1 << arg.size) - 1, - index)) + out.append("(((%s)(%s & %s)) << %d)" % ( + dst_cast, + self.from_expr(arg), + self._size2mask(arg.size), + index) + ) out = ' | '.join(out) return '(' + out + ')' diff --git a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py index d7369e9e..e05f5e4d 100644 --- a/miasm2/ir/translators/python.py +++ b/miasm2/ir/translators/python.py @@ -20,6 +20,9 @@ class TranslatorPython(Translator): def from_ExprId(self, expr): return str(expr) + def from_ExprLoc(self, expr): + return str(expr) + def from_ExprMem(self, expr): return "memory(%s, 0x%x)" % (self.from_expr(expr.arg), expr.size / 8) diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index 18bcb9bd..1a513bfb 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -1,7 +1,6 @@ import logging import operator -from miasm2.core.asmblock import AsmLabel from miasm2.ir.translators.translator import Translator from miasm2.expression.smt2_helper import * @@ -120,7 +119,7 @@ class 
TranslatorSMT2(Translator): # Implemented language __LANG__ = "smt2" - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", loc_db=None, **kwargs): """Instance a SMT2 translator @endianness: (optional) memory endianness """ @@ -129,24 +128,26 @@ class TranslatorSMT2(Translator): self._mem = SMT2Mem(endianness) # map of translated bit vectors self._bitvectors = dict() + # symbol pool + self.loc_db = loc_db def from_ExprInt(self, expr): return bit_vec_val(expr.arg.arg, expr.size) def from_ExprId(self, expr): - if isinstance(expr.name, AsmLabel): - if expr.name.offset is not None: - return bit_vec_val(str(expr.name.offset), expr.size) - else: - # SMT2-escape expression name - name = "|{}|".format(str(expr.name)) - if name not in self._bitvectors: - self._bitvectors[name] = expr.size - return name - else: - if str(expr) not in self._bitvectors: - self._bitvectors[str(expr)] = expr.size - return str(expr) + if str(expr) not in self._bitvectors: + self._bitvectors[str(expr)] = expr.size + return str(expr) + + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.loc_db is None or self.loc_db.get_location_offset(loc_key) is None: + if str(loc_key) not in self._bitvectors: + self._bitvectors[str(loc_key)] = expr.size + return str(loc_key) + + offset = self.loc_db.get_location_offset(loc_key) + return bit_vec_val(str(offset), expr.size) def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) diff --git a/miasm2/ir/translators/translator.py b/miasm2/ir/translators/translator.py index e3641843..557fdabe 100644 --- a/miasm2/ir/translators/translator.py +++ b/miasm2/ir/translators/translator.py @@ -53,6 +53,12 @@ class Translator(object): """ raise NotImplementedError("Abstract method") + def from_ExprLoc(self, expr): + """Translate an ExprLoc + @expr: ExprLoc to translate + """ + raise NotImplementedError("Abstract method") + def from_ExprCompose(self, expr): """Translate an ExprCompose @expr: ExprCompose to translate @@ -100,6 
+106,7 @@ class Translator(object): # Handle Expr type handlers = {m2_expr.ExprInt: self.from_ExprInt, m2_expr.ExprId: self.from_ExprId, + m2_expr.ExprLoc: self.from_ExprLoc, m2_expr.ExprCompose: self.from_ExprCompose, m2_expr.ExprSlice: self.from_ExprSlice, m2_expr.ExprOp: self.from_ExprOp, diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index 536daff1..887c68d0 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -5,7 +5,6 @@ import operator # Raise an ImportError if z3 is not available WITHOUT actually importing it imp.find_module("z3") -from miasm2.core.asmblock import AsmLabel from miasm2.ir.translators.translator import Translator log = logging.getLogger("translator_z3") @@ -116,7 +115,7 @@ class TranslatorZ3(Translator): # Operations translation trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", loc_db=None, **kwargs): """Instance a Z3 translator @endianness: (optional) memory endianness """ @@ -126,15 +125,24 @@ class TranslatorZ3(Translator): super(TranslatorZ3, self).__init__(**kwargs) self._mem = Z3Mem(endianness) + self.loc_db = loc_db def from_ExprInt(self, expr): return z3.BitVecVal(expr.arg.arg, expr.size) def from_ExprId(self, expr): - if isinstance(expr.name, AsmLabel) and expr.name.offset is not None: - return z3.BitVecVal(expr.name.offset, expr.size) - else: + return z3.BitVec(str(expr), expr.size) + + def from_ExprLoc(self, expr): + if self.loc_db is None: + # No loc_db, fallback to default name return z3.BitVec(str(expr), expr.size) + loc_key = expr.loc_key + offset = self.loc_db.get_location_offset(loc_key) + if offset is not None: + return z3.BitVecVal(offset, expr.size) + # fallback to default name + return z3.BitVec(str(loc_key), expr.size) def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) diff --git a/miasm2/jitter/JitCore.c b/miasm2/jitter/JitCore.c index 84f835f1..496ec8e1 
100644 --- a/miasm2/jitter/JitCore.c +++ b/miasm2/jitter/JitCore.c @@ -74,6 +74,11 @@ uint64_t __attribute__((weak)) MEM_LOOKUP_64(JitCpu* jitcpu, uint64_t addr) return vm_MEM_LOOKUP_64(&(jitcpu->pyvm->vm_mngr), addr); } +uint128_t __attribute__((weak)) MEM_LOOKUP_128(JitCpu* jitcpu, uint64_t addr) +{ + return vm_MEM_LOOKUP_128(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr); +} + void __attribute__((weak)) MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) { vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); @@ -94,8 +99,10 @@ void __attribute__((weak)) MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); } - - +void __attribute__((weak)) MEM_WRITE_128(JitCpu* jitcpu, uint64_t addr, uint128_t src) +{ + vm_MEM_WRITE_128(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); +} PyObject* __attribute__((weak)) vm_get_mem(JitCpu *self, PyObject* args) { diff --git a/miasm2/jitter/JitCore.h b/miasm2/jitter/JitCore.h index f599d6ea..c703178d 100644 --- a/miasm2/jitter/JitCore.h +++ b/miasm2/jitter/JitCore.h @@ -4,6 +4,7 @@ #define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} #define RAISE_ret0(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return 0;} +#define uint128_t __uint128_t #define PyGetInt(item, value) \ if (PyInt_Check(item)){ \ @@ -30,6 +31,23 @@ } \ +#define getset_reg_u128(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return _PyLong_FromByteArray((const unsigned char*) &(((vm_cpu_t*)(self->cpu))-> regname ), sizeof(uint128_t), /*little_endian*/ 1, /*is_signed*/ 0); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint128_t val = 0; \ + int i; \ + unsigned char bytes[sizeof(uint128_t)]; \ + _PyLong_AsByteArray((PyLongObject*)value, bytes, sizeof(uint128_t), /*little_endian*/ 1, /*is_signed*/ 0); \ + for (i = 0; i < sizeof(uint128_t); i++) { \ + val 
|= (uint128_t) bytes[i] << (8 * i); \ + } \ + ((vm_cpu_t*)(self->cpu))-> regname = val; \ + return 0; \ + } #define getset_reg_u64(regname) \ static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ @@ -122,10 +140,12 @@ uint8_t MEM_LOOKUP_08(JitCpu* jitcpu, uint64_t addr); uint16_t MEM_LOOKUP_16(JitCpu* jitcpu, uint64_t addr); uint32_t MEM_LOOKUP_32(JitCpu* jitcpu, uint64_t addr); uint64_t MEM_LOOKUP_64(JitCpu* jitcpu, uint64_t addr); +uint128_t MEM_LOOKUP_128(JitCpu* jitcpu, uint64_t addr); void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); +void MEM_WRITE_128(JitCpu* jitcpu, uint64_t addr, uint128_t src); PyObject* vm_get_mem(JitCpu *self, PyObject* args); diff --git a/miasm2/jitter/Jitgcc.c b/miasm2/jitter/Jitgcc.c index 79274f24..329b7db4 100644 --- a/miasm2/jitter/Jitgcc.c +++ b/miasm2/jitter/Jitgcc.c @@ -10,13 +10,13 @@ typedef struct { typedef int (*jitted_func)(block_id*, PyObject*); -PyObject* gcc_exec_bloc(PyObject* self, PyObject* args) +PyObject* gcc_exec_block(PyObject* self, PyObject* args) { jitted_func func; PyObject* jitcpu; PyObject* func_py; PyObject* lbl2ptr; - PyObject* breakpoints; + PyObject* stop_offsets; PyObject* retaddr = NULL; int status; block_id BlockDst; @@ -26,7 +26,7 @@ PyObject* gcc_exec_bloc(PyObject* self, PyObject* args) if (!PyArg_ParseTuple(args, "OOOO|K", - &retaddr, &jitcpu, &lbl2ptr, &breakpoints, + &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, &max_exec_per_call)) return NULL; @@ -73,8 +73,8 @@ PyObject* gcc_exec_bloc(PyObject* self, PyObject* args) if (status) return retaddr; - // Check breakpoint - if (PyDict_Contains(breakpoints, retaddr)) + // Check stop offsets + if (PySet_Contains(stop_offsets, retaddr)) return retaddr; } } @@ -85,8 +85,8 @@ static PyObject *GccError; static PyMethodDef GccMethods[] = { 
- {"gcc_exec_bloc", gcc_exec_bloc, METH_VARARGS, - "gcc exec bloc"}, + {"gcc_exec_block", gcc_exec_block, METH_VARARGS, + "gcc exec block"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/miasm2/jitter/Jitllvm.c b/miasm2/jitter/Jitllvm.c index b46f88e3..6ecbd483 100644 --- a/miasm2/jitter/Jitllvm.c +++ b/miasm2/jitter/Jitllvm.c @@ -10,7 +10,7 @@ // Needed to get the JitCpu.cpu offset, arch independent #include "arch/JitCore_x86.h" -PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) +PyObject* llvm_exec_block(PyObject* self, PyObject* args) { uint64_t (*func)(void*, void*, void*, uint8_t*); vm_cpu_t* cpu; @@ -20,14 +20,14 @@ PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) uint8_t status; PyObject* func_py; PyObject* lbl2ptr; - PyObject* breakpoints; + PyObject* stop_offsets; PyObject* retaddr = NULL; uint64_t max_exec_per_call = 0; uint64_t cpt; int do_cpt; if (!PyArg_ParseTuple(args, "OOOO|K", - &retaddr, &jitcpu, &lbl2ptr, &breakpoints, + &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, &max_exec_per_call)) return NULL; @@ -68,16 +68,16 @@ PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) if (status) return retaddr; - // Check breakpoint - if (PyDict_Contains(breakpoints, retaddr)) + // Check stop offsets + if (PySet_Contains(stop_offsets, retaddr)) return retaddr; } } static PyMethodDef LLVMMethods[] = { - {"llvm_exec_bloc", llvm_exec_bloc, METH_VARARGS, - "llvm exec bloc"}, + {"llvm_exec_block", llvm_exec_block, METH_VARARGS, + "llvm exec block"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/miasm2/jitter/arch/JitCore_aarch64.c b/miasm2/jitter/arch/JitCore_aarch64.c index 0224bfe2..a8502ef9 100644 --- a/miasm2/jitter/arch/JitCore_aarch64.c +++ b/miasm2/jitter/arch/JitCore_aarch64.c @@ -284,25 +284,6 @@ PyObject* vm_set_mem(JitCpu *self, PyObject* args) return Py_None; } - -UDIV(16) -UDIV(32) -UDIV(64) - -UMOD(16) -UMOD(32) -UMOD(64) - - -IDIV(16) -IDIV(32) -IDIV(64) - -IMOD(16) -IMOD(32) -IMOD(64) - - static PyMemberDef 
JitCpu_members[] = { {NULL} /* Sentinel */ }; diff --git a/miasm2/jitter/arch/JitCore_aarch64.h b/miasm2/jitter/arch/JitCore_aarch64.h index c7fc3cea..fa50513f 100644 --- a/miasm2/jitter/arch/JitCore_aarch64.h +++ b/miasm2/jitter/arch/JitCore_aarch64.h @@ -49,19 +49,4 @@ typedef struct { void dump_gpregs(vm_cpu_t* vmcpu); -uint64_t udiv64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); -uint64_t umod64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); -int64_t idiv64(vm_cpu_t* vmcpu, int64_t a, int64_t b); -int64_t imod64(vm_cpu_t* vmcpu, int64_t a, int64_t b); - -uint32_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -uint32_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); -int32_t imod32(vm_cpu_t* vmcpu, int32_t a, int32_t b); - -uint16_t udiv16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); -uint16_t umod16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); -int16_t idiv16(vm_cpu_t* vmcpu, int16_t a, int16_t b); -int16_t imod16(vm_cpu_t* vmcpu, int16_t a, int16_t b); - #define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/arch/JitCore_arm.c b/miasm2/jitter/arch/JitCore_arm.c index 60b8f95b..8a09ba6c 100644 --- a/miasm2/jitter/arch/JitCore_arm.c +++ b/miasm2/jitter/arch/JitCore_arm.c @@ -187,16 +187,6 @@ void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) } - -UDIV(32) - -UMOD(32) - -IDIV(32) - -IMOD(32) - - void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) { vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); @@ -221,22 +211,6 @@ void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) check_automod(jitcpu, addr, 64); } - -uint32_t clz(uint32_t arg) -{ - - int i; - - for (i=0; i<32; i++) { - if (arg & (1ull << (31-i))) - break; - } - return i; -} - - - - PyObject* vm_set_mem(JitCpu *self, PyObject* args) { PyObject *py_addr; diff --git a/miasm2/jitter/arch/JitCore_arm.h b/miasm2/jitter/arch/JitCore_arm.h index 445ac44a..1f3ccaf2 100644 --- a/miasm2/jitter/arch/JitCore_arm.h +++ 
b/miasm2/jitter/arch/JitCore_arm.h @@ -38,13 +38,4 @@ typedef struct { void dump_gpregs(vm_cpu_t* vmcpu); - -uint32_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -uint32_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); -int32_t imod32(vm_cpu_t* vmcpu, int32_t a, int32_t b); - - #define RETURN_PC return BlockDst; - -uint32_t clz(uint32_t arg); diff --git a/miasm2/jitter/arch/JitCore_mips32.c b/miasm2/jitter/arch/JitCore_mips32.c index 2142e8cc..4ebfbec8 100644 --- a/miasm2/jitter/arch/JitCore_mips32.c +++ b/miasm2/jitter/arch/JitCore_mips32.c @@ -222,25 +222,6 @@ void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) } -UDIV(16) -UDIV(32) -UDIV(64) - -UMOD(16) -UMOD(32) -UMOD(64) - - -IDIV(16) -IDIV(32) -IDIV(64) - -IMOD(16) -IMOD(32) -IMOD(64) - - - void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) { vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); diff --git a/miasm2/jitter/arch/JitCore_mips32.h b/miasm2/jitter/arch/JitCore_mips32.h index b45cac2b..ff2045d7 100644 --- a/miasm2/jitter/arch/JitCore_mips32.h +++ b/miasm2/jitter/arch/JitCore_mips32.h @@ -335,19 +335,4 @@ typedef struct { void dump_gpregs(vm_cpu_t* vmcpu); -uint64_t udiv64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); -uint64_t umod64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); -int64_t idiv64(vm_cpu_t* vmcpu, int64_t a, int64_t b); -int64_t imod64(vm_cpu_t* vmcpu, int64_t a, int64_t b); - -uint32_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -uint32_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); -int32_t imod32(vm_cpu_t* vmcpu, int32_t a, int32_t b); - -uint16_t udiv16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); -uint16_t umod16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); -int16_t idiv16(vm_cpu_t* vmcpu, int16_t a, int16_t b); -int16_t imod16(vm_cpu_t* vmcpu, int16_t a, int16_t b); - #define RETURN_PC return BlockDst; diff --git 
a/miasm2/jitter/arch/JitCore_msp430.h b/miasm2/jitter/arch/JitCore_msp430.h index cad028a9..3934d37a 100644 --- a/miasm2/jitter/arch/JitCore_msp430.h +++ b/miasm2/jitter/arch/JitCore_msp430.h @@ -36,16 +36,4 @@ typedef struct { #define RETURN_PC return BlockDst; -uint16_t bcdadd_16(uint16_t a, uint16_t b); - -uint16_t bcdadd_cf_16(uint16_t a, uint16_t b); - -uint16_t hex2bcd_16(uint16_t a); - -uint8_t hex2bcd_8(uint8_t a); - -uint8_t bcd2hex_8(uint8_t a); - -uint16_t bcd2hex_16(uint16_t a); - void dump_gpregs(vm_cpu_t* vmcpu); diff --git a/miasm2/jitter/arch/JitCore_ppc32.c b/miasm2/jitter/arch/JitCore_ppc32.c index 7ea5e88f..616e7cfd 100644 --- a/miasm2/jitter/arch/JitCore_ppc32.c +++ b/miasm2/jitter/arch/JitCore_ppc32.c @@ -246,38 +246,6 @@ get_gpreg_offset_all(void) { return dict; } -int32_t -idiv32(struct vm_cpu *cpu, int32_t a, int32_t b) { - if (b == 0) - return 0; - - return a / b; -} - -uint32_t -udiv32(struct vm_cpu *cpu, uint32_t a, uint32_t b) { - if (b == 0) - return 0; - - return a / b; -} - -int32_t -imod32(struct vm_cpu *cpu, int32_t a, int32_t b) { - if (b == 0) - return 0; - - return a % b; -} - -uint32_t -umod32(struct vm_cpu *cpu, uint32_t a, uint32_t b) { - if (b == 0) - return 0; - - return a % b; -} - static PyGetSetDef JitCpu_getseters[] = { {"vmmngr", (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, diff --git a/miasm2/jitter/arch/JitCore_ppc32.h b/miasm2/jitter/arch/JitCore_ppc32.h index 18cee358..5a048190 100644 --- a/miasm2/jitter/arch/JitCore_ppc32.h +++ b/miasm2/jitter/arch/JitCore_ppc32.h @@ -14,11 +14,6 @@ struct vm_cpu { uint32_t reserve_address; }; -int32_t idiv32(struct vm_cpu *, int32_t, int32_t); -uint32_t udiv32(struct vm_cpu *, uint32_t, uint32_t); -int32_t imod32(struct vm_cpu *, int32_t, int32_t); -uint32_t umod32(struct vm_cpu *, uint32_t, uint32_t); - void dump_gpregs(struct vm_cpu *); typedef struct vm_cpu vm_cpu_t; diff --git a/miasm2/jitter/arch/JitCore_x86.c b/miasm2/jitter/arch/JitCore_x86.c index 
e1e36989..eda6e6e5 100644 --- a/miasm2/jitter/arch/JitCore_x86.c +++ b/miasm2/jitter/arch/JitCore_x86.c @@ -55,6 +55,23 @@ reg_dict gpreg_dict[] = { {.name = "RAX", .offset = offsetof(vm_cpu_t, RAX)}, {.name = "MM6", .offset = offsetof(vm_cpu_t, MM6)}, {.name = "MM7", .offset = offsetof(vm_cpu_t, MM7)}, + {.name = "XMM0", .offset = offsetof(vm_cpu_t, XMM0)}, + {.name = "XMM1", .offset = offsetof(vm_cpu_t, XMM1)}, + {.name = "XMM2", .offset = offsetof(vm_cpu_t, XMM2)}, + {.name = "XMM3", .offset = offsetof(vm_cpu_t, XMM3)}, + {.name = "XMM4", .offset = offsetof(vm_cpu_t, XMM4)}, + {.name = "XMM5", .offset = offsetof(vm_cpu_t, XMM5)}, + {.name = "XMM6", .offset = offsetof(vm_cpu_t, XMM6)}, + {.name = "XMM7", .offset = offsetof(vm_cpu_t, XMM7)}, + {.name = "XMM8", .offset = offsetof(vm_cpu_t, XMM8)}, + {.name = "XMM9", .offset = offsetof(vm_cpu_t, XMM9)}, + {.name = "XMM10", .offset = offsetof(vm_cpu_t, XMM10)}, + {.name = "XMM11", .offset = offsetof(vm_cpu_t, XMM11)}, + {.name = "XMM12", .offset = offsetof(vm_cpu_t, XMM12)}, + {.name = "XMM13", .offset = offsetof(vm_cpu_t, XMM13)}, + {.name = "XMM14", .offset = offsetof(vm_cpu_t, XMM14)}, + {.name = "XMM15", .offset = offsetof(vm_cpu_t, XMM15)}, + {.name = "tsc1", .offset = offsetof(vm_cpu_t, tsc1)}, {.name = "tsc2", .offset = offsetof(vm_cpu_t, tsc2)}, @@ -120,6 +137,23 @@ PyObject* cpu_get_gpreg(JitCpu* self) get_reg(MM6); get_reg(MM7); + get_reg(XMM0); + get_reg(XMM1); + get_reg(XMM2); + get_reg(XMM3); + get_reg(XMM4); + get_reg(XMM5); + get_reg(XMM6); + get_reg(XMM7); + get_reg(XMM8); + get_reg(XMM9); + get_reg(XMM10); + get_reg(XMM11); + get_reg(XMM12); + get_reg(XMM13); + get_reg(XMM14); + get_reg(XMM15); + get_reg(tsc1); get_reg(tsc2); @@ -330,24 +364,6 @@ uint64_t segm2addr(JitCpu* jitcpu, uint64_t segm, uint64_t addr) return addr + ((vm_cpu_t*)jitcpu->cpu)->segm_base[segm]; } - -UDIV(16) -UDIV(32) -UDIV(64) - -UMOD(16) -UMOD(32) -UMOD(64) - - -IDIV(16) -IDIV(32) -IDIV(64) - -IMOD(16) -IMOD(32) -IMOD(64) - 
void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) { vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); @@ -544,6 +560,23 @@ getset_reg_u64(MM5); getset_reg_u64(MM6); getset_reg_u64(MM7); +getset_reg_u128(XMM0); +getset_reg_u128(XMM1); +getset_reg_u128(XMM2); +getset_reg_u128(XMM3); +getset_reg_u128(XMM4); +getset_reg_u128(XMM5); +getset_reg_u128(XMM6); +getset_reg_u128(XMM7); +getset_reg_u128(XMM8); +getset_reg_u128(XMM9); +getset_reg_u128(XMM10); +getset_reg_u128(XMM11); +getset_reg_u128(XMM12); +getset_reg_u128(XMM13); +getset_reg_u128(XMM14); +getset_reg_u128(XMM15); + getset_reg_u32(tsc1); getset_reg_u32(tsc2); @@ -620,6 +653,23 @@ PyObject* get_gpreg_offset_all(void) get_reg_off(MM6); get_reg_off(MM7); + get_reg_off(XMM0); + get_reg_off(XMM1); + get_reg_off(XMM2); + get_reg_off(XMM3); + get_reg_off(XMM4); + get_reg_off(XMM5); + get_reg_off(XMM6); + get_reg_off(XMM7); + get_reg_off(XMM8); + get_reg_off(XMM9); + get_reg_off(XMM10); + get_reg_off(XMM11); + get_reg_off(XMM12); + get_reg_off(XMM13); + get_reg_off(XMM14); + get_reg_off(XMM15); + get_reg_off(tsc1); get_reg_off(tsc2); @@ -708,6 +758,23 @@ static PyGetSetDef JitCpu_getseters[] = { {"MM6", (getter)JitCpu_get_MM6, (setter)JitCpu_set_MM6, "MM6", NULL}, {"MM7", (getter)JitCpu_get_MM7, (setter)JitCpu_set_MM7, "MM7", NULL}, + {"XMM0", (getter)JitCpu_get_XMM0, (setter)JitCpu_set_XMM0, "XMM0", NULL}, + {"XMM1", (getter)JitCpu_get_XMM1, (setter)JitCpu_set_XMM1, "XMM1", NULL}, + {"XMM2", (getter)JitCpu_get_XMM2, (setter)JitCpu_set_XMM2, "XMM2", NULL}, + {"XMM3", (getter)JitCpu_get_XMM3, (setter)JitCpu_set_XMM3, "XMM3", NULL}, + {"XMM4", (getter)JitCpu_get_XMM4, (setter)JitCpu_set_XMM4, "XMM4", NULL}, + {"XMM5", (getter)JitCpu_get_XMM5, (setter)JitCpu_set_XMM5, "XMM5", NULL}, + {"XMM6", (getter)JitCpu_get_XMM6, (setter)JitCpu_set_XMM6, "XMM6", NULL}, + {"XMM7", (getter)JitCpu_get_XMM7, (setter)JitCpu_set_XMM7, "XMM7", NULL}, + {"XMM8", (getter)JitCpu_get_XMM8, (setter)JitCpu_set_XMM8, 
"XMM8", NULL}, + {"XMM9", (getter)JitCpu_get_XMM9, (setter)JitCpu_set_XMM9, "XMM9", NULL}, + {"XMM10", (getter)JitCpu_get_XMM10, (setter)JitCpu_set_XMM10, "XMM10", NULL}, + {"XMM11", (getter)JitCpu_get_XMM11, (setter)JitCpu_set_XMM11, "XMM11", NULL}, + {"XMM12", (getter)JitCpu_get_XMM12, (setter)JitCpu_set_XMM12, "XMM12", NULL}, + {"XMM13", (getter)JitCpu_get_XMM13, (setter)JitCpu_set_XMM13, "XMM13", NULL}, + {"XMM14", (getter)JitCpu_get_XMM14, (setter)JitCpu_set_XMM14, "XMM14", NULL}, + {"XMM15", (getter)JitCpu_get_XMM15, (setter)JitCpu_set_XMM15, "XMM15", NULL}, + {"tsc1", (getter)JitCpu_get_tsc1, (setter)JitCpu_set_tsc1, "tsc1", NULL}, {"tsc2", (getter)JitCpu_get_tsc2, (setter)JitCpu_set_tsc2, "tsc2", NULL}, diff --git a/miasm2/jitter/arch/JitCore_x86.h b/miasm2/jitter/arch/JitCore_x86.h index e55948e6..221ba5db 100644 --- a/miasm2/jitter/arch/JitCore_x86.h +++ b/miasm2/jitter/arch/JitCore_x86.h @@ -1,3 +1,4 @@ +#define uint128_t __uint128_t typedef struct { uint32_t exception_flags; @@ -96,31 +97,30 @@ typedef struct { uint64_t MM6; uint64_t MM7; + /* SSE */ + uint128_t XMM0; + uint128_t XMM1; + uint128_t XMM2; + uint128_t XMM3; + uint128_t XMM4; + uint128_t XMM5; + uint128_t XMM6; + uint128_t XMM7; + uint128_t XMM8; + uint128_t XMM9; + uint128_t XMM10; + uint128_t XMM11; + uint128_t XMM12; + uint128_t XMM13; + uint128_t XMM14; + uint128_t XMM15; + uint32_t segm_base[0x10000]; }vm_cpu_t; - - - void dump_gpregs_32(vm_cpu_t* vmcpu); void dump_gpregs_64(vm_cpu_t* vmcpu); uint64_t segm2addr(JitCpu* jitcpu, uint64_t segm, uint64_t addr); - -uint64_t udiv64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); -uint64_t umod64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); -int64_t idiv64(vm_cpu_t* vmcpu, int64_t a, int64_t b); -int64_t imod64(vm_cpu_t* vmcpu, int64_t a, int64_t b); - -uint32_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -uint32_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); -int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); -int32_t imod32(vm_cpu_t* 
vmcpu, int32_t a, int32_t b); - -uint16_t udiv16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); -uint16_t umod16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); -int16_t idiv16(vm_cpu_t* vmcpu, int16_t a, int16_t b); -int16_t imod16(vm_cpu_t* vmcpu, int16_t a, int16_t b); - #define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index 2c546be8..10140fd2 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -2,17 +2,18 @@ Module to generate C code for a given native @block """ -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import Expr, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCond, LocKey from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.translators import Translator -from miasm2.core.asmblock import expr_is_label, AsmBlockBad, AsmLabel -# Miasm to C translator -TRANSLATOR = Translator.to_language("C") +from miasm2.ir.translators.C import TranslatorC +from miasm2.core.asmblock import AsmBlockBad -SIZE_TO_MASK = {x: 2**x - 1 for x in (1, 2, 3, 7, 8, 16, 32, 64)} +TRANSLATOR_NO_SYMBOL = TranslatorC(loc_db=None) + +SIZE_TO_MASK = {size: TRANSLATOR_NO_SYMBOL.from_expr(ExprInt(0, size).mask) + for size in (1, 2, 3, 7, 8, 16, 32, 64, 128)} -MASK_INT = 0xffffffffffffffff class Attributes(object): @@ -101,26 +102,21 @@ class CGen(object): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.pc + self.translator = TranslatorC(self.ir_arch.loc_db) self.init_arch_C() def init_arch_C(self): """Iinitialize jitter internals""" self.id_to_c_id = {} for reg in self.ir_arch.arch.regs.all_regs_ids: - self.id_to_c_id[reg] = m2_expr.ExprId('mycpu->%s' % reg, reg.size) + self.id_to_c_id[reg] = ExprId('mycpu->%s' % reg, reg.size) self.C_PC = self.id_to_c(self.PC) - @staticmethod - def label_to_jitlabel(lbl): - """Convert AsmLabel to a jitter label name""" - assert lbl.offset is not None - return "jitblock_%X" % lbl.offset - def dst_to_c(self, src): """Translate Expr 
@src into C code""" - if not isinstance(src, m2_expr.Expr): - src = m2_expr.ExprInt(src, self.PC.size) + if not isinstance(src, Expr): + src = ExprInt(src, self.PC.size) return self.id_to_c(src) def patch_c_id(self, expr): @@ -129,14 +125,14 @@ class CGen(object): def id_to_c(self, expr): """Translate Expr @expr into corresponding C code""" - return TRANSLATOR.from_expr(self.patch_c_id(expr)) + return self.translator.from_expr(self.patch_c_id(expr)) - def add_label_index(self, dst2index, lbl): + def add_label_index(self, dst2index, loc_key): """Insert @lbl to the dictionnary @dst2index with a uniq value - @dst2index: AsmLabel -> uniq value - @lbl: AsmLabel istance""" + @dst2index: LocKey -> uniq value + @loc_key: LocKey istance""" - dst2index[lbl] = len(dst2index) + dst2index[loc_key] = len(dst2index) def assignblk_to_irbloc(self, instr, assignblk): """ @@ -147,10 +143,11 @@ class CGen(object): new_assignblk = dict(assignblk) if self.ir_arch.IRDst not in assignblk: offset = instr.offset + instr.l - dst = m2_expr.ExprInt(offset, self.ir_arch.IRDst.size) + loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) + dst = ExprLoc(loc_key, self.ir_arch.IRDst.size) new_assignblk[self.ir_arch.IRDst] = dst irs = [AssignBlock(new_assignblk, instr)] - return IRBlock(self.ir_arch.get_instr_label(instr), irs) + return IRBlock(self.ir_arch.get_loc_key_for_instr(instr), irs) def block2assignblks(self, block): """ @@ -168,6 +165,7 @@ class CGen(object): for irblock in irblocks: assert irblock.dst is not None irblocks_list.append(irblocks) + return irblocks_list def add_local_var(self, dst_var, dst_index, expr): @@ -183,7 +181,7 @@ class CGen(object): if size not in dst_index: raise RuntimeError("Unsupported operand size %s", size) var_num = dst_index[size] - dst = m2_expr.ExprId("var_%.2d_%.2d" % (size, var_num), size) + dst = ExprId("var_%.2d_%.2d" % (size, var_num), size) dst_index[size] += 1 dst_var[expr] = dst return dst @@ -199,12 +197,13 @@ class CGen(object): # 
Prefetch memory read for expr in assignblk.get_r(mem_read=True): - if not isinstance(expr, m2_expr.ExprMem): + if not isinstance(expr, ExprMem): continue var_num = mem_index[expr.size] mem_index[expr.size] += 1 - var = m2_expr.ExprId( - "prefetch_%.2d_%.2d" % (expr.size, var_num), expr.size) + var = ExprId( + "prefetch_%.2d_%.2d" % (expr.size, var_num), expr.size + ) mem_var[expr] = var # Generate memory prefetch @@ -236,9 +235,9 @@ class CGen(object): for dst, src in sorted(assignblk.iteritems()): src = src.replace_expr(prefetchers) - if dst is self.ir_arch.IRDst: + if dst == self.ir_arch.IRDst: pass - elif isinstance(dst, m2_expr.ExprId): + elif isinstance(dst, ExprId): new_dst = self.add_local_var(dst_var, dst_index, dst) if dst in self.ir_arch.arch.regs.regs_flt_expr: # Dont mask float affectation @@ -246,19 +245,19 @@ class CGen(object): '%s = (%s);' % (self.id_to_c(new_dst), self.id_to_c(src))) else: c_main.append( - '%s = (%s)&0x%X;' % (self.id_to_c(new_dst), - self.id_to_c(src), - SIZE_TO_MASK[src.size])) - elif isinstance(dst, m2_expr.ExprMem): + '%s = (%s)&%s;' % (self.id_to_c(new_dst), + self.id_to_c(src), + SIZE_TO_MASK[src.size])) + elif isinstance(dst, ExprMem): ptr = dst.arg.replace_expr(prefetchers) - new_dst = m2_expr.ExprMem(ptr, dst.size) + new_dst = ExprMem(ptr, dst.size) str_dst = self.id_to_c(new_dst).replace('MEM_LOOKUP', 'MEM_WRITE') c_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src))) else: raise ValueError("Unknown dst") for dst, new_dst in dst_var.iteritems(): - if dst is self.ir_arch.IRDst: + if dst == self.ir_arch.IRDst: continue c_updt.append('%s = %s;' % (self.id_to_c(dst), self.id_to_c(new_dst))) c_var.append("uint%d_t %s;" % (new_dst.size, new_dst)) @@ -283,25 +282,25 @@ class CGen(object): @dst2index: dictionnary to link label to its index """ - if isinstance(expr, m2_expr.ExprCond): + if isinstance(expr, ExprCond): cond = self.id_to_c(expr.cond) src1, src1b = self.traverse_expr_dst(expr.src1, dst2index) src2, src2b = 
self.traverse_expr_dst(expr.src2, dst2index) return ("((%s)?(%s):(%s))" % (cond, src1, src2), "((%s)?(%s):(%s))" % (cond, src1b, src2b)) - if isinstance(expr, m2_expr.ExprInt): + if isinstance(expr, ExprInt): offset = int(expr) - self.add_label_index(dst2index, offset) - return ("%s" % dst2index[offset], hex(offset)) - if expr_is_label(expr): - label = expr.name - if label.offset != None: - offset = label.offset - self.add_label_index(dst2index, offset) - return ("%s" % dst2index[offset], hex(offset)) - self.add_label_index(dst2index, label) - return ("%s" % dst2index[label], "0") - + loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) + self.add_label_index(dst2index, loc_key) + return ("%s" % dst2index[loc_key], hex(offset)) + if expr.is_loc(): + loc_key = expr.loc_key + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) + if offset is not None: + self.add_label_index(dst2index, loc_key) + return ("%s" % dst2index[loc_key], hex(offset)) + self.add_label_index(dst2index, loc_key) + return ("%s" % dst2index[loc_key], "0") dst2index[expr] = -1 return ("-1", self.id_to_c(expr)) @@ -337,8 +336,12 @@ class CGen(object): out = [] if instr_attrib.log_mn: - out.append('printf("%.8X %s\\n");' % (instr_attrib.instr.offset, - instr_attrib.instr)) + out.append( + 'printf("%.8X %s\\n");' % ( + instr_attrib.instr.offset, + instr_attrib.instr.to_string(self.ir_arch.loc_db) + ) + ) return out def gen_post_code(self, attrib): @@ -354,24 +357,27 @@ class CGen(object): @attrib: instruction Attributes @instr_offsets: instructions offsets list @dst: potential instruction destination""" - if isinstance(dst, AsmLabel) and dst.offset is None: - # Generate goto for local labels - return ['goto %s;' % dst.name] - offset = None - if isinstance(dst, AsmLabel) and dst.offset is not None: - offset = dst.offset - elif isinstance(dst, (int, long)): - offset = dst + out = [] - if (offset is not None and - offset > attrib.instr.offset and + if isinstance(dst, Expr): + 
out += self.gen_post_code(attrib) + out.append('BlockDst->address = DST_value;') + out += self.gen_post_instr_checks(attrib) + out.append('\t\treturn JIT_RET_NO_EXCEPTION;') + return out + + assert isinstance(dst, LocKey) + offset = self.ir_arch.loc_db.get_location_offset(dst) + if offset is None: + # Generate goto for local labels + return ['goto %s;' % dst] + if (offset > attrib.instr.offset and offset in instr_offsets): # Only generate goto for next instructions. # (consecutive instructions) - lbl = self.ir_arch.symbol_pool.getby_offset_create(dst) out += self.gen_post_code(attrib) out += self.gen_post_instr_checks(attrib) - out.append('goto %s;' % self.label_to_jitlabel(lbl)) + out.append('goto %s;' % dst) else: out += self.gen_post_code(attrib) out.append('BlockDst->address = DST_value;') @@ -467,10 +473,10 @@ class CGen(object): element_read = assignblk.get_r(mem_read=True) # Check mem read - attrib.mem_read = any(isinstance(expr, m2_expr.ExprMem) + attrib.mem_read = any(isinstance(expr, ExprMem) for expr in element_read) # Check mem write - attrib.mem_write = any(isinstance(dst, m2_expr.ExprMem) + attrib.mem_write = any(isinstance(dst, ExprMem) for dst in assignblk) def get_attributes(self, instr, irblocks, log_mn=False, log_regs=False): @@ -512,7 +518,7 @@ class CGen(object): last_instr = block.lines[-1] offset = last_instr.offset + last_instr.l - return self.ir_arch.symbol_pool.getby_offset_create(offset) + return self.ir_arch.loc_db.get_or_create_offset_location(offset) def gen_init(self, block): """ @@ -521,9 +527,11 @@ class CGen(object): """ instr_offsets = [line.offset for line in block.lines] - instr_offsets.append(self.get_block_post_label(block).offset) - lbl_start = self.ir_arch.symbol_pool.getby_offset_create(instr_offsets[0]) - return (self.CODE_INIT % self.label_to_jitlabel(lbl_start)).split("\n"), instr_offsets + post_label = self.get_block_post_label(block) + post_offset = self.ir_arch.loc_db.get_location_offset(post_label) + 
instr_offsets.append(post_offset) + lbl_start = block.loc_key + return (self.CODE_INIT % lbl_start).split("\n"), instr_offsets def gen_irblock(self, instr_attrib, attributes, instr_offsets, irblock): """ @@ -555,9 +563,10 @@ class CGen(object): Generate the C code for the final block instruction """ - lbl = self.get_block_post_label(block) - dst = self.dst_to_c(lbl.offset) - code = self.CODE_RETURN_NO_EXCEPTION % (self.label_to_jitlabel(lbl), self.C_PC, dst, dst) + loc_key = self.get_block_post_label(block) + offset = self.ir_arch.loc_db.get_location_offset(loc_key) + dst = self.dst_to_c(offset) + code = self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, dst, dst) return code.split('\n') def gen_c(self, block, log_mn=False, log_regs=False): @@ -570,23 +579,19 @@ class CGen(object): if isinstance(block, AsmBlockBad): return self.gen_bad_block() irblocks_list = self.block2assignblks(block) - out, instr_offsets = self.gen_init(block) assert len(block.lines) == len(irblocks_list) for instr, irblocks in zip(block.lines, irblocks_list): instr_attrib, irblocks_attributes = self.get_attributes(instr, irblocks, log_mn, log_regs) - for index, irblock in enumerate(irblocks): new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) - if new_irblock.label.offset is None: - out.append("%-40s // %.16X %s" % - (str(new_irblock.label.name) + ":", instr.offset, instr)) - else: - out.append("%-40s // %.16X %s" % - (self.label_to_jitlabel(new_irblock.label) + ":", instr.offset, instr)) + label = str(new_irblock.loc_key) + out.append("%-40s // %.16X %s" % + (label + ":", instr.offset, instr)) if index == 0: out += self.gen_pre_code(instr_attrib) out += self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, new_irblock) out += self.gen_finalize(block) + return ['\t' + line for line in out] diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index f2b1375d..fc5cf35e 100644 --- a/miasm2/jitter/jitcore.py +++ 
b/miasm2/jitter/jitcore.py @@ -16,66 +16,69 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # from hashlib import md5 +import warnings -from miasm2.core import asmblock +from miasm2.core.asmblock import disasmEngine, AsmBlockBad from miasm2.core.interval import interval from miasm2.core.utils import BoundedDict +from miasm2.expression.expression import LocKey from miasm2.jitter.csts import * - class JitCore(object): "JiT management. This is an abstract class" + # Jitted function's name + FUNCNAME = "block_entry" + jitted_block_delete_cb = None jitted_block_max_size = 10000 - def __init__(self, ir_arch, bs=None): + def __init__(self, ir_arch, bin_stream): """Initialise a JitCore instance. @ir_arch: ir instance for current architecture - @bs: bitstream + @bin_stream: bin_stream instance """ - + # Arch related self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() self.arch_name = "%s%s" % (self.ir_arch.arch.name, self.ir_arch.attrib) - self.bs = bs - self.known_blocs = {} - self.lbl2jitbloc = BoundedDict(self.jitted_block_max_size, + + # Structures for block tracking + self.offset_to_jitted_func = BoundedDict(self.jitted_block_max_size, delete_cb=self.jitted_block_delete_cb) - self.lbl2bloc = {} + self.loc_key_to_block = {} + self.blocks_mem_interval = interval() + + # Logging & options self.log_mn = False self.log_regs = False self.log_newbloc = False - self.segm_to_do = set() - self.jitcount = 0 - self.addr2obj = {} - self.addr2objref = {} - self.blocs_mem_interval = interval() - self.disasm_cb = None - self.split_dis = set() self.options = {"jit_maxline": 50, # Maximum number of line jitted "max_exec_per_call": 0 # 0 means no limit } - self.mdis = asmblock.disasmEngine(ir_arch.arch, ir_arch.attrib, bs, - lines_wd=self.options["jit_maxline"], - symbol_pool=ir_arch.symbol_pool, - follow_call=False, - dontdis_retcall=False, - split_dis=self.split_dis, - dis_block_callback=self.disasm_cb) + # Disassembly Engine + self.split_dis = set() + 
self.mdis = disasmEngine( + ir_arch.arch, ir_arch.attrib, bin_stream, + lines_wd=self.options["jit_maxline"], + loc_db=ir_arch.loc_db, + follow_call=False, + dontdis_retcall=False, + split_dis=self.split_dis, + ) def set_options(self, **kwargs): "Set options relative to the backend" - self.options.update(kwargs) def clear_jitted_blocks(self): "Reset all jitted blocks" - self.lbl2jitbloc.clear() - self.lbl2bloc.clear() - self.blocs_mem_interval = interval() + self.offset_to_jitted_func.clear() + self.loc_key_to_block.clear() + self.blocks_mem_interval = interval() def add_disassembly_splits(self, *args): """The disassembly engine will stop on address in args if they @@ -90,7 +93,7 @@ class JitCore(object): "Initialise the Jitter" raise NotImplementedError("Abstract class") - def get_bloc_min_max(self, cur_block): + def set_block_min_max(self, cur_block): "Update cur_block to set min/max address" if cur_block.lines: @@ -98,94 +101,107 @@ class JitCore(object): cur_block.ad_max = cur_block.lines[-1].offset + cur_block.lines[-1].l else: # 1 byte block for unknown mnemonic - cur_block.ad_min = cur_block.label.offset - cur_block.ad_max = cur_block.label.offset+1 + offset = ir_arch.loc_db.get_location_offset(cur_block.loc_key) + cur_block.ad_min = offset + cur_block.ad_max = offset+1 - def add_bloc_to_mem_interval(self, vm, block): + def add_block_to_mem_interval(self, vm, block): "Update vm to include block addresses in its memory range" - - self.blocs_mem_interval += interval([(block.ad_min, block.ad_max - 1)]) + self.blocks_mem_interval += interval([(block.ad_min, block.ad_max - 1)]) vm.reset_code_bloc_pool() - for a, b in self.blocs_mem_interval: + for a, b in self.blocks_mem_interval: vm.add_code_bloc(a, b + 1) - def jitirblocs(self, label, irblocks): + def jit_irblocks(self, label, irblocks): """JiT a group of irblocks. 
@label: the label of the irblocks - @irblocks: a gorup of irblocks + @irblocks: a group of irblocks """ raise NotImplementedError("Abstract class") - def add_bloc(self, block): + def add_block(self, block): """Add a block to JiT and JiT it. @block: asm_bloc to add """ - - irblocks = self.ir_arch.add_block(block, gen_pc_updt = True) + irblocks = self.ir_arch.add_asmblock_to_ircfg(block, self.ircfg, gen_pc_updt = True) block.blocks = irblocks - self.jitirblocs(block.label, irblocks) + self.jit_irblocks(block.loc_key, irblocks) - def disbloc(self, addr, vm): + def disasm_and_jit_block(self, addr, vm): """Disassemble a new block and JiT it - @addr: address of the block to disassemble (AsmLabel or int) + @addr: address of the block to disassemble (LocKey or int) @vm: VmMngr instance """ # Get the block - if isinstance(addr, asmblock.AsmLabel): - addr = addr.offset + if isinstance(addr, LocKey): + addr = self.ir_arch.loc_db.get_location_offset(addr) + if addr is None: + raise RuntimeError("Unknown offset for LocKey") # Prepare disassembler self.mdis.lines_wd = self.options["jit_maxline"] - self.mdis.dis_block_callback = self.disasm_cb # Disassemble it - try: - cur_block = self.mdis.dis_block(addr) - except IOError: - # vm_exception_flag is set - label = self.ir_arch.symbol_pool.getby_offset_create(addr) - cur_block = asmblock.AsmBlockBad(label) - + cur_block = self.mdis.dis_block(addr) + if isinstance(cur_block, AsmBlockBad): + return cur_block # Logging if self.log_newbloc: - print cur_block + print cur_block.to_string(self.mdis.loc_db) # Update label -> block - self.lbl2bloc[cur_block.label] = cur_block + self.loc_key_to_block[cur_block.loc_key] = cur_block # Store min/max block address needed in jit automod code - self.get_bloc_min_max(cur_block) + self.set_block_min_max(cur_block) # JiT it - self.add_bloc(cur_block) + self.add_block(cur_block) # Update jitcode mem range - self.add_bloc_to_mem_interval(vm, cur_block) - - def runbloc(self, cpu, lbl, breakpoints): - 
"""Run the block starting at lbl. + self.add_block_to_mem_interval(vm, cur_block) + return cur_block + + def run_at(self, cpu, offset, stop_offsets): + """Run from the starting address @offset. + Execution will stop if: + - max_exec_per_call option is reached + - a new, yet unknown, block is reached after the execution of block at + address @offset + - an address in @stop_offsets is reached @cpu: JitCpu instance - @lbl: target label + @offset: starting address (int) + @stop_offsets: set of address on which the jitter must stop """ - if lbl is None: - lbl = getattr(cpu, self.ir_arch.pc.name) + if offset is None: + offset = getattr(cpu, self.ir_arch.pc.name) - if not lbl in self.lbl2jitbloc: + if offset not in self.offset_to_jitted_func: # Need to JiT the block - self.disbloc(lbl, cpu.vmmngr) + cur_block = self.disasm_and_jit_block(offset, cpu.vmmngr) + if isinstance(cur_block, AsmBlockBad): + errno = cur_block.errno + if errno == AsmBlockBad.ERROR_IO: + cpu.vmmngr.set_exception(EXCEPT_ACCESS_VIOL) + elif errno == AsmBlockBad.ERROR_CANNOT_DISASM: + cpu.set_exception(EXCEPT_UNK_MNEMO) + else: + raise RuntimeError("Unhandled disasm result %r" % errno) + return offset # Run the block and update cpu/vmmngr state - return self.exec_wrapper(lbl, cpu, self.lbl2jitbloc.data, breakpoints, + return self.exec_wrapper(offset, cpu, self.offset_to_jitted_func.data, + stop_offsets, self.options["max_exec_per_call"]) - def blocs2memrange(self, blocks): + def blocks_to_memrange(self, blocks): """Return an interval instance standing for blocks addresses - @blocks: list of asm_bloc instances + @blocks: list of AsmBlock instances """ mem_range = interval() @@ -204,10 +220,10 @@ class JitCore(object): vm.reset_code_bloc_pool() # Add blocks in the pool - for start, stop in self.blocs_mem_interval: + for start, stop in self.blocks_mem_interval: vm.add_code_bloc(start, stop + 1) - def del_bloc_in_range(self, ad1, ad2): + def del_block_in_range(self, ad1, ad2): """Find and remove jitted 
block in range [ad1, ad2]. Return the list of block removed. @ad1: First address @@ -216,7 +232,7 @@ class JitCore(object): # Find concerned blocks modified_blocks = set() - for block in self.lbl2bloc.values(): + for block in self.loc_key_to_block.values(): if not block.lines: continue if block.ad_max <= ad1 or block.ad_min >= ad2: @@ -227,26 +243,28 @@ class JitCore(object): modified_blocks.add(block) # Generate interval to delete - del_interval = self.blocs2memrange(modified_blocks) + del_interval = self.blocks_to_memrange(modified_blocks) # Remove interval from monitored interval list - self.blocs_mem_interval -= del_interval + self.blocks_mem_interval -= del_interval # Remove modified blocks for block in modified_blocks: try: for irblock in block.blocks: # Remove offset -> jitted block link - if irblock.label.offset in self.lbl2jitbloc: - del(self.lbl2jitbloc[irblock.label.offset]) + offset = self.ir_arch.loc_db.get_location_offset(irblock.loc_key) + if offset in self.offset_to_jitted_func: + del(self.offset_to_jitted_func[offset]) except AttributeError: # The block has never been translated in IR - if block.label.offset in self.lbl2jitbloc: - del(self.lbl2jitbloc[block.label.offset]) + offset = self.ir_arch.loc_db.get_location_offset(block.loc_key) + if offset in self.offset_to_jitted_func: + del(self.offset_to_jitted_func[offset]) # Remove label -> block link - del(self.lbl2bloc[block.label]) + del(self.loc_key_to_block[block.loc_key]) return modified_blocks @@ -256,7 +274,7 @@ class JitCore(object): @mem_range: list of start/stop addresses """ for addr_start, addr_stop in mem_range: - self.del_bloc_in_range(addr_start, addr_stop) + self.del_block_in_range(addr_start, addr_stop) self.__updt_jitcode_mem_range(vm) vm.reset_memory_access() @@ -275,9 +293,20 @@ class JitCore(object): @block: asmblock """ block_raw = "".join(line.b for line in block.lines) - block_hash = md5("%X_%s_%s_%s_%s" % (block.label.offset, + offset = 
self.ir_arch.loc_db.get_location_offset(block.loc_key) + block_hash = md5("%X_%s_%s_%s_%s" % (offset, self.arch_name, self.log_mn, self.log_regs, block_raw)).hexdigest() return block_hash + + @property + def disasm_cb(self): + warnings.warn("Deprecated API: use .mdis.dis_block_callback") + return self.mdis.dis_block_callback + + @disasm_cb.setter + def disasm_cb(self, value): + warnings.warn("Deprecated API: use .mdis.dis_block_callback") + self.mdis.dis_block_callback = value diff --git a/miasm2/jitter/jitcore_cc_base.py b/miasm2/jitter/jitcore_cc_base.py index 4dd8825a..bbf10a53 100644 --- a/miasm2/jitter/jitcore_cc_base.py +++ b/miasm2/jitter/jitcore_cc_base.py @@ -46,9 +46,9 @@ class resolver: class JitCore_Cc_Base(JitCore): "JiT management, abstract class using a C compiler as backend" - def __init__(self, ir_arch, bs=None): + def __init__(self, ir_arch, bin_stream): self.jitted_block_delete_cb = self.deleteCB - super(JitCore_Cc_Base, self).__init__(ir_arch, bs) + super(JitCore_Cc_Base, self).__init__(ir_arch, bin_stream) self.resolver = resolver() self.ir_arch = ir_arch self.states = {} @@ -85,21 +85,12 @@ class JitCore_Cc_Base(JitCore): """ self.codegen = codegen - def label2fname(self, label): - """ - Generate function name from @label - @label: AsmLabel instance - """ - return "block_%s" % self.codegen.label_to_jitlabel(label) - - def gen_c_code(self, label, block): + def gen_c_code(self, block): """ Return the C code corresponding to the @irblocks - @label: AsmLabel of the block to jit @irblocks: list of irblocks """ - f_name = self.label2fname(label) - f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name + f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % self.FUNCNAME out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs) out = [f_declaration + '{'] + out + ['}\n'] c_code = out diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py index ccccc37a..dbaa2a08 100644 --- 
a/miasm2/jitter/jitcore_gcc.py +++ b/miasm2/jitter/jitcore_gcc.py @@ -13,9 +13,9 @@ from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core class JitCore_Gcc(JitCore_Cc_Base): "JiT management, using a C compiler as backend" - def __init__(self, ir_arch, bs=None): - super(JitCore_Gcc, self).__init__(ir_arch, bs) - self.exec_wrapper = Jitgcc.gcc_exec_bloc + def __init__(self, ir_arch, bin_stream): + super(JitCore_Gcc, self).__init__(ir_arch, bin_stream) + self.exec_wrapper = Jitgcc.gcc_exec_block def deleteCB(self, offset): """Free the state associated to @offset and delete it @@ -25,14 +25,14 @@ class JitCore_Gcc(JitCore_Cc_Base): del self.states[offset] def load_code(self, label, fname_so): - f_name = self.label2fname(label) lib = ctypes.cdll.LoadLibrary(fname_so) - func = getattr(lib, f_name) + func = getattr(lib, self.FUNCNAME) addr = ctypes.cast(func, ctypes.c_void_p).value - self.lbl2jitbloc[label.offset] = addr - self.states[label.offset] = lib + offset = self.ir_arch.loc_db.get_location_offset(label) + self.offset_to_jitted_func[offset] = addr + self.states[offset] = lib - def add_bloc(self, block): + def add_block(self, block): """Add a bloc to JiT and JiT it. 
@block: block to jit """ @@ -40,7 +40,7 @@ class JitCore_Gcc(JitCore_Cc_Base): fname_out = os.path.join(self.tempdir, "%s.so" % block_hash) if not os.access(fname_out, os.R_OK | os.X_OK): - func_code = self.gen_c_code(block.label, block) + func_code = self.gen_c_code(block) # Create unique C file fdesc, fname_in = tempfile.mkstemp(suffix=".c") @@ -60,7 +60,7 @@ class JitCore_Gcc(JitCore_Cc_Base): os.rename(fname_tmp, fname_out) os.remove(fname_in) - self.load_code(block.label, fname_out) + self.load_code(block.loc_key, fname_out) @staticmethod def gen_C_source(ir_arch, func_code): diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py index 452b6d84..ea4f20ec 100644 --- a/miasm2/jitter/jitcore_llvm.py +++ b/miasm2/jitter/jitcore_llvm.py @@ -8,7 +8,6 @@ import Jitllvm class JitCore_LLVM(jitcore.JitCore): - "JiT management, using LLVM as backend" # Architecture dependant libraries @@ -20,8 +19,8 @@ class JitCore_LLVM(jitcore.JitCore): "ppc32": "JitCore_ppc32.so", } - def __init__(self, ir_arch, bs=None): - super(JitCore_LLVM, self).__init__(ir_arch, bs) + def __init__(self, ir_arch, bin_stream): + super(JitCore_LLVM, self).__init__(ir_arch, bin_stream) self.options.update({"safe_mode": True, # Verify each function "optimise": True, # Optimise functions @@ -29,7 +28,7 @@ class JitCore_LLVM(jitcore.JitCore): "log_assembly": False, # Print assembly executed }) - self.exec_wrapper = Jitllvm.llvm_exec_bloc + self.exec_wrapper = Jitllvm.llvm_exec_block self.ir_arch = ir_arch # Cache temporary dir @@ -74,16 +73,17 @@ class JitCore_LLVM(jitcore.JitCore): # Enable caching self.context.enable_cache() - def add_bloc(self, block): + def add_block(self, block): """Add a block to JiT and JiT it. 
@block: the block to add """ + block_hash = self.hash_block(block) fname_out = os.path.join(self.tempdir, "%s.bc" % block_hash) if not os.access(fname_out, os.R_OK): # Build a function in the context - func = LLVMFunction(self.context, LLVMFunction.canonize_label_name(block.label)) + func = LLVMFunction(self.context, self.FUNCNAME) # Set log level func.log_regs = self.log_regs @@ -114,7 +114,9 @@ class JitCore_LLVM(jitcore.JitCore): else: # The cache file exists: function can be loaded from cache - ptr = self.context.get_ptr_from_cache(fname_out, LLVMFunction.canonize_label_name(block.label)) + ptr = self.context.get_ptr_from_cache(fname_out, self.FUNCNAME) # Store a pointer on the function jitted code - self.lbl2jitbloc[block.label.offset] = ptr + loc_key = block.loc_key + offset = self.ir_arch.loc_db.get_location_offset(loc_key) + self.offset_to_jitted_func[offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index 799848ab..61bd98d0 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -4,7 +4,6 @@ import miasm2.jitter.csts as csts from miasm2.expression.simplifications import ExpressionSimplifier from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec - ################################################################################ # Python jitter Core # ################################################################################ @@ -15,15 +14,19 @@ class JitCore_Python(jitcore.JitCore): SymbExecClass = EmulatedSymbExec - def __init__(self, ir_arch, bs=None): - super(JitCore_Python, self).__init__(ir_arch, bs) + def __init__(self, ir_arch, bin_stream): + super(JitCore_Python, self).__init__(ir_arch, bin_stream) self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() # CPU & VM (None for now) will be set later expr_simp = ExpressionSimplifier() expr_simp.enable_passes(ExpressionSimplifier.PASS_COMMONS) - self.symbexec = self.SymbExecClass(None, None, self.ir_arch, {}, - 
sb_expr_simp=expr_simp) + self.symbexec = self.SymbExecClass( + None, None, + self.ir_arch, {}, + sb_expr_simp=expr_simp + ) self.symbexec.enable_emulated_simplifications() def set_cpu_vm(self, cpu, vm): @@ -34,10 +37,10 @@ class JitCore_Python(jitcore.JitCore): "Preload symbols according to current architecture" self.symbexec.reset_regs() - def jitirblocs(self, label, irblocks): + def jit_irblocks(self, loc_key, irblocks): """Create a python function corresponding to an irblocks' group. - @label: the label of the irblocks - @irblocks: a gorup of irblocks + @loc_key: the loc_key of the irblocks + @irblocks: a group of irblocks """ def myfunc(cpu): @@ -48,7 +51,7 @@ class JitCore_Python(jitcore.JitCore): vmmngr = cpu.vmmngr # Keep current location in irblocks - cur_label = label + cur_loc_key = loc_key # Required to detect new instructions offsets_jitted = set() @@ -57,13 +60,14 @@ class JitCore_Python(jitcore.JitCore): exec_engine = self.symbexec expr_simp = exec_engine.expr_simp + known_loc_keys = set(irb.loc_key for irb in irblocks) # For each irbloc inside irblocks while True: - # Get the current bloc for irb in irblocks: - if irb.label == cur_label: + if irb.loc_key == cur_loc_key: break + else: raise RuntimeError("Irblocks must end with returning an " "ExprInt instance") @@ -75,7 +79,7 @@ class JitCore_Python(jitcore.JitCore): for assignblk in irb: instr = assignblk.instr # For each new instruction (in assembly) - if instr.offset not in offsets_jitted: + if instr is not None and instr.offset not in offsets_jitted: # Test exceptions vmmngr.check_invalid_code_blocs() vmmngr.check_memory_breakpoint() @@ -120,23 +124,25 @@ class JitCore_Python(jitcore.JitCore): # Manage resulting address if isinstance(ad, m2_expr.ExprInt): return ad.arg.arg - elif isinstance(ad, m2_expr.ExprId): - cur_label = ad.name + elif isinstance(ad, m2_expr.ExprLoc): + cur_loc_key = ad.loc_key else: raise NotImplementedError("Type not handled: %s" % ad) - # Associate myfunc with current 
label - self.lbl2jitbloc[label.offset] = myfunc + # Associate myfunc with current loc_key + offset = self.ir_arch.loc_db.get_location_offset(loc_key) + assert offset is not None + self.offset_to_jitted_func[offset] = myfunc - def exec_wrapper(self, label, cpu, _lbl2jitbloc, _breakpoints, + def exec_wrapper(self, loc_key, cpu, _offset_to_jitted_func, _stop_offsets, _max_exec_per_call): - """Call the function @label with @cpu - @label: function's label + """Call the function @loc_key with @cpu + @loc_key: function's loc_key @cpu: JitCpu instance """ - # Get Python function corresponding to @label - fc_ptr = self.lbl2jitbloc[label] + # Get Python function corresponding to @loc_key + fc_ptr = self.offset_to_jitted_func[loc_key] # Execute the function return fc_ptr(cpu) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index db486b4f..288e737a 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -1,5 +1,6 @@ import logging +import warnings from functools import wraps from collections import Sequence, namedtuple, Iterator @@ -160,7 +161,7 @@ class ExceptionHandle(): return not self.__eq__(to_cmp) -class jitter(object): +class Jitter(object): "Main class for JIT handling" @@ -204,8 +205,11 @@ class jitter(object): self.cpu = jcore.JitCpu() self.ir_arch = ir_arch self.bs = bin_stream_vm(self.vm) + self.ircfg = self.ir_arch.new_ircfg() - self.symbexec = EmulatedSymbExec(self.cpu, self.vm, self.ir_arch, {}) + self.symbexec = EmulatedSymbExec( + self.cpu, self.vm, self.ir_arch, {} + ) self.symbexec.reset_regs() try: @@ -302,18 +306,20 @@ class jitter(object): """ self.exceptions_handler.add_callback(flag, callback) - def runbloc(self, pc): + def run_at(self, pc): """Wrapper on JiT backend. Run the code at PC and return the next PC. 
@pc: address of code to run""" - return self.jit.runbloc(self.cpu, pc, self.breakpoints_handler.callbacks) + return self.jit.run_at( + self.cpu, pc, + set(self.breakpoints_handler.callbacks.keys()) + ) def runiter_once(self, pc): """Iterator on callbacks results on code running from PC. Check exceptions before breakpoints.""" self.pc = pc - # Callback called before exec if self.exec_cb is not None: res = self.exec_cb(self) @@ -349,7 +355,7 @@ class jitter(object): assert(self.get_exception() == 0) # Run the bloc at PC - self.pc = self.runbloc(self.pc) + self.pc = self.run_at(self.pc) # Check exceptions (raised by the execution of the block) exception_flag = self.get_exception() @@ -485,3 +491,33 @@ class jitter(object): self.symbexec.update_cpu_from_engine() return ret + + def set_trace_log(self, + trace_instr=True, trace_regs=True, + trace_new_blocks=False): + """ + Activate/Deactivate trace log options + + @trace_instr: activate instructions tracing log + @trace_regs: activate registers tracing log + @trace_new_blocks: dump new code blocks log + """ + + # As trace state changes, clear already jitted blocks + self.jit.clear_jitted_blocks() + + self.jit.log_mn = trace_instr + self.jit.log_regs = trace_regs + self.jit.log_newbloc = trace_new_blocks + + +class jitter(Jitter): + """ + DEPRECATED object + Use Jitter instead of jitter + """ + + + def __init__(self, *args, **kwargs): + warnings.warn("Deprecated API: use Jitter") + super(jitter, self).__init__(*args, **kwargs) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 97cd9f17..d63351cc 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -14,7 +14,8 @@ import os from llvmlite import binding as llvm from llvmlite import ir as llvm_ir -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprMem, ExprSlice, \ + ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey import miasm2.jitter.csts as m2_csts import 
miasm2.core.asmblock as m2_asmblock from miasm2.jitter.codegen import CGen @@ -43,7 +44,7 @@ class LLVMType(llvm_ir.Type): @classmethod def generic(cls, e): "Generic value for execution" - if isinstance(e, m2_expr.ExprInt): + if isinstance(e, ExprInt): return llvm_e.GenericValue.int(LLVMType.IntType(e.size), int(e.arg)) elif isinstance(e, llvm_e.GenericValue): return e @@ -69,6 +70,16 @@ class LLVMContext(): self.target_machine = target.create_target_machine() self.init_exec_engine() + def canonize_label_name(self, label): + """Canonize @label names to a common form. + @label: str or asmlabel instance""" + if isinstance(label, str): + return label + elif isinstance(label, LocKey): + return str(label) + else: + raise ValueError("label must either be str or LocKey") + def optimise_level(self, level=2): """Set the optimisation level to @level from 0 to 2 0: non-optimized @@ -193,7 +204,7 @@ class LLVMContext_JIT(LLVMContext): fc = {} p8 = llvm_ir.PointerType(LLVMType.IntType(8)) - for i in [8, 16, 32, 64]: + for i in [8, 16, 32, 64, 128]: fc["MEM_LOOKUP_%02d" % i] = {"ret": LLVMType.IntType(i), "args": [p8, LLVMType.IntType(64)]} @@ -405,7 +416,7 @@ class LLVMFunction(): @label: str or asmlabel @overwrite: if False, do nothing if a bbl with the same name already exists Return the corresponding LLVM Basic Block""" - name = self.canonize_label_name(label) + name = self.llvm_context.canonize_label_name(label) bbl = self.name2bbl.get(name, None) if not overwrite and bbl is not None: return bbl @@ -483,25 +494,9 @@ class LLVMFunction(): var_casted = var self.builder.ret(var_casted) - @staticmethod - def canonize_label_name(label): - """Canonize @label names to a common form. 
- @label: str or asmlabel instance""" - if isinstance(label, str): - return label - if m2_asmblock.expr_is_label(label): - label = label.name - if isinstance(label, m2_asmblock.AsmLabel): - if label.offset is None: - return "label_%s" % label.name - else: - return "label_%X" % label.offset - else: - raise ValueError("label must either be str or asmlabel") - - def get_basic_bloc_by_label(self, label): + def get_basic_block_by_loc_key(self, loc_key): "Return the bbl corresponding to label, None otherwise" - return self.name2bbl.get(self.canonize_label_name(label), None) + return self.name2bbl.get(self.llvm_context.canonize_label_name(loc_key), None) def global_constant(self, name, value): """ @@ -567,11 +562,11 @@ class LLVMFunction(): # Destination builder = self.builder - if isinstance(dst, m2_expr.ExprId): + if isinstance(dst, ExprId): ptr_casted = self.get_ptr_by_expr(dst) builder.store(src, ptr_casted) - elif isinstance(dst, m2_expr.ExprMem): + elif isinstance(dst, ExprMem): addr = self.add_ir(dst.arg) self.llvm_context.memory_write(self, addr, dst.size, src) else: @@ -624,20 +619,19 @@ class LLVMFunction(): builder = self.builder - if isinstance(expr, m2_expr.ExprInt): + if isinstance(expr, ExprInt): ret = llvm_ir.Constant(LLVMType.IntType(expr.size), int(expr.arg)) self.update_cache(expr, ret) return ret - if isinstance(expr, m2_expr.ExprId): - name = expr.name - if not isinstance(name, str): - # Resolve label - offset = name.offset - ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) - self.update_cache(expr, ret) - return ret + if expr.is_loc(): + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(expr.loc_key) + ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) + self.update_cache(expr, ret) + return ret + if isinstance(expr, ExprId): + name = expr.name try: # If expr.name is already known (args) return self.local_vars[name] @@ -650,7 +644,7 @@ class LLVMFunction(): self.update_cache(expr, var) return var - if isinstance(expr, 
m2_expr.ExprOp): + if isinstance(expr, ExprOp): op = expr.op if (op in self.op_translate or @@ -857,12 +851,12 @@ class LLVMFunction(): raise NotImplementedError() - if isinstance(expr, m2_expr.ExprMem): + if isinstance(expr, ExprMem): addr = self.add_ir(expr.arg) return self.llvm_context.memory_lookup(self, addr, expr.size) - if isinstance(expr, m2_expr.ExprCond): + if isinstance(expr, ExprCond): # Compute cond cond = self.add_ir(expr.cond) zero_casted = LLVMType.IntType(expr.cond.size)(0) @@ -875,7 +869,7 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret - if isinstance(expr, m2_expr.ExprSlice): + if isinstance(expr, ExprSlice): src = self.add_ir(expr.arg) @@ -901,7 +895,7 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret - if isinstance(expr, m2_expr.ExprCompose): + if isinstance(expr, ExprCompose): args = [] @@ -982,9 +976,9 @@ class LLVMFunction(): builder.position_at_end(then_block) PC = self.llvm_context.PC if isinstance(offset, (int, long)): - offset = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + offset = self.add_ir(ExprInt(offset, PC.size)) self.affect(offset, PC) - self.affect(self.add_ir(m2_expr.ExprInt(1, 8)), m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) self.set_ret(offset) builder.position_at_end(merge_block) @@ -1029,9 +1023,9 @@ class LLVMFunction(): builder.position_at_end(then_block) PC = self.llvm_context.PC if isinstance(offset, (int, long)): - offset = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + offset = self.add_ir(ExprInt(offset, PC.size)) self.affect(offset, PC) - self.affect(self.add_ir(m2_expr.ExprInt(1, 8)), m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) self.set_ret(offset) builder.position_at_end(merge_block) @@ -1056,6 +1050,9 @@ class LLVMFunction(): self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) self.check_memory_exception(next_instr, restricted_exception=False) + if attrib.set_exception: + 
self.check_cpu_exception(next_instr, restricted_exception=False) + if attrib.mem_read | attrib.mem_write: fc_ptr = self.mod.get_global("reset_memory_access") self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) @@ -1073,9 +1070,9 @@ class LLVMFunction(): for i, solution in enumerate(possible_values(expr)): value = solution.value index = dst2case.get(value, i) - to_eval = to_eval.replace_expr({value: m2_expr.ExprInt(index, value.size)}) + to_eval = to_eval.replace_expr({value: ExprInt(index, value.size)}) dst2case[value] = index - if m2_asmblock.expr_is_int_or_label(value): + if value.is_int() or value.is_loc(): case2dst[i] = value else: case2dst[i] = self.add_ir(value) @@ -1101,13 +1098,14 @@ class LLVMFunction(): # We are no longer in the main stream, deactivate cache self.main_stream = False - if isinstance(dst, m2_expr.ExprInt): - dst = m2_expr.ExprId(self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)), - dst.size) + if isinstance(dst, ExprInt): + loc_key = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(int(dst)) + dst = ExprLoc(loc_key, dst.size) - if m2_asmblock.expr_is_label(dst): - bbl = self.get_basic_bloc_by_label(dst) - offset = dst.name.offset + if isinstance(dst, ExprLoc): + loc_key = dst.loc_key + bbl = self.get_basic_block_by_loc_key(loc_key) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) if bbl is not None: # "local" jump, inside this function if offset is None: @@ -1127,7 +1125,7 @@ class LLVMFunction(): # extern # "extern" jump on a defined offset, return to the caller - dst = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + dst = self.add_ir(ExprInt(offset, PC.size)) # "extern" jump with a computed value, return to the caller assert isinstance(dst, (llvm_ir.Instruction, llvm_ir.Value)) @@ -1139,7 +1137,7 @@ class LLVMFunction(): self.gen_post_code(attrib) self.affect(dst, PC) self.gen_post_instr_checks(attrib, dst) - self.affect(self.add_ir(m2_expr.ExprInt(0, 8)), m2_expr.ExprId("status", 
32)) + self.affect(self.add_ir(ExprInt(0, 8)), ExprId("status", 32)) self.set_ret(dst) @@ -1163,7 +1161,7 @@ class LLVMFunction(): # Prefetch memory for element in assignblk.get_r(mem_read=True): - if isinstance(element, m2_expr.ExprMem): + if isinstance(element, ExprMem): self.add_ir(element) # Evaluate expressions @@ -1181,7 +1179,7 @@ class LLVMFunction(): # Update the memory for dst, src in values.iteritems(): - if isinstance(dst, m2_expr.ExprMem): + if isinstance(dst, ExprMem): self.affect(src, dst) # Check memory write exception @@ -1191,7 +1189,7 @@ class LLVMFunction(): # Update registers values for dst, src in values.iteritems(): - if not isinstance(dst, m2_expr.ExprMem): + if not isinstance(dst, ExprMem): self.affect(src, dst) # Check post assignblk exception flags @@ -1232,11 +1230,12 @@ class LLVMFunction(): builder = self.builder m2_exception_flag = self.llvm_context.ir_arch.arch.regs.exception_flags t_size = LLVMType.IntType(m2_exception_flag.size) - self.affect(self.add_ir(m2_expr.ExprInt(1, 8)), - m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(1, 8)), + ExprId("status", 32)) self.affect(t_size(m2_csts.EXCEPT_UNK_MNEMO), m2_exception_flag) - self.set_ret(LLVMType.IntType(64)(asmblock.label.offset)) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(asmblock.loc_key) + self.set_ret(LLVMType.IntType(64)(offset)) def gen_finalize(self, asmblock, codegen): """ @@ -1247,11 +1246,11 @@ class LLVMFunction(): next_label = codegen.get_block_post_label(asmblock) builder = self.builder - builder.position_at_end(self.get_basic_bloc_by_label(next_label)) + builder.position_at_end(self.get_basic_block_by_loc_key(next_label)) # Common code - self.affect(self.add_ir(m2_expr.ExprInt(0, 8)), - m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(0, 8)), + ExprId("status", 32)) # Check if IRDst has been set zero_casted = LLVMType.IntType(codegen.delay_slot_set.size)(0) @@ -1274,14 +1273,15 @@ class LLVMFunction(): PC = 
self.llvm_context.PC to_ret = self.add_ir(codegen.delay_slot_dst) self.affect(to_ret, PC) - self.affect(self.add_ir(m2_expr.ExprInt(0, 8)), - m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(0, 8)), + ExprId("status", 32)) self.set_ret(to_ret) # Else Block builder.position_at_end(else_block) PC = self.llvm_context.PC - to_ret = LLVMType.IntType(PC.size)(next_label.offset) + next_label_offset = self.llvm_context.ir_arch.loc_db.get_location_offset(next_label) + to_ret = LLVMType.IntType(PC.size)(next_label_offset) self.affect(to_ret, PC) self.set_ret(to_ret) @@ -1290,16 +1290,16 @@ class LLVMFunction(): Prototype : f(i8* jitcpu, i8* vmcpu, i8* vmmngr, i8* status)""" # Build function signature - self.my_args.append((m2_expr.ExprId("jitcpu", 32), + self.my_args.append((ExprId("jitcpu", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "jitcpu")) - self.my_args.append((m2_expr.ExprId("vmcpu", 32), + self.my_args.append((ExprId("vmcpu", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "vmcpu")) - self.my_args.append((m2_expr.ExprId("vmmngr", 32), + self.my_args.append((ExprId("vmmngr", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "vmmngr")) - self.my_args.append((m2_expr.ExprId("status", 32), + self.my_args.append((ExprId("status", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "status")) ret_size = 64 @@ -1317,7 +1317,7 @@ class LLVMFunction(): # Create basic blocks (for label branchs) entry_bbl, builder = self.entry_bbl, self.builder for instr in asmblock.lines: - lbl = self.llvm_context.ir_arch.symbol_pool.getby_offset_create(instr.offset) + lbl = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(instr.offset) self.append_basic_block(lbl) # TODO: merge duplicate code with CGen @@ -1332,9 +1332,10 @@ class LLVMFunction(): ptr = self.CreateEntryBlockAlloca(eltype, default_value=eltype(0)) self.local_vars_pointers[element.name] = ptr - lbl = codegen.get_block_post_label(asmblock) - instr_offsets.append(lbl.offset) - self.append_basic_block(lbl) + 
loc_key = codegen.get_block_post_label(asmblock) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) + instr_offsets.append(offset) + self.append_basic_block(loc_key) # Add content builder.position_at_end(entry_bbl) @@ -1347,7 +1348,7 @@ class LLVMFunction(): # Pre-create basic blocks for irblock in irblocks: - self.append_basic_block(irblock.label, overwrite=False) + self.append_basic_block(irblock.loc_key, overwrite=False) # Generate the corresponding code for index, irblock in enumerate(irblocks): @@ -1355,8 +1356,7 @@ class LLVMFunction(): irblock, self.llvm_context.ir_arch.attrib) # Set the builder at the begining of the correct bbl - name = self.canonize_label_name(new_irblock.label) - self.builder.position_at_end(self.get_basic_bloc_by_label(name)) + self.builder.position_at_end(self.get_basic_block_by_loc_key(new_irblock.loc_key)) if index == 0: self.gen_pre_code(instr_attrib) @@ -1367,7 +1367,7 @@ class LLVMFunction(): # Branch entry_bbl on first label builder.position_at_end(entry_bbl) - first_label_bbl = self.get_basic_bloc_by_label(asmblock.label) + first_label_bbl = self.get_basic_block_by_loc_key(asmblock.loc_key) builder.branch(first_label_bbl) diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py index deaebd09..01dea647 100644 --- a/miasm2/jitter/loader/elf.py +++ b/miasm2/jitter/loader/elf.py @@ -32,7 +32,6 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): # XXX quick hack fa = get_import_address_elf(e) dyn_funcs = {} - # log.debug('imported funcs: %s' % fa) for (libname, libfunc), ads in fa.items(): for ad in ads: ad_base_lib = runtime_lib.lib_get_add_base(libname) @@ -77,7 +76,6 @@ def vm_load_elf(vm, fdata, name="", **kargs): # -2: Trick to avoid merging 2 consecutive pages i += [(a_addr, b_addr - 2)] for a, b in i.intervals: - # print hex(a), hex(b) vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00" * (b + 2 - a), repr(name)) diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py 
index 2fe4cd3f..9bc0ef8b 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -163,7 +163,8 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs): section.size = new_size section.rawsize = new_size section.data = strpatchwork.StrPatchwork( - section.data[:new_size]) + section.data[:new_size] + ) section.offset = section.addr # Last section alignement diff --git a/miasm2/jitter/op_semantics.c b/miasm2/jitter/op_semantics.c index 82f56422..0420532a 100644 --- a/miasm2/jitter/op_semantics.c +++ b/miasm2/jitter/op_semantics.c @@ -774,3 +774,23 @@ uint64_t double_to_mem_64(double d) #endif return m; } + +UDIV(16) +UDIV(32) +UDIV(64) +UDIV(128) + +UMOD(16) +UMOD(32) +UMOD(64) +UMOD(128) + +IDIV(16) +IDIV(32) +IDIV(64) +IDIV(128) + +IMOD(16) +IMOD(32) +IMOD(64) +IMOD(128) diff --git a/miasm2/jitter/op_semantics.h b/miasm2/jitter/op_semantics.h index a52afd42..3eb81cff 100644 --- a/miasm2/jitter/op_semantics.h +++ b/miasm2/jitter/op_semantics.h @@ -1,6 +1,9 @@ #ifndef OP_SEMANTICS_H #define OP_SEMANTICS_H +#define uint128_t __uint128_t +#define int128_t __int128_t + #define CC_P 1 extern const uint8_t parity_table[256]; #define parity(a) parity_table[(a) & 0xFF] @@ -27,7 +30,7 @@ unsigned int cntleadzeros(uint64_t size, uint64_t src); unsigned int cnttrailzeros(uint64_t size, uint64_t src); #define UDIV(sizeA) \ - uint ## sizeA ## _t udiv ## sizeA (vm_cpu_t* vmcpu, uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ + uint ## sizeA ## _t udiv ## sizeA (uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ { \ uint ## sizeA ## _t r; \ if (b == 0) { \ @@ -40,7 +43,7 @@ unsigned int cnttrailzeros(uint64_t size, uint64_t src); #define UMOD(sizeA) \ - uint ## sizeA ## _t umod ## sizeA (vm_cpu_t* vmcpu, uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ + uint ## sizeA ## _t umod ## sizeA (uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ { \ uint ## sizeA ## _t r; \ if (b == 0) { \ @@ -53,7 +56,7 @@ unsigned int cnttrailzeros(uint64_t size, 
uint64_t src); #define IDIV(sizeA) \ - int ## sizeA ## _t idiv ## sizeA (vm_cpu_t* vmcpu, int ## sizeA ## _t a, int ## sizeA ## _t b) \ + int ## sizeA ## _t idiv ## sizeA (int ## sizeA ## _t a, int ## sizeA ## _t b) \ { \ int ## sizeA ## _t r; \ if (b == 0) { \ @@ -66,7 +69,7 @@ unsigned int cnttrailzeros(uint64_t size, uint64_t src); #define IMOD(sizeA) \ - int ## sizeA ## _t imod ## sizeA (vm_cpu_t* vmcpu, int ## sizeA ## _t a, int ## sizeA ## _t b) \ + int ## sizeA ## _t imod ## sizeA (int ## sizeA ## _t a, int ## sizeA ## _t b) \ { \ int ## sizeA ## _t r; \ if (b == 0) { \ @@ -77,6 +80,21 @@ unsigned int cnttrailzeros(uint64_t size, uint64_t src); return r; \ } +uint64_t udiv64(uint64_t a, uint64_t b); +uint64_t umod64(uint64_t a, uint64_t b); +int64_t idiv64(int64_t a, int64_t b); +int64_t imod64(int64_t a, int64_t b); + +uint32_t udiv32(uint32_t a, uint32_t b); +uint32_t umod32(uint32_t a, uint32_t b); +int32_t idiv32(int32_t a, int32_t b); +int32_t imod32(int32_t a, int32_t b); + +uint16_t udiv16(uint16_t a, uint16_t b); +uint16_t umod16(uint16_t a, uint16_t b); +int16_t idiv16(int16_t a, int16_t b); +int16_t imod16(int16_t a, int16_t b); + unsigned int x86_cpuid(unsigned int a, unsigned int reg_num); double int2double(unsigned int m); diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c index 6da7bfed..dd0f6cf2 100644 --- a/miasm2/jitter/vm_mngr.c +++ b/miasm2/jitter/vm_mngr.c @@ -101,6 +101,13 @@ uint64_t set_endian64(vm_mngr_t* vm_mngr, uint64_t val) return Endian64_Swap(val); } +uint128_t set_endian128(vm_mngr_t* vm_mngr, uint128_t val) +{ + if (vm_mngr->sex == __BYTE_ORDER) + return val; + else + return Endian128_Swap(val); +} void print_val(uint64_t base, uint64_t addr) { @@ -108,7 +115,7 @@ void print_val(uint64_t base, uint64_t addr) fprintf(stderr, "addr 0x%"PRIX64" val 0x%"PRIX64"\n", addr-base, *ptr); } -inline int midpoint(int imin, int imax) +int midpoint(int imin, int imax) { return (imin + imax) / 2; } @@ -159,11 +166,11 @@ struct 
memory_page_node * get_memory_page_from_address(vm_mngr_t* vm_mngr, uint6 -static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t ad) +static uint128_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t ad) { struct memory_page_node * mpn; unsigned char * addr; - uint64_t ret = 0; + uint128_t ret = 0; struct memory_breakpoint_info * b; @@ -206,6 +213,10 @@ static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint6 ret = *((uint64_t*)addr)&0xFFFFFFFFFFFFFFFFULL; ret = set_endian64(vm_mngr, ret); break; + case 128: + ret = *((uint128_t*)addr)&MASK_128; + ret = set_endian128(vm_mngr, ret); + break; default: exit(EXIT_FAILURE); break; @@ -238,6 +249,9 @@ static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint6 case 64: ret = set_endian64(vm_mngr, ret); break; + case 128: + ret = set_endian128(vm_mngr, ret); + break; default: exit(EXIT_FAILURE); break; @@ -247,7 +261,7 @@ static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint6 } static void memory_page_write(vm_mngr_t* vm_mngr, unsigned int my_size, - uint64_t ad, uint64_t src) + uint64_t ad, uint128_t src) { struct memory_page_node * mpn; unsigned char * addr; @@ -291,6 +305,10 @@ static void memory_page_write(vm_mngr_t* vm_mngr, unsigned int my_size, src = set_endian64(vm_mngr, src); *((uint64_t*)addr) = src&0xFFFFFFFFFFFFFFFFULL; break; + case 128: + src = set_endian128(vm_mngr, src); + *((uint128_t*)addr) = src&MASK_128; + break; default: exit(EXIT_FAILURE); break; @@ -312,6 +330,9 @@ static void memory_page_write(vm_mngr_t* vm_mngr, unsigned int my_size, case 64: src = set_endian64(vm_mngr, src); break; + case 128: + src = set_endian128(vm_mngr, src); + break; default: exit(EXIT_FAILURE); break; @@ -480,6 +501,12 @@ void vm_MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src) memory_page_write(vm_mngr, 64, addr, src); } +void vm_MEM_WRITE_128(vm_mngr_t* vm_mngr, uint64_t addr, uint128_t src) 
+{ + add_mem_write(vm_mngr, addr, 16); + memory_page_write(vm_mngr, 128, addr, src); +} + unsigned char vm_MEM_LOOKUP_08(vm_mngr_t* vm_mngr, uint64_t addr) { unsigned char ret; @@ -508,6 +535,13 @@ uint64_t vm_MEM_LOOKUP_64(vm_mngr_t* vm_mngr, uint64_t addr) ret = memory_page_read(vm_mngr, 64, addr); return ret; } +uint128_t vm_MEM_LOOKUP_128(vm_mngr_t* vm_mngr, uint128_t addr) +{ + uint128_t ret; + add_mem_read(vm_mngr, addr, 16); + ret = memory_page_read(vm_mngr, 128, addr); + return ret; +} int vm_read_mem(vm_mngr_t* vm_mngr, uint64_t addr, char** buffer_ptr, uint64_t size) diff --git a/miasm2/jitter/vm_mngr.h b/miasm2/jitter/vm_mngr.h index eff5e0da..a50b52d0 100644 --- a/miasm2/jitter/vm_mngr.h +++ b/miasm2/jitter/vm_mngr.h @@ -26,6 +26,8 @@ #define __LITTLE_ENDIAN _LITTLE_ENDIAN #endif +#define uint128_t __uint128_t + #define Endian16_Swap(value) \ ((((uint16_t)((value) & 0x00FF)) << 8) | \ (((uint16_t)((value) & 0xFF00)) >> 8)) @@ -46,8 +48,25 @@ ((((uint64_t)value)>>40) & 0x000000000000FF00ULL) | \ ((((uint64_t)value)>>56) & 0x00000000000000FFULL)) - - +#define Endian128_Swap(value) \ + (((((uint128_t)value)>>120) & 0xFF) | \ + ((((uint128_t)value)>>112) & 0xFF) << 8 | \ + ((((uint128_t)value)>>104) & 0xFF) << 16 | \ + ((((uint128_t)value)>>96) & 0xFF) << 24 | \ + ((((uint128_t)value)>>88) & 0xFF) << 32 | \ + ((((uint128_t)value)>>80) & 0xFF) << 40 | \ + ((((uint128_t)value)>>72) & 0xFF) << 48 | \ + ((((uint128_t)value)>>64) & 0xFF) << 56 | \ + ((((uint128_t)value)>>56) & 0xFF) << 64 | \ + ((((uint128_t)value)>>48) & 0xFF) << 72 | \ + ((((uint128_t)value)>>40) & 0xFF) << 80 | \ + ((((uint128_t)value)>>32) & 0xFF) << 88 | \ + ((((uint128_t)value)>>24) & 0xFF) << 96 | \ + ((((uint128_t)value)>>16) & 0xFF) << 104 | \ + ((((uint128_t)value)>>8) & 0xFF) << 112 | \ + ((((uint128_t)value)) & 0xFF) << 120) + +#define MASK_128 ((uint128_t) 0xFFFFFFFFFFFFFFFFULL | (uint128_t) 0xFFFFFFFFFFFFFFFFULL << 64) LIST_HEAD(code_bloc_list_head, code_bloc_node); 
LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); @@ -174,13 +193,13 @@ void vm_MEM_WRITE_08(vm_mngr_t* vm_mngr, uint64_t addr, unsigned char src); void vm_MEM_WRITE_16(vm_mngr_t* vm_mngr, uint64_t addr, unsigned short src); void vm_MEM_WRITE_32(vm_mngr_t* vm_mngr, uint64_t addr, unsigned int src); void vm_MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src); - +void vm_MEM_WRITE_128(vm_mngr_t* vm_mngr, uint64_t addr, uint128_t src); unsigned char vm_MEM_LOOKUP_08(vm_mngr_t* vm_mngr, uint64_t addr); unsigned short vm_MEM_LOOKUP_16(vm_mngr_t* vm_mngr, uint64_t addr); unsigned int vm_MEM_LOOKUP_32(vm_mngr_t* vm_mngr, uint64_t addr); uint64_t vm_MEM_LOOKUP_64(vm_mngr_t* vm_mngr, uint64_t addr); - +uint128_t vm_MEM_LOOKUP_128(vm_mngr_t* vm_mngr, uint128_t addr); void MEM_WRITE_08_PASSTHROUGH(uint64_t addr, unsigned char src); void MEM_WRITE_16_PASSTHROUGH(uint64_t addr, unsigned short src); diff --git a/miasm2/os_dep/win_api_x86_32.py b/miasm2/os_dep/win_api_x86_32.py index a88f4a8a..0101152a 100644 --- a/miasm2/os_dep/win_api_x86_32.py +++ b/miasm2/os_dep/win_api_x86_32.py @@ -734,12 +734,16 @@ def kernel32_VirtualProtect(jitter): flnewprotect = args.flnewprotect & 0xFFF if not flnewprotect in ACCESS_DICT: raise ValueError('unknown access dw!') - jitter.vm.set_mem_access(args.lpvoid, ACCESS_DICT[flnewprotect]) if args.lpfloldprotect: old = jitter.vm.get_mem_access(args.lpvoid) jitter.vm.set_mem(args.lpfloldprotect, pck32(ACCESS_DICT_INV[old])) + for addr in jitter.vm.get_all_memory(): + # Multi-page + if args.lpvoid <= addr < args.lpvoid + args.dwsize: + jitter.vm.set_mem_access(addr, ACCESS_DICT[flnewprotect]) + jitter.func_ret_stdcall(ret_ad, 1) diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py index d0a85e13..d0dbbd8d 100644 --- a/test/analysis/data_flow.py +++ b/test/analysis/data_flow.py @@ -1,10 +1,12 @@ """ Test cases for dead code elimination""" from miasm2.expression.expression import ExprId, ExprInt, ExprAff, ExprMem 
-from miasm2.core.asmblock import AsmLabel +from miasm2.core.locationdb import LocationDB from miasm2.analysis.data_flow import * from miasm2.ir.analysis import ira from miasm2.ir.ir import IRBlock, AssignBlock +loc_db = LocationDB() + a = ExprId("a", 32) b = ExprId("b", 32) c = ExprId("c", 32) @@ -24,13 +26,13 @@ CST1 = ExprInt(0x11, 32) CST2 = ExprInt(0x12, 32) CST3 = ExprInt(0x13, 32) -LBL0 = AsmLabel("lbl0") -LBL1 = AsmLabel("lbl1") -LBL2 = AsmLabel("lbl2") -LBL3 = AsmLabel("lbl3") -LBL4 = AsmLabel("lbl4") -LBL5 = AsmLabel("lbl5") -LBL6 = AsmLabel("lbl6") +LBL0 = loc_db.add_location("lbl0", 0) +LBL1 = loc_db.add_location("lbl1", 1) +LBL2 = loc_db.add_location("lbl2", 2) +LBL3 = loc_db.add_location("lbl3", 3) +LBL4 = loc_db.add_location("lbl4", 4) +LBL5 = loc_db.add_location("lbl5", 5) +LBL6 = loc_db.add_location("lbl6", 6) IRDst = ExprId('IRDst', 32) dummy = ExprId('dummy', 32) @@ -66,117 +68,122 @@ class IRATest(ira): """Fake IRA class for tests""" - def __init__(self, symbol_pool=None): + def __init__(self, loc_db=None): arch = Arch() - super(IRATest, self).__init__(arch, 32, symbol_pool) + super(IRATest, self).__init__(arch, 32, loc_db) self.IRDst = IRDst self.ret_reg = r def get_out_regs(self, _): return set([self.ret_reg, self.sp]) +IRA = IRATest(loc_db) + # graph 1 : Simple graph with dead and alive variables -G1_IRA = IRATest() +G1_IRA = IRA.new_ircfg() G1_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G1_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) G1_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G1_IRA.blocks = {irb.label : irb for irb in [G1_IRB0, G1_IRB1, G1_IRB2]} +for irb in [G1_IRB0, G1_IRB1, G1_IRB2]: + G1_IRA.add_irblock(irb) -G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) -G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) +G1_IRA.add_uniq_edge(G1_IRB0.loc_key, G1_IRB1.loc_key) +G1_IRA.add_uniq_edge(G1_IRB1.loc_key, G1_IRB2.loc_key) # Expected output for graph 1 -G1_EXP_IRA = IRATest() +G1_EXP_IRA = 
IRA.new_ircfg() G1_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(b, CST2)]]) G1_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) G1_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G1_EXP_IRA.blocks = {irb.label : irb for irb in [G1_EXP_IRB0, G1_EXP_IRB1, - G1_EXP_IRB2]} +for irb in [G1_EXP_IRB0, G1_EXP_IRB1, G1_EXP_IRB2]: + G1_EXP_IRA.add_irblock(irb) # graph 2 : Natural loop with dead variable -G2_IRA = IRATest() +G2_IRA = IRA.new_ircfg() G2_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G2_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G2_IRB2 = gen_irblock(LBL2, [[ExprAff(a, r)]]) -G2_IRA.blocks = {irb.label : irb for irb in [G2_IRB0, G2_IRB1, G2_IRB2]} +for irb in [G2_IRB0, G2_IRB1, G2_IRB2]: + G2_IRA.add_irblock(irb) -G2_IRA.graph.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) -G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) -G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB1.label) +G2_IRA.add_uniq_edge(G2_IRB0.loc_key, G2_IRB1.loc_key) +G2_IRA.add_uniq_edge(G2_IRB1.loc_key, G2_IRB2.loc_key) +G2_IRA.add_uniq_edge(G2_IRB1.loc_key, G2_IRB1.loc_key) # Expected output for graph 2 -G2_EXP_IRA = IRATest() +G2_EXP_IRA = IRA.new_ircfg() G2_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(r, CST1)]]) G2_EXP_IRB1 = gen_irblock(LBL1, [[]]) G2_EXP_IRB2 = gen_irblock(LBL2, [[]]) -G2_EXP_IRA.blocks = {irb.label : irb for irb in [G2_EXP_IRB0, G2_EXP_IRB1, - G2_EXP_IRB2]} +for irb in [G2_EXP_IRB0, G2_EXP_IRB1, G2_EXP_IRB2]: + G2_EXP_IRA.add_irblock(irb) # graph 3 : Natural loop with alive variables -G3_IRA = IRATest() +G3_IRA = IRA.new_ircfg() G3_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G3_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G3_IRA.blocks = {irb.label : irb for irb in [G3_IRB0, G3_IRB1, G3_IRB2]} +for irb in [G3_IRB0, G3_IRB1, G3_IRB2]: + G3_IRA.add_irblock(irb) -G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) -G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB2.label) 
-G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB1.label) +G3_IRA.add_uniq_edge(G3_IRB0.loc_key, G3_IRB1.loc_key) +G3_IRA.add_uniq_edge(G3_IRB1.loc_key, G3_IRB2.loc_key) +G3_IRA.add_uniq_edge(G3_IRB1.loc_key, G3_IRB1.loc_key) # Expected output for graph 3 -G3_EXP_IRA = IRATest() +G3_EXP_IRA = IRA.new_ircfg() G3_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G3_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) -G3_EXP_IRA.blocks = {irb.label : irb for irb in [G3_EXP_IRB0, G3_EXP_IRB1, - G3_EXP_IRB2]} +for irb in [G3_EXP_IRB0, G3_EXP_IRB1, G3_EXP_IRB2]: + G3_EXP_IRA.add_irblock(irb) # graph 4 : If/else with dead variables -G4_IRA = IRATest() +G4_IRA = IRA.new_ircfg() G4_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G4_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G4_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]]) G4_IRB3 = gen_irblock(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) -G4_IRA.blocks = {irb.label : irb for irb in [G4_IRB0, G4_IRB1, G4_IRB2, - G4_IRB3]} +for irb in [G4_IRB0, G4_IRB1, G4_IRB2, G4_IRB3]: + G4_IRA.add_irblock(irb) -G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) -G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB2.label) -G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB3.label) -G4_IRA.graph.add_uniq_edge(G4_IRB2.label, G4_IRB3.label) +G4_IRA.add_uniq_edge(G4_IRB0.loc_key, G4_IRB1.loc_key) +G4_IRA.add_uniq_edge(G4_IRB0.loc_key, G4_IRB2.loc_key) +G4_IRA.add_uniq_edge(G4_IRB1.loc_key, G4_IRB3.loc_key) +G4_IRA.add_uniq_edge(G4_IRB2.loc_key, G4_IRB3.loc_key) # Expected output for graph 4 -G4_EXP_IRA = IRATest() +G4_EXP_IRA = IRA.new_ircfg() G4_EXP_IRB0 = gen_irblock(LBL0, [[]]) G4_EXP_IRB1 = gen_irblock(LBL1, [[]]) G4_EXP_IRB2 = gen_irblock(LBL2, [[]]) G4_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(a, CST3)], [ExprAff(r, a)]]) -G4_EXP_IRA.blocks = {irb.label : irb for irb in [G4_EXP_IRB0, G4_EXP_IRB1, - G4_EXP_IRB2, G4_EXP_IRB3]} +for irb in [G4_EXP_IRB0, G4_EXP_IRB1, G4_EXP_IRB2, 
G4_EXP_IRB3]: + G4_EXP_IRA.add_irblock(irb) # graph 5 : Loop and If/else with dead variables -G5_IRA = IRATest() +G5_IRA = IRA.new_ircfg() G5_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G5_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -185,19 +192,19 @@ G5_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST3)]]) G5_IRB4 = gen_irblock(LBL4, [[ExprAff(a, a+CST1)]]) G5_IRB5 = gen_irblock(LBL5, [[ExprAff(a, r)]]) -G5_IRA.blocks = {irb.label : irb for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, - G5_IRB4, G5_IRB5]} +for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3, G5_IRB4, G5_IRB5]: + G5_IRA.add_irblock(irb) -G5_IRA.graph.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) -G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) -G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB3.label) -G5_IRA.graph.add_uniq_edge(G5_IRB2.label, G5_IRB4.label) -G5_IRA.graph.add_uniq_edge(G5_IRB3.label, G5_IRB4.label) -G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB5.label) -G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB1.label) +G5_IRA.add_uniq_edge(G5_IRB0.loc_key, G5_IRB1.loc_key) +G5_IRA.add_uniq_edge(G5_IRB1.loc_key, G5_IRB2.loc_key) +G5_IRA.add_uniq_edge(G5_IRB1.loc_key, G5_IRB3.loc_key) +G5_IRA.add_uniq_edge(G5_IRB2.loc_key, G5_IRB4.loc_key) +G5_IRA.add_uniq_edge(G5_IRB3.loc_key, G5_IRB4.loc_key) +G5_IRA.add_uniq_edge(G5_IRB4.loc_key, G5_IRB5.loc_key) +G5_IRA.add_uniq_edge(G5_IRB4.loc_key, G5_IRB1.loc_key) # Expected output for graph 5 -G5_EXP_IRA = IRATest() +G5_EXP_IRA = IRA.new_ircfg() G5_EXP_IRB0 = gen_irblock(LBL0, [[]]) G5_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -206,72 +213,72 @@ G5_EXP_IRB3 = gen_irblock(LBL3, [[]]) G5_EXP_IRB4 = gen_irblock(LBL4, [[]]) G5_EXP_IRB5 = gen_irblock(LBL5, [[]]) -G5_EXP_IRA.blocks = {irb.label : irb for irb in [G5_EXP_IRB0, G5_EXP_IRB1, - G5_EXP_IRB2, G5_EXP_IRB3, - G5_EXP_IRB4, G5_EXP_IRB5]} +for irb in [G5_EXP_IRB0, G5_EXP_IRB1, G5_EXP_IRB2, + G5_EXP_IRB3, G5_EXP_IRB4, G5_EXP_IRB5]: + G5_EXP_IRA.add_irblock(irb) # graph 6 : Natural 
loop with dead variables symetric affectation # (a = b <-> b = a ) -G6_IRA = IRATest() +G6_IRA = IRA.new_ircfg() G6_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G6_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) G6_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]]) G6_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST2)]]) -G6_IRA.blocks = {irb.label : irb for irb in [G6_IRB0, G6_IRB1, G6_IRB2, - G6_IRB3]} +for irb in [G6_IRB0, G6_IRB1, G6_IRB2, G6_IRB3]: + G6_IRA.add_irblock(irb) -G6_IRA.graph.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) -G6_IRA.graph.add_uniq_edge(G6_IRB1.label, G6_IRB2.label) -G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB1.label) -G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB3.label) +G6_IRA.add_uniq_edge(G6_IRB0.loc_key, G6_IRB1.loc_key) +G6_IRA.add_uniq_edge(G6_IRB1.loc_key, G6_IRB2.loc_key) +G6_IRA.add_uniq_edge(G6_IRB2.loc_key, G6_IRB1.loc_key) +G6_IRA.add_uniq_edge(G6_IRB2.loc_key, G6_IRB3.loc_key) # Expected output for graph 6 -G6_EXP_IRA = IRATest() +G6_EXP_IRA = IRA.new_ircfg() G6_EXP_IRB0 = gen_irblock(LBL0, [[]]) G6_EXP_IRB1 = gen_irblock(LBL1, [[]]) G6_EXP_IRB2 = gen_irblock(LBL2, [[]]) G6_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST2)]]) -G6_EXP_IRA.blocks = {irb.label : irb for irb in [G6_EXP_IRB0, G6_EXP_IRB1, - G6_EXP_IRB2, G6_EXP_IRB3]} +for irb in [G6_EXP_IRB0, G6_EXP_IRB1, G6_EXP_IRB2, G6_EXP_IRB3]: + G6_EXP_IRA.add_irblock(irb) # graph 7 : Double entry loop with dead variables -G7_IRA = IRATest() +G7_IRA = IRA.new_ircfg() G7_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G7_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) G7_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)]]) G7_IRB3 = gen_irblock(LBL3, [[ExprAff(a, r)]]) -G7_IRA.blocks = {irb.label : irb for irb in [G7_IRB0, G7_IRB1, G7_IRB2, - G7_IRB3]} +for irb in [G7_IRB0, G7_IRB1, G7_IRB2, G7_IRB3]: + G7_IRA.add_irblock(irb) -G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) -G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) 
-G7_IRA.graph.add_uniq_edge(G7_IRB2.label, G7_IRB1.label) -G7_IRA.graph.add_uniq_edge(G7_IRB2.label, G7_IRB3.label) -G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB2.label) +G7_IRA.add_uniq_edge(G7_IRB0.loc_key, G7_IRB1.loc_key) +G7_IRA.add_uniq_edge(G7_IRB1.loc_key, G7_IRB2.loc_key) +G7_IRA.add_uniq_edge(G7_IRB2.loc_key, G7_IRB1.loc_key) +G7_IRA.add_uniq_edge(G7_IRB2.loc_key, G7_IRB3.loc_key) +G7_IRA.add_uniq_edge(G7_IRB0.loc_key, G7_IRB2.loc_key) # Expected output for graph 7 -G7_EXP_IRA = IRATest() +G7_EXP_IRA = IRA.new_ircfg() G7_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(r, CST1)]]) G7_EXP_IRB1 = gen_irblock(LBL1, [[]]) G7_EXP_IRB2 = gen_irblock(LBL2, [[]]) G7_EXP_IRB3 = gen_irblock(LBL3, [[]]) -G7_EXP_IRA.blocks = {irb.label : irb for irb in [G7_EXP_IRB0, G7_EXP_IRB1, - G7_EXP_IRB2, G7_EXP_IRB3]} +for irb in [G7_EXP_IRB0, G7_EXP_IRB1, G7_EXP_IRB2, G7_EXP_IRB3]: + G7_EXP_IRA.add_irblock(irb) # graph 8 : Nested loops with dead variables -G8_IRA = IRATest() +G8_IRA = IRA.new_ircfg() G8_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST1)]]) G8_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -279,31 +286,31 @@ G8_IRB2 = gen_irblock(LBL2, [[ExprAff(b, b+CST2)]]) G8_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]]) -G8_IRA.blocks = {irb.label : irb for irb in [G8_IRB0, G8_IRB1, G8_IRB2, - G8_IRB3]} +for irb in [G8_IRB0, G8_IRB1, G8_IRB2, G8_IRB3]: + G8_IRA.add_irblock(irb) -G8_IRA.graph.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) -G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) -G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB1.label) -G8_IRA.graph.add_uniq_edge(G8_IRB2.label, G8_IRB3.label) -G8_IRA.graph.add_uniq_edge(G8_IRB3.label, G8_IRB2.label) +G8_IRA.add_uniq_edge(G8_IRB0.loc_key, G8_IRB1.loc_key) +G8_IRA.add_uniq_edge(G8_IRB1.loc_key, G8_IRB2.loc_key) +G8_IRA.add_uniq_edge(G8_IRB2.loc_key, G8_IRB1.loc_key) +G8_IRA.add_uniq_edge(G8_IRB2.loc_key, G8_IRB3.loc_key) +G8_IRA.add_uniq_edge(G8_IRB3.loc_key, G8_IRB2.loc_key) # Expected output for 
graph 8 -G8_EXP_IRA = IRATest() +G8_EXP_IRA = IRA.new_ircfg() G8_EXP_IRB0 = gen_irblock(LBL0, [[], []]) G8_EXP_IRB1 = gen_irblock(LBL1, [[]]) G8_EXP_IRB2 = gen_irblock(LBL2, [[]]) G8_EXP_IRB3 = gen_irblock(LBL3, [[]]) -G8_EXP_IRA.blocks = {irb.label : irb for irb in [G8_EXP_IRB0, G8_EXP_IRB1, - G8_EXP_IRB2, G8_EXP_IRB3]} +for irb in [G8_EXP_IRB0, G8_EXP_IRB1, G8_EXP_IRB2, G8_EXP_IRB3]: + G8_EXP_IRA.add_irblock(irb) # graph 9 : Miultiple-exits loops with dead variables -G9_IRA = IRATest() +G9_IRA = IRA.new_ircfg() G9_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST1)]]) G9_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)], [ExprAff(b, b+CST1)]]) @@ -311,22 +318,22 @@ G9_IRB2 = gen_irblock(LBL2, [[ExprAff(a, a+CST2)], [ExprAff(b, b+CST2)]]) G9_IRB3 = gen_irblock(LBL3, [[ExprAff(a, b)]]) G9_IRB4 = gen_irblock(LBL4, [[ExprAff(r, a)], [ExprAff(r, b)]]) -G9_IRA.blocks = {irb.label : irb for irb in [G9_IRB0, G9_IRB1, G9_IRB2, - G9_IRB3, G9_IRB4]} +for irb in [G9_IRB0, G9_IRB1, G9_IRB2, G9_IRB3, G9_IRB4]: + G9_IRA.add_irblock(irb) -G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB4.label) -G9_IRA.graph.add_uniq_edge(G9_IRB0.label, G9_IRB1.label) -G9_IRA.graph.add_uniq_edge(G9_IRB1.label, G9_IRB0.label) -G9_IRA.graph.add_uniq_edge(G9_IRB1.label, G9_IRB4.label) -G9_IRA.graph.add_uniq_edge(G9_IRB1.label, G9_IRB2.label) -G9_IRA.graph.add_uniq_edge(G9_IRB2.label, G9_IRB0.label) -G9_IRA.graph.add_uniq_edge(G9_IRB2.label, G9_IRB3.label) -G9_IRA.graph.add_uniq_edge(G9_IRB3.label, G9_IRB4.label) +G9_IRA.add_uniq_edge(G9_IRB0.loc_key, G9_IRB4.loc_key) +G9_IRA.add_uniq_edge(G9_IRB0.loc_key, G9_IRB1.loc_key) +G9_IRA.add_uniq_edge(G9_IRB1.loc_key, G9_IRB0.loc_key) +G9_IRA.add_uniq_edge(G9_IRB1.loc_key, G9_IRB4.loc_key) +G9_IRA.add_uniq_edge(G9_IRB1.loc_key, G9_IRB2.loc_key) +G9_IRA.add_uniq_edge(G9_IRB2.loc_key, G9_IRB0.loc_key) +G9_IRA.add_uniq_edge(G9_IRB2.loc_key, G9_IRB3.loc_key) +G9_IRA.add_uniq_edge(G9_IRB3.loc_key, G9_IRB4.loc_key) # Expected output for graph 9 
-G9_EXP_IRA = IRATest() +G9_EXP_IRA = IRA.new_ircfg() G9_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(b, CST1)]]) G9_EXP_IRB1 = gen_irblock(LBL1, [[], [ExprAff(b, b+CST1)]]) @@ -334,42 +341,42 @@ G9_EXP_IRB2 = gen_irblock(LBL2, [[], [ExprAff(b, b+CST2)]]) G9_EXP_IRB3 = gen_irblock(LBL3, [[]]) G9_EXP_IRB4 = gen_irblock(LBL4, [[], [ExprAff(r, b)]]) -G9_EXP_IRA.blocks = {irb.label : irb for irb in [G9_EXP_IRB0, G9_EXP_IRB1, - G9_EXP_IRB2, G9_EXP_IRB3, - G9_EXP_IRB4]} +for irb in [G9_EXP_IRB0, G9_EXP_IRB1, G9_EXP_IRB2, G9_EXP_IRB3, G9_EXP_IRB4]: + G9_EXP_IRA.add_irblock(irb) # graph 10 : Natural loop with alive variables symetric affectation # (a = b <-> b = a ) -G10_IRA = IRATest() +G10_IRA = IRA.new_ircfg() G10_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G10_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) G10_IRB2 = gen_irblock(LBL2, [[ExprAff(a, b)]]) G10_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST1)]]) -G10_IRA.blocks = {irb.label : irb for irb in [G10_IRB0, G10_IRB1, - G10_IRB2, G10_IRB3]} +for irb in [G10_IRB0, G10_IRB1, G10_IRB2, G10_IRB3]: + G10_IRA.add_irblock(irb) + -G10_IRA.graph.add_uniq_edge(G10_IRB0.label, G10_IRB1.label) -G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) -G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB1.label) -G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB3.label) +G10_IRA.add_uniq_edge(G10_IRB0.loc_key, G10_IRB1.loc_key) +G10_IRA.add_uniq_edge(G10_IRB1.loc_key, G10_IRB2.loc_key) +G10_IRA.add_uniq_edge(G10_IRB2.loc_key, G10_IRB1.loc_key) +G10_IRA.add_uniq_edge(G10_IRB2.loc_key, G10_IRB3.loc_key) # Expected output for graph 10 -G10_EXP_IRA = IRATest() +G10_EXP_IRA = IRA.new_ircfg() G10_EXP_IRB0 = gen_irblock(LBL0, [[]]) G10_EXP_IRB1 = gen_irblock(LBL1, [[]]) G10_EXP_IRB2 = gen_irblock(LBL2, [[]]) G10_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(r, CST1)]]) -G10_EXP_IRA.blocks = {irb.label : irb for irb in [G10_EXP_IRB0, G10_EXP_IRB1, - G10_EXP_IRB2, G10_EXP_IRB3]} +for irb in [G10_EXP_IRB0, G10_EXP_IRB1, G10_EXP_IRB2, 
G10_EXP_IRB3]: + G10_EXP_IRA.add_irblock(irb) # graph 11 : If/Else conditions with alive variables -G11_IRA = IRATest() +G11_IRA = IRA.new_ircfg() G11_IRB0 = gen_irblock(LBL0, [[ExprAff(a, b)]]) G11_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -378,17 +385,18 @@ G11_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST1)]]) G11_IRB4 = gen_irblock(LBL4, [[ExprAff(b, b+CST1)]]) -G11_IRA.blocks = {irb.label : irb for irb in [G11_IRB0, G11_IRB1, G11_IRB2]} +for irb in [G11_IRB0, G11_IRB1, G11_IRB2]: + G11_IRA.add_irblock(irb) -G11_IRA.graph.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) -#G11_IRA.graph.add_uniq_edge(G11_IRB3.label, G11_IRB1.label) -G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB0.label) -#G11_IRA.graph.add_uniq_edge(G11_IRB4.label, G11_IRB0.label) -G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) +G11_IRA.add_uniq_edge(G11_IRB0.loc_key, G11_IRB1.loc_key) +#G11_IRA.add_uniq_edge(G11_IRB3.loc_key, G11_IRB1.loc_key) +G11_IRA.add_uniq_edge(G11_IRB1.loc_key, G11_IRB0.loc_key) +#G11_IRA.add_uniq_edge(G11_IRB4.loc_key, G11_IRB0.loc_key) +G11_IRA.add_uniq_edge(G11_IRB1.loc_key, G11_IRB2.loc_key) # Expected output for graph 11 -G11_EXP_IRA = IRATest() +G11_EXP_IRA = IRA.new_ircfg() G11_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, b)]]) G11_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -396,13 +404,14 @@ G11_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(r, a)]]) #G11_EXP_IRB3 = gen_irblock(LBL3, [[ExprAff(a, a+CST1)]]) #G11_EXP_IRB4 = gen_irblock(LBL4, [[ExprAff(b, b+CST1)]]) -G11_EXP_IRA.blocks = {irb.label : irb for irb in [G11_EXP_IRB0, G11_EXP_IRB1, - G11_EXP_IRB2]} +for irb in [G11_EXP_IRB0, G11_EXP_IRB1, + G11_EXP_IRB2]: + G11_EXP_IRA.add_irblock(irb) # graph 12 : Graph with multiple out points and useless definitions # of return register -G12_IRA = IRATest() +G12_IRA = IRA.new_ircfg() G12_IRB0 = gen_irblock(LBL0, [[ExprAff(r, CST1)], [ExprAff(a, CST2)]]) G12_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -411,17 +420,17 @@ G12_IRB3 = gen_irblock(LBL3, 
[[ExprAff(r, CST3)]]) G12_IRB4 = gen_irblock(LBL4, [[ExprAff(r, CST2)]]) G12_IRB5 = gen_irblock(LBL5, [[ExprAff(r, b)]]) -G12_IRA.blocks = {irb.label : irb for irb in [G12_IRB0, G12_IRB1, G12_IRB2, - G12_IRB3, G12_IRB4, G12_IRB5]} +for irb in [G12_IRB0, G12_IRB1, G12_IRB2, G12_IRB3, G12_IRB4, G12_IRB5]: + G12_IRA.add_irblock(irb) -G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) -G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB2.label) -G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB3.label) -G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB4.label) -G12_IRA.graph.add_uniq_edge(G12_IRB4.label, G12_IRB5.label) +G12_IRA.add_uniq_edge(G12_IRB0.loc_key, G12_IRB1.loc_key) +G12_IRA.add_uniq_edge(G12_IRB0.loc_key, G12_IRB2.loc_key) +G12_IRA.add_uniq_edge(G12_IRB2.loc_key, G12_IRB3.loc_key) +G12_IRA.add_uniq_edge(G12_IRB2.loc_key, G12_IRB4.loc_key) +G12_IRA.add_uniq_edge(G12_IRB4.loc_key, G12_IRB5.loc_key) # Expected output for graph 12 -G12_EXP_IRA = IRATest() +G12_EXP_IRA = IRA.new_ircfg() G12_EXP_IRB0 = gen_irblock(LBL0, [[], []]) G12_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -431,13 +440,14 @@ G12_EXP_IRB4 = gen_irblock(LBL4, [[]]) G12_EXP_IRB5 = gen_irblock(LBL5, [[ExprAff(r, b)]]) -G12_EXP_IRA.blocks = {irb.label : irb for irb in [G12_EXP_IRB0, G12_EXP_IRB1, - G12_EXP_IRB2, G12_EXP_IRB3, - G12_EXP_IRB4, G12_EXP_IRB5]} +for irb in [G12_EXP_IRB0, G12_EXP_IRB1, + G12_EXP_IRB2, G12_EXP_IRB3, + G12_EXP_IRB4, G12_EXP_IRB5]: + G12_EXP_IRA.add_irblock(irb) # graph 13 : Graph where a leaf has lost its son -G13_IRA = IRATest() +G13_IRA = IRA.new_ircfg() G13_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G13_IRB1 = gen_irblock(LBL1, [[ExprAff(r, b)]]) @@ -446,16 +456,16 @@ G13_IRB2 = gen_irblock(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1), G13_IRB3 = gen_irblock(LBL3, [[]]) # lost son G13_IRB4 = gen_irblock(LBL4, [[ExprAff(b, CST2)]]) -G13_IRA.blocks = {irb.label : irb for irb in [G13_IRB0, G13_IRB1, G13_IRB2, - G13_IRB4]} 
+for irb in [G13_IRB0, G13_IRB1, G13_IRB2, G13_IRB4]: + G13_IRA.add_irblock(irb) -G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) -G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB4.label) -G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB3.label) -G13_IRA.graph.add_uniq_edge(G13_IRB4.label, G13_IRB2.label) +G13_IRA.add_uniq_edge(G13_IRB0.loc_key, G13_IRB1.loc_key) +G13_IRA.add_uniq_edge(G13_IRB0.loc_key, G13_IRB4.loc_key) +G13_IRA.add_uniq_edge(G13_IRB2.loc_key, G13_IRB3.loc_key) +G13_IRA.add_uniq_edge(G13_IRB4.loc_key, G13_IRB2.loc_key) # Expected output for graph 13 -G13_EXP_IRA = IRATest() +G13_EXP_IRA = IRA.new_ircfg() G13_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G13_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, b)]]) @@ -464,58 +474,62 @@ G13_EXP_IRB2 = gen_irblock(LBL2, [[ExprAff(d, CST2)], [ExprAff(a, b+CST1), G13_EXP_IRB3 = gen_irblock(LBL3, [[]]) G13_EXP_IRB4 = gen_irblock(LBL4, [[ExprAff(b, CST2)]]) -G13_EXP_IRA.blocks = {irb.label: irb for irb in [G13_EXP_IRB0, G13_EXP_IRB1, - G13_EXP_IRB2, G13_EXP_IRB4]} +for irb in [G13_EXP_IRB0, G13_EXP_IRB1, G13_EXP_IRB2, G13_EXP_IRB4]: + G13_EXP_IRA.add_irblock(irb) #G13_EXP_IRA = G13_IRA # graph 14 : Graph where variable assigned multiple times in a block but still # useful in the end -G14_IRA = IRATest() +G14_IRA = IRA.new_ircfg() G14_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) G14_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+c)]]) -G14_IRA.blocks = {irb.label : irb for irb in [G14_IRB0, G14_IRB1]} +for irb in [G14_IRB0, G14_IRB1]: + G14_IRA.add_irblock(irb) -G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) +G14_IRA.add_uniq_edge(G14_IRB0.loc_key, G14_IRB1.loc_key) # Expected output for graph 1 -G14_EXP_IRA = IRATest() +G14_EXP_IRA = IRA.new_ircfg() G14_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) G14_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+c)]]) -G14_EXP_IRA.blocks = {irb.label: 
irb for irb in [G14_EXP_IRB0, G14_EXP_IRB1]} +for irb in [G14_EXP_IRB0, G14_EXP_IRB1]: + G14_EXP_IRA.add_irblock(irb) # graph 15 : Graph where variable assigned multiple and read at the same time, # but useless -G15_IRA = IRATest() +G15_IRA = IRA.new_ircfg() G15_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST2)], [ExprAff(a, CST1), ExprAff(b, a+CST2), ExprAff(c, CST1)]]) G15_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]]) -G15_IRA.blocks = {irb.label : irb for irb in [G15_IRB0, G15_IRB1]} +for irb in [G15_IRB0, G15_IRB1]: + G15_IRA.add_irblock(irb) -G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) +G15_IRA.add_uniq_edge(G15_IRB0.loc_key, G15_IRB1.loc_key) # Expected output for graph 1 -G15_EXP_IRA = IRATest() +G15_EXP_IRA = IRA.new_ircfg() G15_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(a, CST1)]]) G15_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]]) -G15_EXP_IRA.blocks = {irb.label: irb for irb in [G15_EXP_IRB0, G15_EXP_IRB1]} +for irb in [G15_EXP_IRB0, G15_EXP_IRB1]: + G15_EXP_IRA.add_irblock(irb) # graph 16 : Graph where variable assigned multiple times in the same bloc -G16_IRA = IRATest() +G16_IRA = IRA.new_ircfg() G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), ExprAff(c, CST3)], [ExprAff(a, c+CST1), @@ -523,25 +537,28 @@ G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), G16_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]]) G16_IRB2 = gen_irblock(LBL2, [[]]) -G16_IRA.blocks = {irb.label : irb for irb in [G16_IRB0, G16_IRB1]} +for irb in [G16_IRB0, G16_IRB1]: + G16_IRA.add_irblock(irb) -G16_IRA.graph.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) -G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) +G16_IRA.add_uniq_edge(G16_IRB0.loc_key, G16_IRB1.loc_key) +G16_IRA.add_uniq_edge(G16_IRB1.loc_key, G16_IRB2.loc_key) -G16_IRA.blocks = {irb.label : irb for irb in [G16_IRB0, G16_IRB1]} +for irb in [G16_IRB0, G16_IRB1]: + G16_IRA.add_irblock(irb) # Expected output for graph 1 -G16_EXP_IRA = IRATest() 
+G16_EXP_IRA = IRA.new_ircfg() G16_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(c, CST3)], [ExprAff(a, c + CST1), ExprAff(b, c + CST2)]]) G16_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a+b)], [ExprAff(r, c+r)]]) -G16_EXP_IRA.blocks = {irb.label: irb for irb in [G16_EXP_IRB0, G16_EXP_IRB1]} +for irb in [G16_EXP_IRB0, G16_EXP_IRB1]: + G16_EXP_IRA.add_irblock(irb) # graph 17 : parallel ir -G17_IRA = IRATest() +G17_IRA = IRA.new_ircfg() G17_IRB0 = gen_irblock(LBL0, [[ExprAff(a, a*b), ExprAff(b, c), @@ -597,12 +614,13 @@ G17_IRB0 = gen_irblock(LBL0, [[ExprAff(a, a*b), ]) -G17_IRA.blocks = {irb.label : irb for irb in [G17_IRB0]} +for irb in [G17_IRB0]: + G17_IRA.add_irblock(irb) -G17_IRA.graph.add_node(G17_IRB0.label) +#G17_IRA.graph.add_node(G17_IRB0.loc_key) # Expected output for graph 17 -G17_EXP_IRA = IRATest() +G17_EXP_IRA = IRA.new_ircfg() G17_EXP_IRB0 = gen_irblock(LBL0, [[], @@ -639,7 +657,8 @@ G17_EXP_IRB0 = gen_irblock(LBL0, [[], # Trick because a+b+c != ((a+b)+c) ]) -G17_EXP_IRA.blocks = {irb.label : irb for irb in [G17_EXP_IRB0]} +for irb in [G17_EXP_IRB0]: + G17_EXP_IRA.add_irblock(irb) # Begining of tests @@ -667,17 +686,16 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), print "[+] Test", test_nb+1 # Print initial graph, for debug - open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) + open("graph_%02d.dot" % (test_nb+1), "w").write(g_ira.dot()) reaching_defs = ReachingDefinitions(g_ira) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - #open("defuse_%02d.dot" % (test_nb+1), "w").write(defuse.dot()) # # Simplify graph - dead_simp(g_ira) + dead_simp(IRA, g_ira) # # Print simplified graph, for debug - open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.graph.dot()) + open("simp_graph_%02d.dot" % (test_nb+1), "w").write(g_ira.dot()) # Same number of blocks assert len(g_ira.blocks) == len(g_exp_ira.blocks) diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index 9fb046d0..2ba5f044 100644 --- 
a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -1,6 +1,7 @@ """Regression test module for DependencyGraph""" -from miasm2.expression.expression import ExprId, ExprInt, ExprAff, ExprCond -from miasm2.core.asmblock import AsmLabel +from miasm2.expression.expression import ExprId, ExprInt, ExprAff, ExprCond, \ + ExprLoc, LocKey +from miasm2.core.locationdb import LocationDB from miasm2.ir.analysis import ira from miasm2.ir.ir import IRBlock, AssignBlock from miasm2.core.graph import DiGraph @@ -9,6 +10,8 @@ from itertools import count from pdb import pm import re +loc_db = LocationDB() + EMULATION = True try: import z3 @@ -21,6 +24,7 @@ B = ExprId("b", 32) C = ExprId("c", 32) D = ExprId("d", 32) R = ExprId("r", 32) +COND = ExprId("cond", 32) A_INIT = ExprId("a_init", 32) B_INIT = ExprId("b_init", 32) @@ -41,13 +45,13 @@ CST33 = ExprInt(0x33, 32) CST35 = ExprInt(0x35, 32) CST37 = ExprInt(0x37, 32) -LBL0 = AsmLabel("lbl0") -LBL1 = AsmLabel("lbl1") -LBL2 = AsmLabel("lbl2") -LBL3 = AsmLabel("lbl3") -LBL4 = AsmLabel("lbl4") -LBL5 = AsmLabel("lbl5") -LBL6 = AsmLabel("lbl6") +LBL0 = loc_db.add_location("lbl0", 0) +LBL1 = loc_db.add_location("lbl1", 1) +LBL2 = loc_db.add_location("lbl2", 2) +LBL3 = loc_db.add_location("lbl3", 3) +LBL4 = loc_db.add_location("lbl4", 4) +LBL5 = loc_db.add_location("lbl5", 5) +LBL6 = loc_db.add_location("lbl6", 6) def gen_irblock(label, exprs_list): """ Returns an IRBlock. 
@@ -87,10 +91,10 @@ class IRATest(ira): """Fake IRA class for tests""" - def __init__(self, symbol_pool=None): + def __init__(self, loc_db=None): arch = Arch() - super(IRATest, self).__init__(arch, 32, symbol_pool) - self.IRDst = PC + super(IRATest, self).__init__(arch, 32, loc_db) + self.IRDst = ExprId("IRDst", 32) self.ret_reg = R def get_out_regs(self, _): @@ -111,18 +115,17 @@ def bloc2graph(irgraph, label=False, lines=True): # Generate basic blocks out_blocks = [] - for label in irgraph.graph.nodes(): - if isinstance(label, AsmLabel): - label_name = label.name - else: - label_name = str(label) + for label in irgraph.nodes(): + assert isinstance(label, LocKey) + label_names = irgraph.loc_db.get_location_names(label) + label_name = list(label_names)[0] if hasattr(irgraph, 'blocks'): irblock = irgraph.blocks[label] else: irblock = None - if isinstance(label, AsmLabel): - out_block = '%s [\n' % label.name + if isinstance(label, LocKey): + out_block = '%s [\n' % label_name else: out_block = '%s [\n' % label out_block += "%s " % block_attr @@ -151,20 +154,19 @@ def bloc2graph(irgraph, label=False, lines=True): out += out_blocks # Generate links - for src, dst in irgraph.graph.edges(): - if isinstance(src, AsmLabel): - src_name = src.name - else: - src_name = str(src) - if isinstance(dst, AsmLabel): - dst_name = dst.name - else: - dst_name = str(dst) + for src, dst in irgraph.edges(): + assert isinstance(src, LocKey) + src_names = irgraph.loc_db.get_location_names(src) + assert isinstance(dst, LocKey) + dst_names = irgraph.loc_db.get_location_names(dst) - edge_color = "black" - out.append('%s -> %s' % (src_name, - dst_name) + - '[' + edge_attr % ("", edge_color) + '];') + src_name = list(src_names)[0] + dst_name = list(dst_names)[0] + + edge_color = "black" + out.append('%s -> %s' % (src_name, + dst_name) + + '[' + edge_attr % ("", edge_color) + '];') out.append("}") return '\n'.join(out) @@ -184,19 +186,20 @@ def dg2graph(graph, label=False, lines=True): # Generate 
basic blocks out_blocks = [] - for label in graph.nodes(): - if isinstance(label, DependencyNode): - label_name = "%s %s %s" % (label.label.name, - label.element, - label.line_nb) + for node in graph.nodes(): + if isinstance(node, DependencyNode): + name = loc_db.pretty_str(node.loc_key) + node_name = "%s %s %s" % (name, + node.element, + node.line_nb) else: - label_name = str(label) - out_block = '%s [\n' % hash(label) + node_name = str(node) + out_block = '%s [\n' % hash(node) out_block += "%s " % block_attr out_block += 'label =<<table border="0" cellborder="0" cellpadding="3">' block_label = '<tr><td %s>%s</td></tr>' % ( - label_attr, label_name) + label_attr, node_name) block_html_lines = [] block_html_lines = ('<tr><td %s>' % td_attr + ('</td></tr><tr><td %s>' % td_attr).join(block_html_lines) + @@ -226,370 +229,476 @@ DNC2 = DependencyNode(LBL1, C, 0) DNB3 = DependencyNode(LBL1, B, 1) DNC3 = DependencyNode(LBL1, C, 0) +IRA = IRATest(loc_db) +IRDst = IRA.IRDst +END = ExprId("END", IRDst.size) # graph 1 -G1_IRA = IRATest() - -G1_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G1_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)]]) -G1_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) +G1_IRA = IRA.new_ircfg() -G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) -G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) +G1_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G1_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C), ExprAff(IRDst, ExprLoc(LBL2, 32))]]) +G1_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) -G1_IRA.blocks = dict([(irb.label, irb) for irb in [G1_IRB0, G1_IRB1, G1_IRB2]]) +for irb in [G1_IRB0, G1_IRB1, G1_IRB2]: + G1_IRA.add_irblock(irb) # graph 2 -G2_IRA = IRATest() +G2_IRA = IRA.new_ircfg() -G2_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G2_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2)]]) -G2_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B + C)]]) +G2_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, 
ExprLoc(LBL1, 32))]]) +G2_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2), ExprAff(IRDst, ExprLoc(LBL2, 32))]]) +G2_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B + C), ExprAff(IRDst, END)]]) -G2_IRA.graph.add_uniq_edge(G2_IRB0.label, G2_IRB1.label) -G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) - -G2_IRA.blocks = dict([(irb.label, irb) for irb in [G2_IRB0, G2_IRB1, G2_IRB2]]) +for irb in [G2_IRB0, G2_IRB1, G2_IRB2]: + G2_IRA.add_irblock(irb) # graph 3 -G3_IRA = IRATest() - -G3_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G3_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2)]]) -G3_IRB2 = gen_irblock(LBL2, [[ExprAff(B, CST3)]]) -G3_IRB3 = gen_irblock(LBL3, [[ExprAff(A, B + C)]]) - -G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB1.label) -G3_IRA.graph.add_uniq_edge(G3_IRB0.label, G3_IRB2.label) -G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB3.label) -G3_IRA.graph.add_uniq_edge(G3_IRB2.label, G3_IRB3.label) - -G3_IRA.blocks = dict([(irb.label, irb) for irb in [G3_IRB0, G3_IRB1, - G3_IRB2, G3_IRB3]]) +G3_IRA = IRA.new_ircfg() + +G3_IRB0 = gen_irblock( + LBL0, + [ + [ExprAff(C, CST1), ExprAff( + IRDst, ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) + +G3_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2), ExprAff(IRDst, ExprLoc(LBL3, 32))]]) +G3_IRB2 = gen_irblock(LBL2, [[ExprAff(B, CST3), ExprAff(IRDst, ExprLoc(LBL3, 32))]]) +G3_IRB3 = gen_irblock(LBL3, [[ExprAff(A, B + C), ExprAff(IRDst, END)]]) + +for irb in [G3_IRB0, G3_IRB1, G3_IRB2, G3_IRB3]: + G3_IRA.add_irblock(irb) # graph 4 -G4_IRA = IRATest() - -G4_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G4_IRB1 = gen_irblock(LBL1, [[ExprAff(C, C + CST2)], - [ExprAff(G4_IRA.IRDst, - ExprCond(C, ExprId(LBL2, 32), - ExprId(LBL1, 32)))]]) +G4_IRA = IRA.new_ircfg() -G4_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) +G4_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G4_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(C, C + CST2)], + [ExprAff(IRDst, + ExprCond( + C, + 
ExprLoc(LBL2, 32), + ExprLoc(LBL1, 32)) + ) + ]] +) -G4_IRA.graph.add_uniq_edge(G4_IRB0.label, G4_IRB1.label) -G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB2.label) -G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB1.label) +G4_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) -G4_IRA.blocks = dict([(irb.label, irb) for irb in [G4_IRB0, G4_IRB1, G4_IRB2]]) +for irb in [G4_IRB0, G4_IRB1, G4_IRB2]: + G4_IRA.add_irblock(irb) # graph 5 -G5_IRA = IRATest() - -G5_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) -G5_IRB1 = gen_irblock(LBL1, [[ExprAff(B, B + CST2)], - [ExprAff(G5_IRA.IRDst, - ExprCond(B, ExprId(LBL2, 32), - ExprId(LBL1, 32)))]]) - -G5_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) - -G5_IRA.graph.add_uniq_edge(G5_IRB0.label, G5_IRB1.label) -G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB2.label) -G5_IRA.graph.add_uniq_edge(G5_IRB1.label, G5_IRB1.label) - -G5_IRA.blocks = dict([(irb.label, irb) for irb in [G5_IRB0, G5_IRB1, G5_IRB2]]) +G5_IRA = IRA.new_ircfg() + +G5_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G5_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, B + CST2)], + [ExprAff( + IRDst, + ExprCond( + B, + ExprLoc(LBL2, 32), + ExprLoc(LBL1, 32) + ) + ) + ] + ] +) + +G5_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) + +for irb in [G5_IRB0, G5_IRB1, G5_IRB2]: + G5_IRA.add_irblock(irb) # graph 6 -G6_IRA = IRATest() +G6_IRA = IRA.new_ircfg() -G6_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) -G6_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B)]]) +G6_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G6_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) -G6_IRA.graph.add_uniq_edge(G6_IRB0.label, G6_IRB1.label) -G6_IRA.graph.add_uniq_edge(G6_IRB1.label, G6_IRB1.label) - -G6_IRA.blocks = dict([(irb.label, irb) for irb in [G6_IRB0, G6_IRB1]]) +for irb in [G6_IRB0, G6_IRB1]: + G6_IRA.add_irblock(irb) # graph 7 -G7_IRA = IRATest() - -G7_IRB0 = 
gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G7_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)], [ExprAff(A, B)]]) -G7_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)]]) - -G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB1.label) -G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB1.label) -G7_IRA.graph.add_uniq_edge(G7_IRB1.label, G7_IRB2.label) - -G7_IRA.blocks = dict([(irb.label, irb) for irb in [G7_IRB0, G7_IRB1, G7_IRB2]]) +G7_IRA = IRA.new_ircfg() + +G7_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G7_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, C)], + [ExprAff(A, B)], + [ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) + +G7_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A), ExprAff(IRDst, END)]]) + +for irb in [G7_IRB0, G7_IRB1, G7_IRB2]: + G7_IRA.add_irblock(irb) # graph 8 -G8_IRA = IRATest() - -G8_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) -G8_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)], [ExprAff(C, D)]]) -G8_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) - -G8_IRA.graph.add_uniq_edge(G8_IRB0.label, G8_IRB1.label) -G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB1.label) -G8_IRA.graph.add_uniq_edge(G8_IRB1.label, G8_IRB2.label) - -G8_IRA.blocks = dict([(irb.label, irb) for irb in [G8_IRB0, G8_IRB1, G8_IRB2]]) +G8_IRA = IRA.new_ircfg() + +G8_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G8_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, C)], + [ExprAff(C, D), + ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) +G8_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) + +for irb in [G8_IRB0, G8_IRB1, G8_IRB2]: + G8_IRA.add_irblock(irb) # graph 9 is graph 8 # graph 10 -G10_IRA = IRATest() - -G10_IRB1 = gen_irblock(LBL1, [[ExprAff(B, B + CST2)]]) -G10_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) - -G10_IRA.graph.add_uniq_edge(G10_IRB1.label, G10_IRB2.label) -G10_IRA.graph.add_uniq_edge(G10_IRB1.label, 
G10_IRB1.label) - -G10_IRA.blocks = dict([(irb.label, irb) for irb in [G10_IRB1, G10_IRB2]]) +G10_IRA = IRA.new_ircfg() + +G10_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(B, B + CST2), + ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) + +G10_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B), ExprAff(IRDst, END)]]) + +for irb in [G10_IRB1, G10_IRB2]: + G10_IRA.add_irblock(irb) # graph 11 -G11_IRA = IRATest() - -G11_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), - ExprAff(B, CST2)]]) -G11_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B), - ExprAff(B, A)]]) -G11_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B)]]) - -G11_IRA.graph.add_uniq_edge(G11_IRB0.label, G11_IRB1.label) -G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) - -G11_IRA.blocks = dict([(irb.label, irb) - for irb in [G11_IRB0, G11_IRB1, G11_IRB2]]) +G11_IRA = IRA.new_ircfg() + +G11_IRB0 = gen_irblock( + LBL0, + [ + [ExprAff(A, CST1), + ExprAff(B, CST2), + ExprAff(IRDst, ExprLoc(LBL1, 32)) + ] + ] +) + +G11_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(A, B), + ExprAff(B, A), + ExprAff(IRDst, ExprLoc(LBL2, 32)) + ] + ] +) + +G11_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B), ExprAff(IRDst, END)]]) + +for irb in [G11_IRB0, G11_IRB1, G11_IRB2]: + G11_IRA.add_irblock(irb) # graph 12 -G12_IRA = IRATest() +G12_IRA = IRA.new_ircfg() -G12_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) -G12_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B)], [ExprAff(B, B + CST2)]]) -G12_IRB2 = gen_irblock(LBL2, [[ExprAff(B, A)]]) +G12_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G12_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(A, B)], + [ExprAff(B, B + CST2), + ExprAff( + IRDst, + ExprCond( + COND, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + ) + ] + ] +) -G12_IRA.graph.add_uniq_edge(G12_IRB0.label, G12_IRB1.label) -G12_IRA.graph.add_uniq_edge(G12_IRB1.label, G12_IRB2.label) -G12_IRA.graph.add_uniq_edge(G12_IRB1.label, G12_IRB1.label) +G12_IRB2 = gen_irblock(LBL2, 
[[ExprAff(B, A), ExprAff(IRDst, END)]]) -G12_IRA.blocks = dict([(irb.label, irb) for irb in [G12_IRB0, G12_IRB1, - G12_IRB2]]) +for irb in [G12_IRB0, G12_IRB1, G12_IRB2]: + G12_IRA.add_irblock(irb) # graph 13 -G13_IRA = IRATest() +G13_IRA = IRA.new_ircfg() G13_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)], #[ExprAff(B, A)], - [ExprAff(G13_IRA.IRDst, - ExprId(LBL1, 32))]]) + [ExprAff(IRDst, + ExprLoc(LBL1, 32))]]) G13_IRB1 = gen_irblock(LBL1, [[ExprAff(C, A)], #[ExprAff(A, A + CST1)], - [ExprAff(G13_IRA.IRDst, - ExprCond(R, ExprId(LBL2, 32), - ExprId(LBL1, 32)))]]) + [ExprAff(IRDst, + ExprCond( + R, + ExprLoc(LBL2, 32), + ExprLoc(LBL3, 32) + ) + )]]) G13_IRB2 = gen_irblock(LBL2, [[ExprAff(B, A + CST3)], [ExprAff(A, B + CST3)], - [ExprAff(G13_IRA.IRDst, - ExprId(LBL1, 32))]]) - -G13_IRB3 = gen_irblock(LBL3, [[ExprAff(R, C)]]) + [ExprAff(IRDst, + ExprLoc(LBL1, 32))]]) -G13_IRA.graph.add_uniq_edge(G13_IRB0.label, G13_IRB1.label) -G13_IRA.graph.add_uniq_edge(G13_IRB1.label, G13_IRB2.label) -G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB1.label) -G13_IRA.graph.add_uniq_edge(G13_IRB1.label, G13_IRB3.label) +G13_IRB3 = gen_irblock(LBL3, [[ExprAff(R, C), ExprAff(IRDst, END)]]) -G13_IRA.blocks = dict([(irb.label, irb) for irb in [G13_IRB0, G13_IRB1, - G13_IRB2, G13_IRB3]]) +for irb in [G13_IRB0, G13_IRB1, G13_IRB2, G13_IRB3]: + G13_IRA.add_irblock(irb) # graph 14 -G14_IRA = IRATest() +G14_IRA = IRA.new_ircfg() G14_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)], - [ExprAff(G14_IRA.IRDst, - ExprId(LBL1, 32))] + [ExprAff(IRDst, + ExprLoc(LBL1, 32))] ]) G14_IRB1 = gen_irblock(LBL1, [[ExprAff(B, A)], - [ExprAff(G14_IRA.IRDst, - ExprCond(C, ExprId(LBL2, 32), - ExprId(LBL3, 32)))] + [ExprAff(IRDst, + ExprCond( + C, + ExprLoc(LBL2, 32), + ExprLoc(LBL3, 32) + ) + ) + ] ]) G14_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)], [ExprAff(A, D + CST1)], - [ExprAff(G14_IRA.IRDst, - ExprId(LBL1, 32))] + [ExprAff(IRDst, + ExprLoc(LBL1, 32))] ]) -G14_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D + B)]]) 
- -G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) -G14_IRA.graph.add_uniq_edge(G14_IRB1.label, G14_IRB2.label) -G14_IRA.graph.add_uniq_edge(G14_IRB2.label, G14_IRB1.label) -G14_IRA.graph.add_uniq_edge(G14_IRB1.label, G14_IRB3.label) +G14_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D + B), ExprAff(IRDst, END)]]) -G14_IRA.blocks = dict([(irb.label, irb) for irb in [G14_IRB0, G14_IRB1, - G14_IRB2, G14_IRB3]]) +for irb in [G14_IRB0, G14_IRB1, G14_IRB2, G14_IRB3]: + G14_IRA.add_irblock(irb) # graph 16 -G15_IRA = IRATest() +G15_IRA = IRA.new_ircfg() -G15_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)]]) +G15_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) G15_IRB1 = gen_irblock(LBL1, [[ExprAff(D, A + B)], [ExprAff(C, D)], - [ExprAff(B, C)]]) -G15_IRB2 = gen_irblock(LBL2, [[ExprAff(R, B)]]) - -G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) -G15_IRA.graph.add_uniq_edge(G15_IRB1.label, G15_IRB2.label) -G15_IRA.graph.add_uniq_edge(G15_IRB1.label, G15_IRB1.label) - -G15_IRA.blocks = dict([(irb.label, irb) for irb in [G15_IRB0, G15_IRB1, - G15_IRB2]]) + [ExprAff(B, C), + ExprAff(IRDst, + ExprCond( + C, + ExprLoc(LBL1, 32), + ExprLoc(LBL2, 32) + ) + )]]) +G15_IRB2 = gen_irblock(LBL2, [[ExprAff(R, B), ExprAff(IRDst, END)]]) + +for irb in [G15_IRB0, G15_IRB1, G15_IRB2]: + G15_IRA.add_irblock(irb) # graph 16 -G16_IRA = IRATest() - -G16_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)]]) -G16_IRB1 = gen_irblock(LBL1, [[ExprAff(R, D)]]) -G16_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)]]) -G16_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D)]]) -G16_IRB4 = gen_irblock(LBL4, [[ExprAff(R, A)]]) -G16_IRB5 = gen_irblock(LBL5, [[ExprAff(R, A)]]) - -G16_IRA.graph.add_uniq_edge(G16_IRB0.label, G16_IRB1.label) -G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) -G16_IRA.graph.add_uniq_edge(G16_IRB2.label, G16_IRB1.label) -G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB3.label) -G16_IRA.graph.add_uniq_edge(G16_IRB3.label, G16_IRB1.label) 
-G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB4.label) -G16_IRA.graph.add_uniq_edge(G16_IRB4.label, G16_IRB1.label) -G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB5.label) - -G16_IRA.blocks = dict([(irb.label, irb) for irb in [G16_IRB0, G16_IRB1, - G16_IRB2, G16_IRB3, - G16_IRB4, G16_IRB5]]) +G16_IRA = IRA.new_ircfg() + +G16_IRB0 = gen_irblock( + LBL0, [ + [ExprAff(A, CST1), ExprAff(IRDst, ExprLoc(LBL1, 32))] + ] +) + +G16_IRB1 = gen_irblock( + LBL1, + [ + [ExprAff(R, D), + ExprAff( + IRDst, + ExprCond( + C, + ExprCond( + C, + ExprCond( + C, + ExprLoc(LBL2, 32), + ExprLoc(LBL3, 32) + ), + ExprLoc(LBL4, 32) + ), + ExprLoc(LBL5, 32) + ) + ) + ] + ] +) + + + +G16_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G16_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G16_IRB4 = gen_irblock(LBL4, [[ExprAff(R, A), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) +G16_IRB5 = gen_irblock(LBL5, [[ExprAff(R, A), ExprAff(IRDst, ExprLoc(LBL1, 32))]]) + +for irb in [G16_IRB0, G16_IRB1, G16_IRB2, G16_IRB3, G16_IRB4, G16_IRB5]: + G16_IRA.add_irblock(irb) # graph 17 -G17_IRA = IRATest() +G17_IRA = IRA.new_ircfg() G17_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), - ExprAff(D, CST2)]]) + ExprAff(D, CST2), + ExprAff(IRDst, ExprLoc(LBL1, 32))]]) G17_IRB1 = gen_irblock(LBL1, [[ExprAff(A, D), - ExprAff(B, D)]]) -G17_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B)]]) + ExprAff(B, D), + ExprAff(IRDst, ExprLoc(LBL2, 32))]]) +G17_IRB2 = gen_irblock(LBL2, [[ExprAff(A, A - B), + ExprAff(IRDst, END)]]) -G17_IRA.graph.add_uniq_edge(G17_IRB0.label, G17_IRB1.label) -G17_IRA.graph.add_uniq_edge(G17_IRB1.label, G17_IRB2.label) +G17_IRA.add_uniq_edge(G17_IRB0.loc_key, G17_IRB1.loc_key) +G17_IRA.add_uniq_edge(G17_IRB1.loc_key, G17_IRB2.loc_key) -G17_IRA.blocks = dict([(irb.label, irb) for irb in [G17_IRB0, G17_IRB1, - G17_IRB2]]) +for irb in [G17_IRB0, G17_IRB1, G17_IRB2]: + G17_IRA.add_irblock(irb) # Test graph 1 G1_TEST1_DN1 = DependencyNode( - 
G1_IRB2.label, A, len(G1_IRB2)) + G1_IRB2.loc_key, A, len(G1_IRB2)) -G1_INPUT = (set([G1_TEST1_DN1]), set([G1_IRB0.label])) +G1_INPUT = (set([G1_TEST1_DN1]), set([G1_IRB0.loc_key])) # Test graph 2 G2_TEST1_DN1 = DependencyNode( - G2_IRB2.label, A, len(G2_IRB2)) + G2_IRB2.loc_key, A, len(G2_IRB2)) -G2_INPUT = (set([G2_TEST1_DN1]), set([G2_IRB0.label])) +G2_INPUT = (set([G2_TEST1_DN1]), set([G2_IRB0.loc_key])) # Test graph 3 G3_TEST1_0_DN1 = DependencyNode( - G3_IRB3.label, A, len(G3_IRB3)) + G3_IRB3.loc_key, A, len(G3_IRB3)) -G3_INPUT = (set([G3_TEST1_0_DN1]), set([G3_IRB0.label])) +G3_INPUT = (set([G3_TEST1_0_DN1]), set([G3_IRB0.loc_key])) # Test graph 4 G4_TEST1_DN1 = DependencyNode( - G4_IRB2.label, A, len(G2_IRB0)) + G4_IRB2.loc_key, A, len(G2_IRB0)) -G4_INPUT = (set([G4_TEST1_DN1]), set([G4_IRB0.label])) +G4_INPUT = (set([G4_TEST1_DN1]), set([G4_IRB0.loc_key])) # Test graph 5 G5_TEST1_0_DN1 = DependencyNode( - G5_IRB2.label, A, len(G5_IRB2)) + G5_IRB2.loc_key, A, len(G5_IRB2)) -G5_INPUT = (set([G5_TEST1_0_DN1]), set([G5_IRB0.label])) +G5_INPUT = (set([G5_TEST1_0_DN1]), set([G5_IRB0.loc_key])) # Test graph 6 G6_TEST1_0_DN1 = DependencyNode( - G6_IRB1.label, A, len(G6_IRB1)) + G6_IRB1.loc_key, A, len(G6_IRB1)) -G6_INPUT = (set([G6_TEST1_0_DN1]), set([G6_IRB0.label])) +G6_INPUT = (set([G6_TEST1_0_DN1]), set([G6_IRB0.loc_key])) # Test graph 7 G7_TEST1_0_DN1 = DependencyNode( - G7_IRB2.label, D, len(G7_IRB2)) + G7_IRB2.loc_key, D, len(G7_IRB2)) -G7_INPUT = (set([G7_TEST1_0_DN1]), set([G7_IRB0.label])) +G7_INPUT = (set([G7_TEST1_0_DN1]), set([G7_IRB0.loc_key])) # Test graph 8 G8_TEST1_0_DN1 = DependencyNode( - G8_IRB2.label, A, len(G8_IRB2)) + G8_IRB2.loc_key, A, len(G8_IRB2)) -G8_INPUT = (set([G8_TEST1_0_DN1]), set([G3_IRB0.label])) +G8_INPUT = (set([G8_TEST1_0_DN1]), set([G3_IRB0.loc_key])) # Test 9: Multi elements G9_TEST1_0_DN1 = DependencyNode( - G8_IRB2.label, A, len(G8_IRB2)) + G8_IRB2.loc_key, A, len(G8_IRB2)) G9_TEST1_0_DN5 = DependencyNode( - G8_IRB2.label, 
C, len(G8_IRB2)) + G8_IRB2.loc_key, C, len(G8_IRB2)) -G9_INPUT = (set([G9_TEST1_0_DN1, G9_TEST1_0_DN5]), set([G8_IRB0.label])) +G9_INPUT = (set([G9_TEST1_0_DN1, G9_TEST1_0_DN5]), set([G8_IRB0.loc_key])) # Test 10: loop at beginning G10_TEST1_0_DN1 = DependencyNode( - G10_IRB2.label, A, len(G10_IRB2)) + G10_IRB2.loc_key, A, len(G10_IRB2)) -G10_INPUT = (set([G10_TEST1_0_DN1]), set([G10_IRB1.label])) +G10_INPUT = (set([G10_TEST1_0_DN1]), set([G10_IRB1.loc_key])) # Test 11: no dual bloc emulation G11_TEST1_DN1 = DependencyNode( - G11_IRB2.label, A, len(G11_IRB2)) + G11_IRB2.loc_key, A, len(G11_IRB2)) -G11_INPUT = (set([G11_TEST1_DN1]), set([G11_IRB0.label])) +G11_INPUT = (set([G11_TEST1_DN1]), set([G11_IRB0.loc_key])) # Test graph 12 -G12_TEST1_0_DN1 = DependencyNode(G12_IRB2.label, B, 1) +G12_TEST1_0_DN1 = DependencyNode(G12_IRB2.loc_key, B, 1) G12_INPUT = (set([G12_TEST1_0_DN1]), set([])) @@ -597,7 +706,7 @@ G12_INPUT = (set([G12_TEST1_0_DN1]), set([])) # All filters -G13_TEST1_0_DN4 = DependencyNode(G13_IRB3.label, R, 1) +G13_TEST1_0_DN4 = DependencyNode(G13_IRB3.loc_key, R, 1) G13_INPUT = (set([G13_TEST1_0_DN4]), set([])) @@ -605,24 +714,24 @@ G13_INPUT = (set([G13_TEST1_0_DN4]), set([])) # All filters -G14_TEST1_0_DN1 = DependencyNode(G14_IRB3.label, R, 1) +G14_TEST1_0_DN1 = DependencyNode(G14_IRB3.loc_key, R, 1) G14_INPUT = (set([G14_TEST1_0_DN1]), set([])) # Test graph 15 -G15_TEST1_0_DN1 = DependencyNode(G15_IRB2.label, R, 1) +G15_TEST1_0_DN1 = DependencyNode(G15_IRB2.loc_key, R, 1) G15_INPUT = (set([G15_TEST1_0_DN1]), set([])) # Test graph 16 -G16_TEST1_0_DN1 = DependencyNode(G16_IRB5.label, R, 1) +G16_TEST1_0_DN1 = DependencyNode(G16_IRB5.loc_key, R, 1) G16_INPUT = (set([G16_TEST1_0_DN1]), set([])) # Test graph 17 -G17_TEST1_DN1 = DependencyNode(G17_IRB2.label, A, 1) +G17_TEST1_DN1 = DependencyNode(G17_IRB2.loc_key, A, 1) G17_INPUT = (set([G17_TEST1_DN1]), set([])) @@ -638,7 +747,8 @@ def flatNode(node): element = int(node.element.arg) else: 
RuntimeError("Unsupported type '%s'" % type(enode.element)) - return (node.label.name, + name = loc_db.pretty_str(node.loc_key) + return (name, element, node.line_nb) else: @@ -736,7 +846,8 @@ def match_results(resultsA, resultsB, nodes): def get_flat_init_depnodes(depnodes): out = [] for node in depnodes: - out.append((node.label.name, + name = loc_db.pretty_str(node.loc_key) + out.append((name, node.element.name, node.line_nb, 0)) @@ -1017,21 +1128,23 @@ for test_nb, test in enumerate([(G1_IRA, G1_INPUT), # Extract test elements print "[+] Test", test_nb + 1 - g_ira, (depnodes, heads) = test + ircfg, (depnodes, heads) = test - open("graph_%02d.dot" % (test_nb + 1), "w").write(g_ira.graph.dot()) - open("graph_%02d.dot" % (test_nb + 1), "w").write(bloc2graph(g_ira)) + open("graph_%02d.dot" % (test_nb + 1), "w").write(ircfg.dot()) + open("graph_%02d.dot" % (test_nb + 1), "w").write(bloc2graph(ircfg)) # Different options suffix_key_list = ["", "_nosimp", "_nomem", "_nocall", "_implicit"] # Test classes - for g_ind, g_dep in enumerate([DependencyGraph(g_ira), - DependencyGraph(g_ira, apply_simp=False), - DependencyGraph(g_ira, follow_mem=False), - DependencyGraph(g_ira, follow_mem=False, - follow_call=False), - # DependencyGraph(g_ira, implicit=True), + for g_ind, g_dep in enumerate([DependencyGraph(ircfg), + DependencyGraph(ircfg, apply_simp=False), + DependencyGraph(ircfg, follow_mem=False), + DependencyGraph( + ircfg, follow_mem=False, + follow_call=False + ), + # DependencyGraph(ircfg, implicit=True), ]): # if g_ind == 4: # TODO: Implicit specifications @@ -1052,14 +1165,13 @@ for test_nb, test in enumerate([(G1_IRA, G1_INPUT), all_results.add(unflatGraph(flatGraph(result.graph))) open("graph_test_%02d_%02d.dot" % (test_nb + 1, i), "w").write(dg2graph(result.graph)) - # print all_flat + if g_ind == 0: all_flat = sorted(all_flat) all_flats.append(all_flat) flat_depnodes = get_flat_init_depnodes(depnodes) if not match_results(all_results, test_results[test_nb], 
flat_depnodes): FAILED.add(test_nb) - # fds continue if FAILED: diff --git a/test/analysis/dg_test_02_implicit_expected.json b/test/analysis/dg_test_02_implicit_expected.json index 9394f01d..cfcf7258 100644 --- a/test/analysis/dg_test_02_implicit_expected.json +++ b/test/analysis/dg_test_02_implicit_expected.json @@ -1 +1 @@ -[{"has_loop": false, "EAX": "0x4", "satisfiability": true, "constraints": {"zf_init": "0x1"}}, {"has_loop": false, "EAX": "0x3", "satisfiability": true, "constraints": {"zf_init": "0x0"}}] +[{"has_loop": false, "EAX": "0x4", "satisfiability": true, "constraints": {"zf": "0x1"}}, {"has_loop": false, "EAX": "0x3", "satisfiability": true, "constraints": {"zf": "0x0"}}] diff --git a/test/analysis/dg_test_04_expected.json b/test/analysis/dg_test_04_expected.json index fb115835..24687e4a 100644 --- a/test/analysis/dg_test_04_expected.json +++ b/test/analysis/dg_test_04_expected.json @@ -1 +1 @@ -[{"EAX": "EBX_init", "has_loop": false}] +[{"EAX": "EBX", "has_loop": false}] diff --git a/test/analysis/dg_test_04_implicit_expected.json b/test/analysis/dg_test_04_implicit_expected.json index 73e7209e..21dbfc96 100644 --- a/test/analysis/dg_test_04_implicit_expected.json +++ b/test/analysis/dg_test_04_implicit_expected.json @@ -1 +1 @@ -[{"has_loop": false, "EAX": "EBX_init", "satisfiability": true, "constraints": {}}, {"has_loop": true, "EAX": "EBX_init", "satisfiability": false, "constraints": {}}] +[{"has_loop": false, "EAX": "EBX", "satisfiability": true, "constraints": {}}, {"has_loop": true, "EAX": "EBX", "satisfiability": false, "constraints": {}}] diff --git a/test/analysis/dg_test_06_implicit_expected.json b/test/analysis/dg_test_06_implicit_expected.json index bda75296..be4e9afb 100644 --- a/test/analysis/dg_test_06_implicit_expected.json +++ b/test/analysis/dg_test_06_implicit_expected.json @@ -1 +1 @@ -[{"has_loop": false, "EAX": "0x1", "satisfiability": true, "constraints": {"EAX_init": "0xffffffff"}}, {"has_loop": false, "EAX": "0x2", 
"satisfiability": false, "constraints": {}}] +[{"has_loop": false, "EAX": "0x1", "satisfiability": true, "constraints": {"EAX": "0xffffffff"}}, {"has_loop": false, "EAX": "0x2", "satisfiability": false, "constraints": {}}] diff --git a/test/analysis/dg_test_10_implicit_expected.json b/test/analysis/dg_test_10_implicit_expected.json index 05b34918..36a84788 100644 --- a/test/analysis/dg_test_10_implicit_expected.json +++ b/test/analysis/dg_test_10_implicit_expected.json @@ -1 +1 @@ -[{"has_loop": false, "EAX": "0x1", "EBX": "0x3", "satisfiability": true, "constraints": {"zf_init": "0x0"}}, {"has_loop": false, "EAX": "0x2", "EBX": "0x3", "satisfiability": false, "constraints": {}}, {"has_loop": false, "EAX": "0x1", "EBX": "0x4", "satisfiability": false, "constraints": {}}, {"has_loop": false, "EAX": "0x2", "EBX": "0x4", "satisfiability": true, "constraints": {"zf_init": "0x1"}}] +[{"has_loop": false, "EAX": "0x1", "EBX": "0x3", "satisfiability": true, "constraints": {"zf": "0x0"}}, {"has_loop": false, "EAX": "0x2", "EBX": "0x3", "satisfiability": false, "constraints": {}}, {"has_loop": false, "EAX": "0x1", "EBX": "0x4", "satisfiability": false, "constraints": {}}, {"has_loop": false, "EAX": "0x2", "EBX": "0x4", "satisfiability": true, "constraints": {"zf": "0x1"}}] diff --git a/test/analysis/dse.py b/test/analysis/dse.py index 5a72db34..a05d8595 100644 --- a/test/analysis/dse.py +++ b/test/analysis/dse.py @@ -34,8 +34,7 @@ class DSETest(object): self.myjit = jitter(jitter_engine) self.myjit.init_stack() - self.myjit.jit.log_regs = True - self.myjit.jit.log_mn = True + self.myjit.set_trace_log() self.dse = None self.assembly = None @@ -70,17 +69,17 @@ class DSETest(object): def asm(self): mn_x86 = self.machine.mn - blocks, symbol_pool = parse_asm.parse_txt( + blocks, loc_db = parse_asm.parse_txt( mn_x86, self.arch_attrib, self.TXT, - symbol_pool=self.myjit.ir_arch.symbol_pool + loc_db=self.myjit.ir_arch.loc_db ) # fix shellcode addr - 
symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) + loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) output = StrPatchwork() - patches = asm_resolve_final(mn_x86, blocks, symbol_pool) + patches = asm_resolve_final(mn_x86, blocks, loc_db) for offset, raw in patches.items(): output[offset] = raw diff --git a/test/arch/aarch64/arch.py b/test/arch/aarch64/arch.py index a6aa7ba5..cba175e6 100644 --- a/test/arch/aarch64/arch.py +++ b/test/arch/aarch64/arch.py @@ -2,9 +2,9 @@ import sys import time from pdb import pm from miasm2.arch.aarch64.arch import * -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() reg_tests_aarch64 = [ ("XXXXXXXX MOV W1, WZR", @@ -1814,7 +1814,7 @@ for s, l in reg_tests_aarch64[:]: print s print mn assert(str(mn) == s) - l = mn_aarch64.fromstring(s, symbol_pool, 'l') + l = mn_aarch64.fromstring(s, loc_db, 'l') assert(str(l) == s) a = mn_aarch64.asm(l) print [x for x in a] diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index ca27ef9d..677d474f 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -16,23 +16,18 @@ class Asm_Test(object): self.myjit = Machine("aarch64l").jitter(jitter) self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - - def __call__(self): self.asm() self.run() self.check() - def asm(self): - blocks, symbol_pool = parse_asm.parse_txt(mn_aarch64, 'l', self.TXT, - symbol_pool = self.myjit.ir_arch.symbol_pool) + blocks, loc_db = parse_asm.parse_txt(mn_aarch64, 'l', self.TXT, + loc_db = self.myjit.ir_arch.loc_db) # fix shellcode addr - symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) + loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) s = StrPatchwork() - patches = asmblock.asm_resolve_final(mn_aarch64, blocks, symbol_pool) + patches = 
asmblock.asm_resolve_final(mn_aarch64, blocks, loc_db) for offset, raw in patches.items(): s[offset] = raw diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index a951689b..d92c24b2 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -1,45 +1,10 @@ import time from miasm2.arch.arm.arch import * -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB from pdb import pm -symbol_pool = AsmSymbolPool() -if 0: - a = bs('00') - b = bs('01') - c = bs(l=2) - d = bs(l=4, fname='rd') - e = bs_name(l=1, name={'ADD': 0, 'SUB': 1}) - assert(isinstance(e, bs_divert)) - scc = bs_mod_name(l=1, mn_mod=['', 'S']) - f = bs(l=1, cls=(arm_reg,)) - - class arm_mov(mn_arm): - fields = [bs('0000'), bs('0000'), bs('0000')] - - class arm_DATA(mn_arm): - fields = [bs('1111'), e, scc, f, bs('0')] - mn = mn_arm.dis(0xF000000) - - -if 0: - import cProfile - cProfile.run('mn_arm.dis("\xe1\xa0\xa0\x06", "l")') - # l = mn_arm.dis(bin_stream("\xe1\xa0\xa0\x06"), mode_arm) - # print l - """ - mode = 64 - l = mn_x86.fromstring("ADC DWORD PTR [RAX], 0x11223344", mode) - print 'xx' - #t= time.time() - import cProfile - def f(): - x = l.asm(mode) - print x - cProfile.run('f()') - """ - +loc_db = LocationDB() def h2i(s): return s.replace(' ', '').decode('hex') @@ -268,15 +233,11 @@ for s, l in reg_tests_arm: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] - l = mn_arm.fromstring(s, symbol_pool, 'l') - # print l + l = mn_arm.fromstring(s, loc_db, 'l') assert(str(l) == s) a = mn_arm.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) reg_tests_armt = [ @@ -723,36 +684,14 @@ for s, l in reg_tests_armt: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] - l = mn_armt.fromstring(s, symbol_pool, 'l') - # print l + l = mn_armt.fromstring(s, loc_db, 'l') assert(str(l) == s) print 'Asm..', l a = mn_armt.asm(l) print [x for x in a] print 
repr(b) - # print mn.args assert(b in a) -""" -print "*"*30, "START SPECIAL PARSING", "*"*30 -parse_tests = [ - "MOV LR, toto", - "MOV LR, 1+toto", - "MOV LR, (lend-lstart)^toto<<<R1", - "MOV LR, R1 LSL (l_end-l_start)^toto<<<R1", - "MOV LR, R1 LSL (l_end-l_start)^toto<<<R1", - "EOR R0, R1, toto^titi+1", - ] - -for l in parse_tests: - print "-"*80 - l = mn_arm.fromstring(l, 'l') - print l.name, ", ".join([str(a) for a in l.args]) -""" - - print 'TEST time', time.time() - ts # speed test arm @@ -790,7 +729,6 @@ instr_num = 0 ts = time.time() while off < bs.getlen(): mn = mn_armt.dis(bs, 'l', off) - # print instr_num, off, str(mn) instr_num += 1 off += mn.l print 'instr per sec:', instr_num / (time.time() - ts) diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index d9e6aa76..64cda610 100755 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -9,29 +9,30 @@ from miasm2.arch.arm.arch import mn_arm as mn from miasm2.arch.arm.sem import ir_arml as ir_arch from miasm2.arch.arm.regs import * from miasm2.expression.expression import * -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB from pdb import pm logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_arch().IRDst]) -symbol_pool = AsmSymbolPool() def M(addr): return ExprMem(ExprInt(addr, 16), 16) def compute(asm, inputstate={}, debug=False): + loc_db = LocationDB() sympool = dict(regs_init) sympool.update({k: ExprInt(v, k.size) for k, v in inputstate.iteritems()}) - interm = ir_arch() - symexec = SymbolicExecutionEngine(interm, sympool) - instr = mn.fromstring(asm, symbol_pool, "l") + ir_tmp = ir_arch(loc_db) + ircfg = ir_tmp.new_ircfg() + symexec = SymbolicExecutionEngine(ir_tmp, sympool) + instr = mn.fromstring(asm, loc_db, "l") code = mn.asm(instr)[0] instr = mn.dis(code, "l") instr.offset = inputstate.get(PC, 0) - interm.add_instr(instr) - symexec.run_at(instr.offset) + lbl = ir_tmp.add_instr_to_ircfg(instr, ircfg) + 
symexec.run_at(ircfg, lbl) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index c6b68c0c..1cbb554d 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -1,10 +1,10 @@ import time from pdb import pm -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB from miasm2.arch.mips32.arch import * -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() reg_tests_mips32 = [ ("004496D8 ADDU GP, GP, T9", @@ -228,13 +228,9 @@ for s, l in reg_tests_mips32: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] - l = mn_mips32.fromstring(s, symbol_pool, 'b') - # print l + l = mn_mips32.fromstring(s, loc_db, 'b') assert(str(l) == s) a = mn_mips32.asm(l, 'b') print [x for x in a] print repr(b) - # print mn.args assert(b in a) diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index f03a32d7..da792874 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -18,21 +18,18 @@ class Asm_Test(object): self.myjit = Machine("mips32l").jitter(jitter) self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - def __call__(self): self.asm() self.run() self.check() def asm(self): - blocks, symbol_pool = parse_asm.parse_txt(mn_mips32, 'l', self.TXT, - symbol_pool=self.myjit.ir_arch.symbol_pool) + blocks, loc_db = parse_asm.parse_txt(mn_mips32, 'l', self.TXT, + loc_db=self.myjit.ir_arch.loc_db) # fix shellcode addr - symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) + loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) s = StrPatchwork() - patches = asmblock.asm_resolve_final(mn_mips32, blocks, symbol_pool) + patches = asmblock.asm_resolve_final(mn_mips32, blocks, loc_db) for offset, raw in patches.items(): s[offset] = raw diff --git a/test/arch/msp430/arch.py 
b/test/arch/msp430/arch.py index 3df2becb..91de95b3 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -1,9 +1,9 @@ import time from pdb import pm from miasm2.arch.msp430.arch import * -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() def h2i(s): return s.replace(' ', '').decode('hex') @@ -95,13 +95,9 @@ for s, l in reg_tests_msp: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] - l = mn_msp430.fromstring(s, symbol_pool, None) - # print l + l = mn_msp430.fromstring(s, loc_db, None) assert(str(l) == s) a = mn_msp430.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) diff --git a/test/arch/msp430/sem.py b/test/arch/msp430/sem.py index 3b2c2f2e..10e57e36 100755 --- a/test/arch/msp430/sem.py +++ b/test/arch/msp430/sem.py @@ -9,25 +9,29 @@ from miasm2.arch.msp430.arch import mn_msp430 as mn, mode_msp430 as mode from miasm2.arch.msp430.sem import ir_msp430 as ir_arch from miasm2.arch.msp430.regs import * from miasm2.expression.expression import * +from miasm2.core.locationdb import LocationDB logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([res, ir_arch().IRDst]) + def M(addr): return ExprMem(ExprInt(addr, 16), 16) def compute(asm, inputstate={}, debug=False): + loc_db = LocationDB() sympool = dict(regs_init) sympool.update({k: ExprInt(v, k.size) for k, v in inputstate.iteritems()}) - interm = ir_arch() - symexec = SymbolicExecutionEngine(interm, sympool) + ir_tmp = ir_arch(loc_db) + ircfg = ir_tmp.new_ircfg() + symexec = SymbolicExecutionEngine(ir_tmp, sympool) instr = mn.fromstring(asm, mode) code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(PC, 0) - interm.add_instr(instr) - symexec.run_at(instr.offset) + loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg) + symexec.run_at(ircfg, loc_key) if debug: for k, v in 
symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/sh4/arch.py b/test/arch/sh4/arch.py index 574dcf49..f744b215 100644 --- a/test/arch/sh4/arch.py +++ b/test/arch/sh4/arch.py @@ -2,9 +2,9 @@ import time from pdb import pm from sys import stderr from miasm2.arch.sh4.arch import * -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() def h2i(s): return s.replace(' ', '').decode('hex') @@ -398,15 +398,11 @@ for s, l in reg_tests_sh4: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] - l = mn_sh4.fromstring(s, symbol_pool, None) - # print l + l = mn_sh4.fromstring(s, loc_db, None) assert(str(l) == s) a = mn_sh4.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 05b31815..43e973e1 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -5,9 +5,9 @@ from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, \ base_expr, rmarg, print_size from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.core.bin_stream import bin_stream_str -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB -symbol_pool = AsmSymbolPool() +loc_db = LocationDB() mylabel16 = m2_expr.ExprId('mylabel16', 16) mylabel32 = m2_expr.ExprId('mylabel32', 32) @@ -3062,17 +3062,13 @@ for mode, s, l, in reg_tests: print s print mn assert(str(mn).strip() == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] print 'fromstring', repr(s) - l = mn_x86.fromstring(s, symbol_pool, mode) - # print l + l = mn_x86.fromstring(s, loc_db, mode) print 'str args', [(str(x), x.size) for x in l.args] assert(str(l).strip(' ') == s) a = mn_x86.asm(l) print 'asm result', [x for x in a] print repr(b) - # test_file[mode[0]].write(b) for x in a: print "BYTES", repr(x) @@ -3086,7 +3082,6 @@ for mode, s, 
l, in reg_tests: assert(str(rl).strip(' ') == s) print repr(b), a assert(b in a) - # print mn.args print 'TEST time', time.time() - ts @@ -3118,9 +3113,7 @@ def profile_dis(o): print 'instr per sec:', instr_num / (time.time() - ts) import cProfile -# cProfile.run(r'mn_x86.dis("\x81\x54\x18\xfe\x44\x33\x22\x11", m32)') cProfile.run('profile_dis(o)') -# profile_dis(o) # Test instruction representation with prefix instr_bytes = '\x65\xc7\x00\x09\x00\x00\x00' diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index b3b7e940..0783089d 100755 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -12,24 +12,23 @@ from miasm2.arch.x86.arch import mn_x86 as mn from miasm2.arch.x86.sem import ir_x86_32 as ir_32, ir_x86_64 as ir_64 from miasm2.arch.x86.regs import * from miasm2.expression.expression import * -from miasm2.expression.simplifications import expr_simp +from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm, asmblock -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_32().IRDst, ir_64().IRDst]) -symbol_pool = AsmSymbolPool() m32 = 32 m64 = 64 -def symb_exec(interm, inputstate, debug): +def symb_exec(lbl, ir_arch, ircfg, inputstate, debug): sympool = dict(regs_init) sympool.update(inputstate) - symexec = SymbolicExecutionEngine(interm, sympool) - symexec.run_at(0) + symexec = SymbolicExecutionEngine(ir_arch, sympool) + symexec.run_at(ircfg, lbl) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: @@ -38,23 +37,25 @@ def symb_exec(interm, inputstate, debug): if k not in EXCLUDE_REGS and regs_init.get(k, None) != v} def compute(ir, mode, asm, inputstate={}, debug=False): - instr = mn.fromstring(asm, symbol_pool, mode) + loc_db = LocationDB() + instr = mn.fromstring(asm, loc_db, mode) code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(EIP, 0) - 
interm = ir() - interm.add_instr(instr) - return symb_exec(interm, inputstate, debug) + ir_arch = ir(loc_db) + ircfg = ir_arch.new_ircfg() + lbl = ir_arch.add_instr_to_ircfg(instr, ircfg) + return symb_exec(lbl, ir_arch, ircfg, inputstate, debug) def compute_txt(ir, mode, txt, inputstate={}, debug=False): - blocks, symbol_pool = parse_asm.parse_txt(mn, mode, txt) - symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) - patches = asmblock.asm_resolve_final(mn, blocks, symbol_pool) - interm = ir(symbol_pool) - for bbl in blocks: - interm.add_block(bbl) - return symb_exec(interm, inputstate, debug) + asmcfg, loc_db = parse_asm.parse_txt(mn, mode, txt) + loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) + patches = asmblock.asm_resolve_final(mn, asmcfg, loc_db) + ir_arch = ir(loc_db) + lbl = loc_db.get_name_location("main") + ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) + return symb_exec(lbl, ir_arch, ircfg, inputstate, debug) op_add = lambda a, b: a+b op_sub = lambda a, b: a-b diff --git a/test/arch/x86/unit/access_xmm.py b/test/arch/x86/unit/access_xmm.py new file mode 100644 index 00000000..950c8b56 --- /dev/null +++ b/test/arch/x86/unit/access_xmm.py @@ -0,0 +1,16 @@ +#! 
/usr/bin/env python2 +"""Test getter and setter for XMM registers (128 bits)""" + +from miasm2.analysis.machine import Machine + +# Jitter engine doesn't matter, use the always available 'python' one +myjit = Machine("x86_32").jitter("python") + +# Test basic access (get) +assert myjit.cpu.XMM0 == 0 + +# Test set +myjit.cpu.XMM1 = 0x00112233445566778899aabbccddeeffL + +# Ensure set has been correctly handled +assert myjit.cpu.XMM1 == 0x00112233445566778899aabbccddeeffL diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index 961967f9..91da1942 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -18,9 +18,6 @@ class Asm_Test(object): self.myjit = Machine(self.arch_name).jitter(jitter_engine) self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - def test_init(self): pass @@ -43,12 +40,12 @@ class Asm_Test(object): assert(self.myjit.pc == self.ret_addr) def asm(self): - blocks, symbol_pool = parse_asm.parse_txt(mn_x86, self.arch_attrib, self.TXT, - symbol_pool = self.myjit.ir_arch.symbol_pool) + blocks, loc_db = parse_asm.parse_txt(mn_x86, self.arch_attrib, self.TXT, + loc_db = self.myjit.ir_arch.loc_db) # fix shellcode addr - symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) + loc_db.set_location_offset(loc_db.get_name_location("main"), 0x0) s = StrPatchwork() - patches = asmblock.asm_resolve_final(mn_x86, blocks, symbol_pool) + patches = asmblock.asm_resolve_final(mn_x86, blocks, loc_db) for offset, raw in patches.items(): s[offset] = raw @@ -81,10 +78,6 @@ class Asm_Test_16(Asm_Test): self.myjit.stack_size = 0x1000 self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - - def init_machine(self): self.myjit.vm.add_memory_page(self.run_addr, PAGE_READ | PAGE_WRITE, self.assembly) self.myjit.push_uint16_t(self.ret_addr) diff --git a/test/arch/x86/unit/mn_cdq.py b/test/arch/x86/unit/mn_cdq.py index b6abc781..947b40bb 100644 --- 
a/test/arch/x86/unit/mn_cdq.py +++ b/test/arch/x86/unit/mn_cdq.py @@ -10,7 +10,7 @@ class Test_CBW_16(Asm_Test_16): MYSTRING = "test CBW 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654321 @@ -31,7 +31,7 @@ class Test_CBW_16_signed(Asm_Test_16): MYSTRING = "test CBW 16 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654381 @@ -52,7 +52,7 @@ class Test_CBW_32(Asm_Test_32): MYSTRING = "test CBW 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654321 @@ -73,7 +73,7 @@ class Test_CBW_32_signed(Asm_Test_32): MYSTRING = "test CBW 32 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654381 @@ -94,7 +94,7 @@ class Test_CDQ_32(Asm_Test_32): MYSTRING = "test cdq 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x77654321 @@ -115,7 +115,7 @@ class Test_CDQ_32_signed(Asm_Test_32): MYSTRING = "test cdq 32 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654321 @@ -136,7 +136,7 @@ class Test_CDQ_64(Asm_Test_64): MYSTRING = "test cdq 64" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + 
self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567877654321 @@ -157,7 +157,7 @@ class Test_CDQ_64_signed(Asm_Test_64): MYSTRING = "test cdq 64 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567887654321 @@ -178,7 +178,7 @@ class Test_CDQE_64(Asm_Test_64): MYSTRING = "test cdq 64" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567877654321 @@ -199,7 +199,7 @@ class Test_CDQE_64_signed(Asm_Test_64): MYSTRING = "test cdq 64 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567887654321 @@ -220,7 +220,7 @@ class Test_CWD_32(Asm_Test_32): MYSTRING = "test cdq 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654321 @@ -241,7 +241,7 @@ class Test_CWD_32_signed(Asm_Test_32): MYSTRING = "test cdq 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87658321 @@ -262,7 +262,7 @@ class Test_CWD_32(Asm_Test_32): MYSTRING = "test cdq 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654321 @@ -283,7 +283,7 @@ class Test_CWDE_32(Asm_Test_32): MYSTRING = "test cwde 32" def prepare(self): - 
self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.EAX = 0x87654321 @@ -304,7 +304,7 @@ class Test_CWDE_32_signed(Asm_Test_32): MYSTRING = "test cwde 32 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x87658321 @@ -325,7 +325,7 @@ class Test_CWDE_64(Asm_Test_64): MYSTRING = "test cwde 64" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567887654321 @@ -346,7 +346,7 @@ class Test_CWDE_64_signed(Asm_Test_64): MYSTRING = "test cwde 64 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567887658321 @@ -367,7 +367,7 @@ class Test_CQO_64(Asm_Test_64): MYSTRING = "test cwde 64" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x1234567887654321 @@ -388,7 +388,7 @@ class Test_CQO_64_signed(Asm_Test_64): MYSTRING = "test cwde 64 signed" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.myjit.cpu.RAX = 0x8234567887658321 diff --git a/test/arch/x86/unit/mn_int.py b/test/arch/x86/unit/mn_int.py index 09792371..efacb105 100755 --- a/test/arch/x86/unit/mn_int.py +++ b/test/arch/x86/unit/mn_int.py @@ -8,12 +8,18 @@ from asm_test import Asm_Test_32 class Test_INT(Asm_Test_32): TXT = ''' main: + MOV ECX, 0x10 + loop: INT 0x42 + 
DEC ECX + JNZ loop + ret: RET ''' def set_int_num(self, jitter): - self.int_num = jitter.cpu.get_interrupt_num() + assert jitter.cpu.get_interrupt_num() == 0x42 + self.int_num += 1 jitter.cpu.set_exception(0) return True @@ -24,7 +30,7 @@ class Test_INT(Asm_Test_32): self.set_int_num) def check(self): - assert self.int_num == 0x42 + assert self.int_num == 0x10 self.myjit.cpu.set_interrupt_num(14) assert self.myjit.cpu.get_interrupt_num() == 14 diff --git a/test/arch/x86/unit/mn_pushpop.py b/test/arch/x86/unit/mn_pushpop.py index 7ac400c0..6e9005ca 100755 --- a/test/arch/x86/unit/mn_pushpop.py +++ b/test/arch/x86/unit/mn_pushpop.py @@ -21,7 +21,7 @@ class Test_PUSHAD_32(Asm_Test_32): MYSTRING = "test pushad 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -48,7 +48,7 @@ class Test_PUSHA_32(Asm_Test_32): MYSTRING = "test pusha 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -75,7 +75,7 @@ class Test_PUSHA_16(Asm_Test_16): MYSTRING = "test pusha 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -102,7 +102,7 @@ class Test_PUSHAD_16(Asm_Test_16): MYSTRING = "test pushad 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -129,7 +129,7 @@ class Test_PUSH_mode32_32(Asm_Test_32): MYSTRING = "test push mode32 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): 
init_regs(self) @@ -152,7 +152,7 @@ class Test_PUSH_mode32_16(Asm_Test_32): MYSTRING = "test push mode32 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -175,7 +175,7 @@ class Test_PUSH_mode16_16(Asm_Test_16): MYSTRING = "test push mode16 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -198,7 +198,7 @@ class Test_PUSH_mode16_32(Asm_Test_16): MYSTRING = "test push mode16 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): init_regs(self) @@ -221,7 +221,7 @@ class Test_POP_mode32_32(Asm_Test_32): MYSTRING = "test pop mode32 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.value = 0x11223344 @@ -243,7 +243,7 @@ class Test_POP_mode32_16(Asm_Test_32): MYSTRING = "test pop mode32 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.value = 0x1122 @@ -265,7 +265,7 @@ class Test_POP_mode16_16(Asm_Test_16): MYSTRING = "test pop mode16 16" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def test_init(self): self.value = 0x1122 @@ -287,7 +287,7 @@ class Test_POP_mode16_32(Asm_Test_16): MYSTRING = "test pop mode16 32" def prepare(self): - self.myjit.ir_arch.symbol_pool.add_label("lbl_ret", self.ret_addr) + self.myjit.ir_arch.loc_db.add_location("lbl_ret", self.ret_addr) def 
test_init(self): self.value = 0x11223344 diff --git a/test/arch/x86/unit/mn_strings.py b/test/arch/x86/unit/mn_strings.py index 3cb70e2a..8ca148e5 100755 --- a/test/arch/x86/unit/mn_strings.py +++ b/test/arch/x86/unit/mn_strings.py @@ -21,7 +21,8 @@ class Test_SCAS(Asm_Test_32): def check(self): assert(self.myjit.cpu.ECX == len(self.MYSTRING)) - assert(self.myjit.cpu.EDI == self.myjit.ir_arch.symbol_pool.getby_name('mystr').offset + len(self.MYSTRING)+1) + mystr = self.myjit.ir_arch.loc_db.get_name_location('mystr') + assert(self.myjit.cpu.EDI == self.myjit.ir_arch.loc_db.get_location_offset(mystr) + len(self.MYSTRING)+1) class Test_MOVS(Asm_Test_32): @@ -42,8 +43,10 @@ class Test_MOVS(Asm_Test_32): def check(self): assert(self.myjit.cpu.ECX == 0) - assert(self.myjit.cpu.EDI == self.myjit.ir_arch.symbol_pool.getby_name('buffer').offset + len(self.MYSTRING)) - assert(self.myjit.cpu.ESI == self.myjit.ir_arch.symbol_pool.getby_name('mystr').offset + len(self.MYSTRING)) + buffer = self.myjit.ir_arch.loc_db.get_name_location('buffer') + assert(self.myjit.cpu.EDI == self.myjit.ir_arch.loc_db.get_location_offset(buffer) + len(self.MYSTRING)) + mystr = self.myjit.ir_arch.loc_db.get_name_location('mystr') + assert(self.myjit.cpu.ESI == self.myjit.ir_arch.loc_db.get_location_offset(mystr) + len(self.MYSTRING)) if __name__ == "__main__": diff --git a/test/core/asmblock.py b/test/core/asmblock.py index 7f0dbc5f..cd1d262a 100644 --- a/test/core/asmblock.py +++ b/test/core/asmblock.py @@ -3,7 +3,7 @@ from pdb import pm from miasm2.arch.x86.disasm import dis_x86_32 from miasm2.analysis.binary import Container from miasm2.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \ - AsmLabel, AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \ + AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \ bbl_simplifier from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker from miasm2.expression.expression import ExprId @@ -19,57 +19,57 @@ first_block = mdis.dis_block(0) assert 
len(first_block.lines) == 5 print first_block -## Test redisassemble blocks +## Test redisassemble asmcfg first_block_bis = mdis.dis_block(0) assert len(first_block.lines) == len(first_block_bis.lines) print first_block_bis ## Disassembly of several block, with cache -blocks = mdis.dis_multiblock(0) -assert len(blocks) == 17 +asmcfg = mdis.dis_multiblock(0) +assert len(asmcfg) == 17 -## Test redisassemble blocks -blocks = mdis.dis_multiblock(0) -assert len(blocks) == 17 +## Test redisassemble asmcfg +asmcfg = mdis.dis_multiblock(0) +assert len(asmcfg) == 17 ## Equality between assembly lines is not yet implemented -assert len(blocks.heads()) == 1 -assert len(blocks.heads()[0].lines) == len(first_block.lines) +assert len(asmcfg.heads()) == 1 +assert len(asmcfg.loc_key_to_block(asmcfg.heads()[0]).lines) == len(first_block.lines) # Test AsmCFG -assert isinstance(blocks, AsmCFG) -assert len(blocks.pendings) == 0 -assert len(blocks.nodes()) == 17 -assert len(blocks.edges2constraint) == len(blocks.edges()) -assert len(blocks.edges()) == 24 -assert blocks.getby_offset(0x63).lines[0].offset == 0x5f -assert blocks.getby_offset(0x69).lines[0].offset == 0x69 +assert isinstance(asmcfg, AsmCFG) +assert len(asmcfg.pendings) == 0 +assert len(asmcfg.nodes()) == 17 +assert len(asmcfg.edges2constraint) == len(asmcfg.edges()) +assert len(asmcfg.edges()) == 24 +assert asmcfg.getby_offset(0x63).lines[0].offset == 0x5f +assert asmcfg.getby_offset(0x69).lines[0].offset == 0x69 ## Convert to dot -open("graph.dot", "w").write(blocks.dot()) +open("graph.dot", "w").write(asmcfg.dot()) ## Modify the structure: link the first and the last block -leaves = blocks.leaves() +leaves = asmcfg.leaves() assert len(leaves) == 1 -last_block = leaves.pop() +last_block_loc_key = leaves.pop() ### Remove first_block for the rest of the graph -first_block = blocks.heads()[0] +first_block = asmcfg.loc_key_to_block(asmcfg.heads()[0]) assert len(first_block.bto) == 2 -for succ in blocks.successors(first_block): 
- blocks.del_edge(first_block, succ) +for succ in asmcfg.successors(first_block.loc_key): + asmcfg.del_edge(first_block.loc_key, succ) ### Modification must be reported from the graph assert len(first_block.bto) == 0 -assert last_block in blocks +assert last_block_loc_key in asmcfg.nodes() ### Remove predecessors of last block -for pred in blocks.predecessors(last_block): - blocks.del_edge(pred, last_block) +for pred in asmcfg.predecessors(last_block_loc_key): + asmcfg.del_edge(pred, last_block_loc_key) ### Link first and last block -blocks.add_edge(first_block, last_block, AsmConstraint.c_next) -### Only one link between two blocks +asmcfg.add_edge(first_block.loc_key, last_block_loc_key, AsmConstraint.c_next) +### Only one link between two asmcfg try: - blocks.add_edge(first_block, last_block, AsmConstraint.c_to) + asmcfg.add_edge(first_block, last_block_loc_key, AsmConstraint.c_to) good = False except AssertionError: good = True @@ -79,222 +79,233 @@ assert good assert len(first_block.bto) == 1 assert list(first_block.bto)[0].c_t == AsmConstraint.c_next -## Simplify the obtained graph to keep only blocks which reach a block +## Simplify the obtained graph to keep only asmcfg which reach a block ## finishing with RET def remove_useless_blocks(d_g, graph): """Remove leaves without a RET""" - for block in graph.leaves(): + for leaf_label in graph.leaves(): + block = graph.loc_key_to_block(leaf_label) if block.lines[-1].name != "RET": - graph.del_node(block) + graph.del_block(graph.loc_key_to_block(leaf_label)) ### Use a graph simplifier to recursively apply the simplification pass dg = DiGraphSimplifier() dg.enable_passes([remove_useless_blocks]) -blocks = dg(blocks) +asmcfg = dg(asmcfg) -### Only two blocks should remain -assert len(blocks) == 2 -assert first_block in blocks -assert last_block in blocks +### Only two asmcfg should remain +assert len(asmcfg) == 2 +assert first_block.loc_key in asmcfg.nodes() +assert last_block_loc_key in asmcfg.nodes() ## Graph the 
final output -open("graph2.dot", "w").write(blocks.dot()) +open("graph2.dot", "w").write(asmcfg.dot()) # Test helper methods -## Label2block should always be updated -assert blocks.label2block(first_block.label) == first_block -my_block = AsmBlock(AsmLabel("testlabel")) -blocks.add_node(my_block) -assert len(blocks) == 3 -assert blocks.label2block(first_block.label) == first_block -assert blocks.label2block(my_block.label) == my_block +## loc_key_to_block should always be updated +assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block +testlabel = mdis.loc_db.get_or_create_name_location("testlabel") +my_block = AsmBlock(testlabel) +asmcfg.add_block(my_block) +assert len(asmcfg) == 3 +assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block +assert asmcfg.loc_key_to_block(my_block.loc_key) == my_block -## Bad blocks -assert len(list(blocks.get_bad_blocks())) == 0 -assert len(list(blocks.get_bad_blocks_predecessors())) == 0 +## Bad asmcfg +assert len(list(asmcfg.get_bad_blocks())) == 0 +assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0 ### Add a bad block, not linked -my_bad_block = AsmBlockBad(AsmLabel("testlabel_bad")) -blocks.add_node(my_bad_block) -assert list(blocks.get_bad_blocks()) == [my_bad_block] -assert len(list(blocks.get_bad_blocks_predecessors())) == 0 +testlabel_bad = mdis.loc_db.get_or_create_name_location("testlabel_bad") +my_bad_block = AsmBlockBad(testlabel_bad) +asmcfg.add_block(my_bad_block) +assert list(asmcfg.get_bad_blocks()) == [my_bad_block] +assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0 ### Link the bad block and update edges -### Indeed, a sub-element has been modified (bto from a block from blocks) -my_block.bto.add(AsmConstraintTo(my_bad_block.label)) -blocks.rebuild_edges() -assert list(blocks.get_bad_blocks_predecessors()) == [my_block] +### Indeed, a sub-element has been modified (bto from a block from asmcfg) +my_block.bto.add(AsmConstraintTo(my_bad_block.loc_key)) +asmcfg.rebuild_edges() 
+assert list(asmcfg.get_bad_blocks_predecessors()) == [my_block.loc_key] ### Test strict option -my_block.bto.add(AsmConstraintTo(my_block.label)) -blocks.rebuild_edges() -assert list(blocks.get_bad_blocks_predecessors(strict=False)) == [my_block] -assert len(list(blocks.get_bad_blocks_predecessors(strict=True))) == 0 +my_block.bto.add(AsmConstraintTo(my_block.loc_key)) +asmcfg.rebuild_edges() +assert list(asmcfg.get_bad_blocks_predecessors(strict=False)) == [my_block.loc_key] +assert len(list(asmcfg.get_bad_blocks_predecessors(strict=True))) == 0 ## Sanity check -blocks.sanity_check() +asmcfg.sanity_check() ### Next on itself -my_block_ni = AsmBlock(AsmLabel("testlabel_nextitself")) -my_block_ni.bto.add(AsmConstraintNext(my_block_ni.label)) -blocks.add_node(my_block_ni) +testlabel_nextitself = mdis.loc_db.get_or_create_name_location("testlabel_nextitself") +my_block_ni = AsmBlock(testlabel_nextitself) +my_block_ni.bto.add(AsmConstraintNext(my_block_ni.loc_key)) +asmcfg.add_block(my_block_ni) error_raised = False try: - blocks.sanity_check() + asmcfg.sanity_check() except RuntimeError: error_raised = True assert error_raised ### Back to a normal state -blocks.del_node(my_block_ni) -blocks.sanity_check() +asmcfg.del_block(my_block_ni) +asmcfg.sanity_check() ### Multiple next on the same node -my_block_target = AsmBlock(AsmLabel("testlabel_target")) -blocks.add_node(my_block_target) -my_block_src1 = AsmBlock(AsmLabel("testlabel_src1")) -my_block_src2 = AsmBlock(AsmLabel("testlabel_src2")) -my_block_src1.bto.add(AsmConstraintNext(my_block_target.label)) -blocks.add_node(my_block_src1) +testlabel_target = mdis.loc_db.get_or_create_name_location("testlabel_target") +my_block_target = AsmBlock(testlabel_target) +asmcfg.add_block(my_block_target) +testlabel_src1 = mdis.loc_db.get_or_create_name_location("testlabel_src1") +testlabel_src2 = mdis.loc_db.get_or_create_name_location("testlabel_src2") +my_block_src1 = AsmBlock(testlabel_src1) +my_block_src2 = 
AsmBlock(testlabel_src2) +my_block_src1.bto.add(AsmConstraintNext(my_block_target.loc_key)) +asmcfg.add_block(my_block_src1) ### OK for now -blocks.sanity_check() +asmcfg.sanity_check() ### Add a second next from src2 to target (already src1 -> target) -my_block_src2.bto.add(AsmConstraintNext(my_block_target.label)) -blocks.add_node(my_block_src2) +my_block_src2.bto.add(AsmConstraintNext(my_block_target.loc_key)) +asmcfg.add_block(my_block_src2) error_raised = False try: - blocks.sanity_check() + asmcfg.sanity_check() except RuntimeError: error_raised = True assert error_raised -blocks.del_node(my_block_src2) -blocks.sanity_check() +asmcfg.del_block(my_block_src2) +asmcfg.sanity_check() ## Guess block size ### Initial state assert not hasattr(first_block, 'size') assert not hasattr(first_block, 'max_size') -blocks.guess_blocks_size(mdis.arch) +asmcfg.guess_blocks_size(mdis.arch) assert first_block.size == 39 -assert blocks.label2block(my_block_src1.label).size == 0 +assert asmcfg.loc_key_to_block(my_block_src1.loc_key).size == 0 assert first_block.max_size == 39 -assert blocks.label2block(my_block_src1.label).max_size == 0 +assert asmcfg.loc_key_to_block(my_block_src1.loc_key).max_size == 0 ## Check pendings ### Create a pending element -my_block_src = AsmBlock(AsmLabel("testlabel_pend_src")) -my_block_dst = AsmBlock(AsmLabel("testlabel_pend_dst")) -my_block_src.bto.add(AsmConstraintTo(my_block_dst.label)) -blocks.add_node(my_block_src) +testlabel_pend_src = mdis.loc_db.get_or_create_name_location("testlabel_pend_src") +testlabel_pend_dst = mdis.loc_db.get_or_create_name_location("testlabel_pend_dst") +my_block_src = AsmBlock(testlabel_pend_src) +my_block_dst = AsmBlock(testlabel_pend_dst) +my_block_src.bto.add(AsmConstraintTo(my_block_dst.loc_key)) +asmcfg.add_block(my_block_src) ### Check resulting state -assert len(blocks) == 7 -assert len(blocks.pendings) == 1 -assert my_block_dst.label in blocks.pendings -assert len(blocks.pendings[my_block_dst.label]) == 1 
-pending = list(blocks.pendings[my_block_dst.label])[0] -assert isinstance(pending, blocks.AsmCFGPending) +assert len(asmcfg) == 7 +assert len(asmcfg.pendings) == 1 +assert my_block_dst.loc_key in asmcfg.pendings +assert len(asmcfg.pendings[my_block_dst.loc_key]) == 1 +pending = list(asmcfg.pendings[my_block_dst.loc_key])[0] +assert isinstance(pending, asmcfg.AsmCFGPending) assert pending.waiter == my_block_src assert pending.constraint == AsmConstraint.c_to ### Sanity check must fail error_raised = False try: - blocks.sanity_check() + asmcfg.sanity_check() except RuntimeError: error_raised = True assert error_raised ### Pending must disappeared when adding expected block -blocks.add_node(my_block_dst) -assert len(blocks) == 8 -assert len(blocks.pendings) == 0 -blocks.sanity_check() +asmcfg.add_block(my_block_dst) +assert len(asmcfg) == 8 +assert len(asmcfg.pendings) == 0 +asmcfg.sanity_check() # Test block_merge data2 = "31c0eb0c31c9750c31d2eb0c31ffebf831dbebf031edebfc31f6ebf031e4c3".decode("hex") cont2 = Container.from_string(data2) mdis = dis_x86_32(cont2.bin_stream) ## Elements to merge -blocks = mdis.dis_multiblock(0) +asmcfg = mdis.dis_multiblock(0) ## Block alone -blocks.add_node(mdis.dis_block(0x1c)) +asmcfg.add_block(mdis.dis_block(0x1c)) ## Bad block -blocks.add_node(mdis.dis_block(len(data2))) +asmcfg.add_block(mdis.dis_block(len(data2))) ## Dump the graph before merging -open("graph3.dot", "w").write(blocks.dot()) +open("graph3.dot", "w").write(asmcfg.dot()) ## Apply merging -blocks = bbl_simplifier(blocks) +asmcfg = bbl_simplifier(asmcfg) ## Dump the graph after merging -open("graph4.dot", "w").write(blocks.dot()) +open("graph4.dot", "w").write(asmcfg.dot()) ## Check the final state -assert len(blocks) == 5 -assert len(list(blocks.get_bad_blocks())) == 1 -### Check "special" blocks -entry_blocks = blocks.heads() -bad_block = (block for block in entry_blocks - if isinstance(block, AsmBlockBad)).next() -entry_blocks.remove(bad_block) -alone_block = 
(block for block in entry_blocks - if len(blocks.successors(block)) == 0).next() -entry_blocks.remove(alone_block) +assert len(asmcfg) == 5 +assert len(list(asmcfg.get_bad_blocks())) == 1 +### Check "special" asmcfg +entry_asmcfg = asmcfg.heads() +bad_block_lbl = (lbl for lbl in entry_asmcfg + if isinstance(asmcfg.loc_key_to_block(lbl), AsmBlockBad)).next() +entry_asmcfg.remove(bad_block_lbl) +alone_block = (asmcfg.loc_key_to_block(lbl) for lbl in entry_asmcfg + if len(asmcfg.successors(lbl)) == 0).next() +entry_asmcfg.remove(alone_block.loc_key) assert alone_block.lines[-1].name == "RET" assert len(alone_block.lines) == 2 ### Check resulting function -entry_block = entry_blocks.pop() +entry_block = asmcfg.loc_key_to_block(entry_asmcfg.pop()) assert len(entry_block.lines) == 4 assert map(str, entry_block.lines) == ['XOR EAX, EAX', 'XOR EBX, EBX', 'XOR ECX, ECX', - 'JNZ loc_0000000000000014:0x00000014'] -assert len(blocks.successors(entry_block)) == 2 + 'JNZ loc_key_3'] +assert len(asmcfg.successors(entry_block.loc_key)) == 2 assert len(entry_block.bto) == 2 -nextb = blocks.label2block((cons.label for cons in entry_block.bto - if cons.c_t == AsmConstraint.c_next).next()) -tob = blocks.label2block((cons.label for cons in entry_block.bto - if cons.c_t == AsmConstraint.c_to).next()) +nextb = asmcfg.loc_key_to_block((cons.loc_key for cons in entry_block.bto + if cons.c_t == AsmConstraint.c_next).next()) +tob = asmcfg.loc_key_to_block((cons.loc_key for cons in entry_block.bto + if cons.c_t == AsmConstraint.c_to).next()) assert len(nextb.lines) == 4 assert map(str, nextb.lines) == ['XOR EDX, EDX', 'XOR ESI, ESI', 'XOR EDI, EDI', - 'JMP loc_0000000000000008:0x00000008'] -assert blocks.successors(nextb) == [nextb] + 'JMP loc_key_4'] +assert asmcfg.successors(nextb.loc_key) == [nextb.loc_key] assert len(tob.lines) == 2 assert map(str, tob.lines) == ['XOR EBP, EBP', - 'JMP loc_0000000000000014:0x00000014'] -assert blocks.successors(tob) == [tob] + 'JMP loc_key_3'] +assert 
asmcfg.successors(tob.loc_key) == [tob.loc_key] # Check split_block ## Without condition for a split, no change -blocks_bef = blocks.copy() -blocks.apply_splitting(mdis.symbol_pool) -assert blocks_bef == blocks +asmcfg_bef = asmcfg.copy() +asmcfg.apply_splitting(mdis.loc_db) +assert asmcfg_bef == asmcfg +open("graph5.dot", "w").write(asmcfg.dot()) ## Create conditions for a block split -inside_firstbbl = mdis.symbol_pool.getby_offset(4) +inside_firstbbl = mdis.loc_db.get_offset_location(4) tob.bto.add(AsmConstraintTo(inside_firstbbl)) -blocks.rebuild_edges() -assert len(blocks.pendings) == 1 -assert inside_firstbbl in blocks.pendings -blocks.apply_splitting(mdis.symbol_pool) +asmcfg.rebuild_edges() +assert len(asmcfg.pendings) == 1 +assert inside_firstbbl in asmcfg.pendings +asmcfg.apply_splitting(mdis.loc_db) ## Check result -assert len(blocks) == 6 -assert len(blocks.pendings) == 0 +assert len(asmcfg) == 6 +assert len(asmcfg.pendings) == 0 assert len(entry_block.lines) == 2 assert map(str, entry_block.lines) == ['XOR EAX, EAX', 'XOR EBX, EBX'] -assert len(blocks.successors(entry_block)) == 1 -newb = blocks.successors(entry_block)[0] +assert len(asmcfg.successors(entry_block.loc_key)) == 1 +lbl_newb = asmcfg.successors(entry_block.loc_key)[0] +newb = asmcfg.loc_key_to_block(lbl_newb) assert len(newb.lines) == 2 assert map(str, newb.lines) == ['XOR ECX, ECX', - 'JNZ loc_0000000000000014:0x00000014'] -preds = blocks.predecessors(newb) + 'JNZ loc_key_3'] +preds = asmcfg.predecessors(lbl_newb) assert len(preds) == 2 -assert entry_block in preds -assert tob in preds -assert blocks.edges2constraint[(entry_block, newb)] == AsmConstraint.c_next -assert blocks.edges2constraint[(tob, newb)] == AsmConstraint.c_to +assert entry_block.loc_key in preds +assert tob.loc_key in preds +assert asmcfg.edges2constraint[(entry_block.loc_key, lbl_newb)] == AsmConstraint.c_next +assert asmcfg.edges2constraint[(tob.loc_key, lbl_newb)] == AsmConstraint.c_to # Check double block split data 
= "74097405b8020000007405b803000000b804000000c3".decode('hex') cont = Container.from_string(data) mdis = dis_x86_32(cont.bin_stream) -blocks = mdis.dis_multiblock(0) +asmcfg = mdis.dis_multiblock(0) ## Check resulting disasm -assert len(blocks.nodes()) == 6 -blocks.sanity_check() +assert len(asmcfg.nodes()) == 6 +asmcfg.sanity_check() ## Check graph structure bbl0 = MatchGraphJoker(name="0") bbl2 = MatchGraphJoker(name="2") @@ -307,8 +318,18 @@ matcher = bbl0 >> bbl2 >> bbl4 >> bbl9 >> bblB >> bbl10 matcher += bbl2 >> bbl9 >> bbl10 matcher += bbl0 >> bblB -solutions = list(matcher.match(blocks)) +solutions = list(matcher.match(asmcfg)) assert len(solutions) == 1 solution = solutions.pop() -for jbbl, block in solution.iteritems(): - assert block.label.offset == int(jbbl._name, 16) +for jbbl, label in solution.iteritems(): + offset = mdis.loc_db.get_location_offset(label) + assert offset == int(jbbl._name, 16) + +loc_key_dum = mdis.loc_db.get_or_create_name_location("dummy_loc") +asmcfg.add_node(loc_key_dum) +error_raised = False +try: + asmcfg.sanity_check() +except RuntimeError: + error_raised = True +assert error_raised diff --git a/test/core/graph.py b/test/core/graph.py index 9f8afcae..b71c3d51 100644 --- a/test/core/graph.py +++ b/test/core/graph.py @@ -257,7 +257,7 @@ assert len([sol for sol in sols if sol[j1] == 1]) == 1 assert len([sol for sol in sols if sol[j1] == 2]) == 1 ## Check filter -j2 = MatchGraphJoker(name="son", restrict_out=False, filt=lambda node: node < 2) +j2 = MatchGraphJoker(name="son", restrict_out=False, filt=lambda graph, node: node < 2) matcher = j1 >> j2 >> j1 sols = list(matcher.match(graph)) assert len(sols) == 1 diff --git a/test/core/locationdb.py b/test/core/locationdb.py new file mode 100644 index 00000000..b9a5f707 --- /dev/null +++ b/test/core/locationdb.py @@ -0,0 +1,108 @@ +from miasm2.core.locationdb import LocationDB + + +# Basic tests (LocationDB description) +loc_db = LocationDB() +loc_key1 = loc_db.add_location() 
+loc_key2 = loc_db.add_location(offset=0x1234) +loc_key3 = loc_db.add_location(name="first_name") +loc_db.add_location_name(loc_key3, "second_name") +loc_db.set_location_offset(loc_key3, 0x5678) +loc_db.remove_location_name(loc_key3, "second_name") + +assert loc_db.get_location_offset(loc_key1) is None +assert loc_db.get_location_offset(loc_key2) == 0x1234 + +assert loc_db.pretty_str(loc_key1) == str(loc_key1) +assert loc_db.pretty_str(loc_key2) == "loc_1234" +assert loc_db.pretty_str(loc_key3) == "first_name" +loc_db.consistency_check() + +# Offset manipulation +loc_key4 = loc_db.add_location() +assert loc_db.get_location_offset(loc_key4) is None +loc_db.set_location_offset(loc_key4, 0x1122) +assert loc_db.get_location_offset(loc_key4) == 0x1122 +loc_db.unset_location_offset(loc_key4) +assert loc_db.get_location_offset(loc_key4) is None +try: + loc_db.set_location_offset(loc_key4, 0x1234) + has_raised = False +except KeyError: + has_raised = True +assert has_raised +assert loc_db.get_location_offset(loc_key4) is None +loc_db.set_location_offset(loc_key4, 0x1122) +try: + loc_db.set_location_offset(loc_key4, 0x1123) + has_raised = False +except ValueError: + has_raised = True +assert has_raised +assert loc_db.get_location_offset(loc_key4) == 0x1122 +loc_db.set_location_offset(loc_key4, 0x1123, force=True) +assert loc_db.get_location_offset(loc_key4) == 0x1123 +assert 0x1123 in loc_db.offsets +try: + loc_db.add_location(offset=0x1123) + has_raised = False +except ValueError: + has_raised = True +assert loc_db.add_location(offset=0x1123, strict=False) == loc_key4 +assert loc_db.get_offset_location(0x1123) == loc_key4 +assert loc_db.get_or_create_offset_location(0x1123) == loc_key4 +loc_key4_bis = loc_db.get_or_create_offset_location(0x1144) +assert loc_db.get_offset_location(0x1144) == loc_key4_bis +loc_db.consistency_check() + +# Names manipulation +loc_key5 = loc_db.add_location() +name1 = "name1" +name2 = "name2" +name3 = "name3" +assert 
len(loc_db.get_location_names(loc_key5)) == 0 +loc_db.add_location_name(loc_key5, name1) +loc_db.add_location_name(loc_key5, name2) +assert name1 in loc_db.names +assert name2 in loc_db.names +assert name1 in loc_db.get_location_names(loc_key5) +assert name2 in loc_db.get_location_names(loc_key5) +assert loc_db.get_name_location(name1) == loc_key5 +loc_db.remove_location_name(loc_key5, name1) +assert name1 not in loc_db.names +assert name1 not in loc_db.get_location_names(loc_key5) +try: + loc_db.remove_location_name(loc_key5, name1) + has_raised = False +except KeyError: + has_raised = True +try: + loc_db.add_location_name(loc_key1, name2) + has_raised = False +except KeyError: + has_raised = True +try: + loc_db.add_location(name=name2) + has_raised = False +except ValueError: + has_raised = True +assert loc_db.add_location(name=name2, strict=False) == loc_key5 +assert loc_db.get_or_create_name_location(name2) == loc_key5 +loc_key5_bis = loc_db.get_or_create_name_location(name3) +assert loc_db.get_name_location(name3) == loc_key5_bis +loc_db.consistency_check() + +# Merge +loc_db2 = LocationDB() +loc_db2.add_location(offset=0x3344) +loc_db2.add_location(name=name2) +loc_db.merge(loc_db2) +assert 0x3344 in loc_db.offsets +assert name2 in loc_db.names +loc_db.consistency_check() +assert loc_db.get_name_location(name2) == loc_key5 + +# Delete +loc_db.remove_location(loc_key5) +assert loc_db.get_name_location(name2) is None +loc_db.consistency_check() diff --git a/test/core/parse_asm.py b/test/core/parse_asm.py index 54f3be1d..ddb195d2 100755 --- a/test/core/parse_asm.py +++ b/test/core/parse_asm.py @@ -64,18 +64,19 @@ class TestParseAsm(unittest.TestCase): .string "toto" ''' - blocks, symbol_pool = parse_txt(mn_x86, 32, ASM0) + asmcfg, loc_db = parse_txt(mn_x86, 32, ASM0) patches = asm_resolve_final(mn_x86, - blocks, - symbol_pool) + asmcfg, + loc_db) lbls = [] for i in xrange(6): - lbls.append(symbol_pool.getby_name('lbl%d' % i)) + 
lbls.append(loc_db.get_name_location('lbl%d' % i)) # align test - assert(lbls[5].offset % 0x10 == 0) + offset = loc_db.get_location_offset(lbls[5]) + assert(offset % 0x10 == 0) lbl2block = {} - for block in blocks: - lbl2block[block.label] = block + for block in asmcfg.blocks: + lbl2block[block.loc_key] = block # dontsplit test assert(lbls[2] == lbl2block[lbls[1]].get_next()) assert(lbls[3] == lbl2block[lbls[2]].get_next()) @@ -94,13 +95,13 @@ class TestParseAsm(unittest.TestCase): RET ''' - blocks, symbol_pool = parse_txt(mn_x86, 32, ASM0) + asmcfg, loc_db = parse_txt(mn_x86, 32, ASM0) lbls = [] for i in xrange(2): - lbls.append(symbol_pool.getby_name('lbl%d' % i)) + lbls.append(loc_db.get_name_location('lbl%d' % i)) lbl2block = {} - for block in blocks: - lbl2block[block.label] = block + for block in asmcfg.blocks: + lbl2block[block.loc_key] = block # split test assert(lbl2block[lbls[1]].get_next() is None) diff --git a/test/core/sembuilder.py b/test/core/sembuilder.py index ebf9f385..f7a96b89 100644 --- a/test/core/sembuilder.py +++ b/test/core/sembuilder.py @@ -2,22 +2,23 @@ import inspect from pdb import pm from miasm2.core.sembuilder import SemBuilder +from miasm2.core.locationdb import LocationDB import miasm2.expression.expression as m2_expr -from miasm2.core.asmblock import AsmLabel + + # Test classes class IR(object): + def __init__(self, loc_db): + self.loc_db = loc_db IRDst = m2_expr.ExprId("IRDst", 32) def get_next_instr(self, _): - return AsmLabel("NEXT") - - def get_next_label(self, _): - return AsmLabel("NEXT") + return m2_expr.LocKey(0) - def gen_label(self): - return AsmLabel("GEN") + def get_next_loc_key(self, _): + return m2_expr.LocKey(0) class Instr(object): mode = 32 @@ -44,7 +45,8 @@ def test(Arg1, Arg2, Arg3): a = m2_expr.ExprId('A', 32) b = m2_expr.ExprId('B', 32) c = m2_expr.ExprId('C', 32) -ir = IR() +loc_db = LocationDB() +ir = IR(loc_db) instr = Instr() res = test(ir, instr, a, b, c) @@ -58,7 +60,7 @@ for statement in res[0]: print 
"[+] Blocks:" for irb in res[1]: - print irb.label + print irb.loc_key for assignblk in irb: for expr in assignblk: print expr diff --git a/test/expression/parser.py b/test/expression/parser.py index 9c01c8a1..1d5889fb 100644 --- a/test/expression/parser.py +++ b/test/expression/parser.py @@ -1,9 +1,10 @@ from miasm2.expression.parser import str_to_expr from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAff + ExprCond, ExprCompose, ExprOp, ExprAff, ExprLoc, LocKey for expr_test in [ExprInt(0x12, 32), ExprId('test', 32), + ExprLoc(LocKey(12), 32), ExprSlice(ExprInt(0x10, 32), 0, 8), ExprMem(ExprInt(0x10, 32), 32), ExprCond(ExprInt(0x10, 32), ExprInt(0x11, 32), ExprInt(0x12, 32)), diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py index a4e839cf..b2591a83 100644 --- a/test/expression/simplifications.py +++ b/test/expression/simplifications.py @@ -177,6 +177,10 @@ to_test = [(ExprInt(1, 32) - ExprInt(1, 32), ExprInt(0, 32)), (ExprInt(0x4142, 32)[:32], ExprInt(0x4142, 32)), (ExprInt(0x4142, 32)[:8], ExprInt(0x42, 8)), (ExprInt(0x4142, 32)[8:16], ExprInt(0x41, 8)), + (ExprOp('>>', ExprOp('<<', a, ExprInt(0x4, 32)), ExprInt(0x4, 32)), + ExprOp('&', a, ExprInt(0x0FFFFFFF, 32))), + (ExprOp('<<', ExprOp('>>', a, ExprInt(0x4, 32)), ExprInt(0x4, 32)), + ExprOp('&', a, ExprInt(0xFFFFFFF0, 32))), (a[:32], a), (a[:8][:8], a[:8]), (a[:16][:8], a[:8]), diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index 7d5bf44a..3158be60 100755 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -10,10 +10,15 @@ class TestSymbExec(unittest.TestCase): from miasm2.expression.expression import ExprInt, ExprId, ExprMem, \ ExprCompose, ExprAff from miasm2.arch.x86.sem import ir_x86_32 + from miasm2.core.locationdb import LocationDB from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import AssignBlock + loc_db = LocationDB() + ira = ir_x86_32(loc_db) + ircfg = 
ira.new_ircfg() + id_x = ExprId('x', 32) id_a = ExprId('a', 32) id_b = ExprId('b', 32) @@ -21,7 +26,7 @@ class TestSymbExec(unittest.TestCase): id_d = ExprId('d', 32) id_e = ExprId('e', 64) - sb = SymbolicExecutionEngine(ir_x86_32(), + sb = SymbolicExecutionEngine(ira, { ExprMem(ExprInt(0x4, 32), 8): ExprInt(0x44, 8), ExprMem(ExprInt(0x5, 32), 8): ExprInt(0x33, 8), @@ -222,16 +227,17 @@ class TestSymbExec(unittest.TestCase): assert found - sb_empty = SymbolicExecutionEngine(ir_x86_32(), {}) + sb_empty = SymbolicExecutionEngine(ira) sb_empty.dump() # Test memory full print 'full' - arch_addr8 = ir_x86_32() + arch_addr8 = ir_x86_32(loc_db) + ircfg = arch_addr8.new_ircfg() # Hack to obtain tiny address space arch_addr8.addrsize = 5 - sb_addr8 = SymbolicExecutionEngine(arch_addr8, {}) + sb_addr8 = SymbolicExecutionEngine(arch_addr8) sb_addr8.dump() # Fulfill memory sb_addr8.apply_change(ExprMem(ExprInt(0, 5), 256), ExprInt(0, 256)) diff --git a/test/ir/translators/z3_ir.py b/test/ir/translators/z3_ir.py index 6ae2dcd0..4806ad96 100644 --- a/test/ir/translators/z3_ir.py +++ b/test/ir/translators/z3_ir.py @@ -1,12 +1,16 @@ import z3 -from miasm2.core.asmblock import AsmLabel +from miasm2.core.locationdb import LocationDB from miasm2.expression.expression import * -from miasm2.ir.translators.translator import Translator -from miasm2.ir.translators.z3_ir import Z3Mem +from miasm2.ir.translators.z3_ir import Z3Mem, TranslatorZ3 # Some examples of use/unit tests. 
+loc_db = LocationDB() +translator1 = TranslatorZ3(endianness="<", loc_db=loc_db) +translator2 = TranslatorZ3(endianness=">", loc_db=loc_db) + + def equiv(z3_expr1, z3_expr2): s = z3.Solver() s.add(z3.Not(z3_expr1 == z3_expr2)) @@ -34,17 +38,17 @@ assert equiv(z3.BitVec('a', 32) + z3.BitVecVal(3, 32) - z3.BitVecVal(1, 32), # Z3Mem short tests # -------------------------------------------------------------------------- -mem = Z3Mem(endianness='<') # little endian +mem = Z3Mem(endianness='<') # little endian eax = z3.BitVec('EAX', 32) assert equiv( # @32[EAX] mem.get(eax, 32), # @16[EAX+2] . @16[EAX] - z3.Concat(mem.get(eax+2, 16), + z3.Concat(mem.get(eax+2, 16), mem.get(eax, 16))) # -------------------------------------------------------------------------- -ax = z3.BitVec('AX', 16) +ax = z3.BitVec('AX', 16) assert not equiv( # @16[EAX] with EAX = ZeroExtend(AX) mem.get(z3.ZeroExt(16, ax), 16), @@ -54,7 +58,7 @@ assert not equiv( # TranslatorZ3 tests # -------------------------------------------------------------------------- e = ExprId('x', 32) -ez3 = Translator.to_language('z3').from_expr(e) +ez3 = translator1.from_expr(e) z3_e = z3.BitVec('x', 32) assert equiv(ez3, z3_e) @@ -63,7 +67,7 @@ assert equiv(ez3, z3_e) four = ExprInt(4, 32) five = ExprInt(5, 32) e2 = (e + five + four) * five -ez3 = Translator.to_language('z3').from_expr(e2) +ez3 = translator1.from_expr(e2) z3_four = z3.BitVecVal(4, 32) z3_five = z3.BitVecVal(5, 32) @@ -74,7 +78,7 @@ assert equiv(ez3, z3_e2) emem = ExprMem(ExprInt(0xdeadbeef, 32), size=32) emem2 = ExprMem(ExprInt(0xfee1dead, 32), size=32) e3 = (emem + e) * ExprInt(2, 32) * emem2 -ez3 = Translator.to_language('z3').from_expr(e3) +ez3 = translator1.from_expr(e3) mem = Z3Mem() z3_emem = mem.get(z3.BitVecVal(0xdeadbeef, 32), 32) @@ -84,7 +88,7 @@ assert equiv(ez3, z3_e3) # -------------------------------------------------------------------------- e4 = emem * five -ez3 = Translator.to_language('z3').from_expr(e4) +ez3 = 
translator1.from_expr(e4) z3_e4 = z3_emem * z3_five assert equiv(ez3, z3_e4) @@ -98,7 +102,7 @@ check_interp(model[mem.get_mem_array(32)], [(0xdeadbeef, 2), (0xdeadbeef + 3, 0)]) # -------------------------------------------------------------------------- -ez3 = Translator.to_language("z3", endianness=">").from_expr(e4) +ez3 = translator2.from_expr(e4) memb = Z3Mem(endianness=">") z3_emem = memb.get(z3.BitVecVal(0xdeadbeef, 32), 32) @@ -115,7 +119,7 @@ check_interp(model[memb.get_mem_array(32)], # -------------------------------------------------------------------------- e5 = ExprSlice(ExprCompose(e, four), 0, 32) * five -ez3 = Translator.to_language('z3').from_expr(e5) +ez3 = translator1.from_expr(e5) z3_e5 = z3.Extract(31, 0, z3.Concat(z3_four, z3_e)) * z3_five assert equiv(ez3, z3_e5) @@ -126,7 +130,7 @@ seven = ExprInt(7, 32) one0seven = ExprInt(0x107, 32) for miasm_int, res in [(five, 1), (four, 0), (seven, 0), (one0seven, 0)]: e6 = ExprOp('parity', miasm_int) - ez3 = Translator.to_language('z3').from_expr(e6) + ez3 = translator1.from_expr(e6) z3_e6 = z3.BitVecVal(res, 1) assert equiv(ez3, z3_e6) @@ -134,37 +138,40 @@ for miasm_int, res in [(five, 1), (four, 0), (seven, 0), (one0seven, 0)]: # '-' for miasm_int, res in [(five, -5), (four, -4)]: e6 = ExprOp('-', miasm_int) - ez3 = Translator.to_language('z3').from_expr(e6) + ez3 = translator1.from_expr(e6) z3_e6 = z3.BitVecVal(res, 32) assert equiv(ez3, z3_e6) # -------------------------------------------------------------------------- -e7 = ExprId(AsmLabel("label_histoire", 0xdeadbeef), 32) -ez3 = Translator.to_language('z3').from_expr(e7) +label_histoire = loc_db.add_location("label_histoire", 0xdeadbeef) +e7 = ExprLoc(label_histoire, 32) +ez3 = translator1.from_expr(e7) z3_e7 = z3.BitVecVal(0xdeadbeef, 32) assert equiv(ez3, z3_e7) # Should just not throw anything to pass -e8 = ExprId(AsmLabel("label_jambe"), 32) -ez3 = Translator.to_language('z3').from_expr(e8) +lbl_e8 = loc_db.add_location("label_jambe") + 
+e8 = ExprLoc(lbl_e8, 32) +ez3 = translator1.from_expr(e8) assert not equiv(ez3, z3_e7) # -------------------------------------------------------------------------- # cntleadzeros, cnttrailzeros # cnttrailzeros(0x1138) == 3 -cnttrailzeros1 = Translator.to_language('z3').from_expr(ExprOp("cnttrailzeros", ExprInt(0x1138, 32))) +cnttrailzeros1 = translator1.from_expr(ExprOp("cnttrailzeros", ExprInt(0x1138, 32))) cnttrailzeros2 = z3.BitVecVal(3, 32) assert(equiv(cnttrailzeros1, cnttrailzeros2)) # cntleadzeros(0x11300) == 0xf -cntleadzeros1 = Translator.to_language('z3').from_expr(ExprOp("cntleadzeros", ExprInt(0x11300, 32))) +cntleadzeros1 = translator1.from_expr(ExprOp("cntleadzeros", ExprInt(0x11300, 32))) cntleadzeros2 = z3.BitVecVal(0xf, 32) assert(equiv(cntleadzeros1, cntleadzeros2)) # cnttrailzeros(0x8000) + 1 == cntleadzeros(0x8000) -cnttrailzeros3 = Translator.to_language('z3').from_expr(ExprOp("cnttrailzeros", ExprInt(0x8000, 32)) + ExprInt(1, 32)) -cntleadzeros3 = Translator.to_language('z3').from_expr(ExprOp("cntleadzeros", ExprInt(0x8000, 32))) +cnttrailzeros3 = translator1.from_expr(ExprOp("cnttrailzeros", ExprInt(0x8000, 32)) + ExprInt(1, 32)) +cntleadzeros3 = translator1.from_expr(ExprOp("cntleadzeros", ExprInt(0x8000, 32))) assert(equiv(cnttrailzeros3, cntleadzeros3)) print "TranslatorZ3 tests are OK." 
diff --git a/test/jitter/bad_block.py b/test/jitter/bad_block.py new file mode 100644 index 00000000..ae11e696 --- /dev/null +++ b/test/jitter/bad_block.py @@ -0,0 +1,43 @@ +import sys +from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_UNK_MNEMO +from miasm2.analysis.machine import Machine + +def code_sentinelle(jitter): + jitter.run = False + jitter.pc = 0 + return True + +machine = Machine("x86_32") +jitter = machine.jitter(sys.argv[1]) + +jitter.init_stack() + +# nop +# mov eax, 0x42 +# XX +data = "90b842000000ffff90909090".decode('hex') + +# Will raise memory error at 0x40000006 + +error_raised = False +def raise_me(jitter): + global error_raised + error_raised = True + assert jitter.pc == 0x40000006 + return False + +jitter.add_exception_handler(EXCEPT_UNK_MNEMO, raise_me) + +run_addr = 0x40000000 + +jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) + +jitter.set_trace_log() +jitter.push_uint32_t(0x1337beef) + +jitter.add_breakpoint(0x1337beef, code_sentinelle) + +jitter.init_run(run_addr) +jitter.continue_run() + +assert error_raised is True diff --git a/test/jitter/jit_options.py b/test/jitter/jit_options.py index 4fe936d5..a0ddbc11 100644 --- a/test/jitter/jit_options.py +++ b/test/jitter/jit_options.py @@ -33,8 +33,7 @@ def init_jitter(): # Init jitter myjit.init_stack() - myjit.jit.log_regs = True - myjit.jit.log_mn = True + myjit.set_trace_log() myjit.push_uint32_t(0x1337beef) myjit.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/test/jitter/jmp_out_mem.py b/test/jitter/jmp_out_mem.py new file mode 100644 index 00000000..93ae8304 --- /dev/null +++ b/test/jitter/jmp_out_mem.py @@ -0,0 +1,46 @@ +import sys +from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_ACCESS_VIOL +from miasm2.analysis.machine import Machine + +def code_sentinelle(jitter): + jitter.run = False + jitter.pc = 0 + return True + + +machine = Machine("x86_32") +jitter = machine.jitter(sys.argv[1]) + +jitter.init_stack() + +# nop +# mov eax, 
0x42 +# jmp 0x20 + +data = "90b842000000eb20".decode('hex') + +# Will raise memory error at 0x40000028 + +error_raised = False +def raise_me(jitter): + global error_raised + error_raised = True + assert jitter.pc == 0x40000028 + return False + +jitter.add_exception_handler(EXCEPT_ACCESS_VIOL, raise_me) + + +run_addr = 0x40000000 + +jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) + +jitter.set_trace_log() +jitter.push_uint32_t(0x1337beef) + +jitter.add_breakpoint(0x1337beef, code_sentinelle) + +jitter.init_run(run_addr) +jitter.continue_run() + +assert error_raised is True diff --git a/test/jitter/test_post_instr.py b/test/jitter/test_post_instr.py index 3e68d58e..39e87616 100644 --- a/test/jitter/test_post_instr.py +++ b/test/jitter/test_post_instr.py @@ -1,6 +1,6 @@ +import sys from miasm2.analysis.machine import Machine from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_BREAKPOINT_MEMORY, EXCEPT_ACCESS_VIOL -import sys machine = Machine("x86_32") jitter = machine.jitter(sys.argv[1]) @@ -23,8 +23,8 @@ jitter.vm.add_memory_page(0x1000, PAGE_READ|PAGE_WRITE, "\x00"*0x1000, "code pag # RET jitter.vm.set_mem(0x1000, "B844332211C3".decode('hex')) -jitter.jit.log_mn = True -jitter.jit.log_regs = True + +jitter.set_trace_log() def do_not_raise_me(jitter): raise ValueError("Should not be here") @@ -41,6 +41,3 @@ try: jitter.continue_run() except AssertionError: assert jitter.vm.get_exception() == EXCEPT_ACCESS_VIOL -except RuntimeError: - assert sys.argv[1] == 'python' - assert jitter.vm.get_exception() == EXCEPT_ACCESS_VIOL diff --git a/test/test_all.py b/test/test_all.py index 52873f4b..40df315c 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -59,7 +59,6 @@ class ArchUnitTest(RegressionTest): # script -> blacklisted jitter blacklist = { "x86/unit/mn_float.py": ["python", "llvm"], - "x86/unit/mn_div.py": ["gcc"], } for script in ["x86/sem.py", "x86/unit/mn_strings.py", @@ -98,6 +97,8 @@ for script in ["x86/sem.py", tags = 
[TAGS[jitter]] if jitter in TAGS else [] testset += ArchUnitTest(script, jitter, base_dir="arch", tags=tags) +testset += ArchUnitTest("x86/unit/access_xmm.py", "python", base_dir="arch") + ### QEMU regression tests class QEMUTest(RegressionTest): """Test against QEMU regression tests @@ -231,6 +232,7 @@ for script in ["interval.py", "parse_asm.py", "utils.py", "sembuilder.py", + "locationdb.py", "test_types.py", ]: testset += RegressionTest([script], base_dir="core") @@ -379,6 +381,8 @@ for script in ["jitload.py", "vm_mngr.py", "jit_options.py", "test_post_instr.py", + "bad_block.py", + "jmp_out_mem.py", ]: for engine in ArchUnitTest.jitter_engines: testset += RegressionTest([script, engine], base_dir="jitter", |